From 20132214993d2760ae52a494c16fdf696878a32b Mon Sep 17 00:00:00 2001
From: Ivan Lee <84584280+ivanleekk@users.noreply.github.com>
Date: Mon, 18 Aug 2025 22:53:08 +0800
Subject: [PATCH] feat: llama.cpp

---
 .gitignore                           |  3 ++-
 main.py                              | 10 +++++++---
 pyproject.toml                       |  1 +
 tradingagents/graph/trading_graph.py | 19 ++++++++++++++++---
 uv.lock                              | 14 ++++++++++++++
 5 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5cd1c383..5023074d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,5 @@ eval_data/
 .env
 *.log
 results
-.idea
\ No newline at end of file
+.idea
+models
\ No newline at end of file
diff --git a/main.py b/main.py
index be471397..057781cd 100644
--- a/main.py
+++ b/main.py
@@ -7,10 +7,14 @@ load_dotenv()
 
 # Create a custom config
 config = DEFAULT_CONFIG.copy()
-config["llm_provider"] = "vllm"  # Use a different model
+config["llm_provider"] = "llamacpp"  # Use a different model
 config["backend_url"] = "http://localhost:8000/v1"  # Use a different backend
-config["deep_think_llm"] = "zai-org/GLM-4.5"  # Use a different model
-config["quick_think_llm"] = "zai-org/GLM-4.5"  # Use a different model
+config["deep_think_llm"] = (
+    "models/Qwen3-4B-Thinking-2507-UD-Q8_K_XL.gguf"  # Use a different model
+)
+config["quick_think_llm"] = (
+    "models/Qwen3-4B-Thinking-2507-UD-Q8_K_XL.gguf"  # Use a different model
+)
 config["max_debate_rounds"] = 1  # Increase debate rounds
 config["online_tools"] = True  # Increase debate rounds
 
diff --git a/pyproject.toml b/pyproject.toml
index fff006df..72057b81 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,6 +21,7 @@ dependencies = [
     "langchain-ollama>=0.3.6",
     "langchain-openai>=0.3.23",
     "langgraph>=0.4.8",
+    "llama-cpp-python>=0.3.16",
     "pandas>=2.3.0",
     "parsel>=1.10.0",
     "praw>=7.8.1",
diff --git a/tradingagents/graph/trading_graph.py b/tradingagents/graph/trading_graph.py
index fe9b9f6c..b3b00a72 100644
--- a/tradingagents/graph/trading_graph.py
+++ b/tradingagents/graph/trading_graph.py
@@ -10,7 +10,10 @@ from langchain_openai import ChatOpenAI
 from langchain_anthropic import ChatAnthropic
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_ollama import ChatOllama
-from langchain_community.llms import VLLM
+from langchain_community.llms import VLLM, VLLMOpenAI
+import multiprocessing
+
+from langchain_community.chat_models import ChatLlamaCpp
 
 from langgraph.prebuilt import ToolNode
 
@@ -91,16 +94,26 @@ class TradingAgentsGraph:
                 model=self.config["quick_think_llm"]
             )
         elif self.config["llm_provider"].lower() == "vllm":
-            self.deep_thinking_llm = VLLM(
+            self.deep_thinking_llm = VLLMOpenAI(
                 model=self.config["deep_think_llm"],
                 trust_remote_code=True,
                 tensor_parallel_size=1,
             )
-            self.quick_thinking_llm = VLLM(
+            self.quick_thinking_llm = VLLMOpenAI(
                 model=self.config["quick_think_llm"],
                 trust_remote_code=True,
                 tensor_parallel_size=1,
             )
+        elif self.config["llm_provider"].lower() == "llamacpp":
+            self.deep_thinking_llm = ChatLlamaCpp(
+                model_path=self.config["deep_think_llm"],
+                n_ctx=8192,
+                n_batch=512,
+                n_gpu_layers=20,
+                n_threads=multiprocessing.cpu_count() - 1,
+                verbose=True,
+            )
+            self.quick_thinking_llm = self.deep_thinking_llm
         else:
             raise ValueError(f"Unsupported LLM provider: {self.config['llm_provider']}")
 
diff --git a/uv.lock b/uv.lock
index 53132759..be996cfa 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2440,6 +2440,18 @@ dependencies = [
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/7e/c1/7bd34ad0ae6cfd99512f8a40b28b9624c3b1f4e1d40c9038eabc2f870b15/literalai-0.1.201.tar.gz", hash = "sha256:29e4ccadd9d68bfea319a7f0b4fc32611b081990d9195f98e5e97a14d24d3713", size = 67832, upload-time = "2025-03-24T10:01:51.559Z" }
 
+[[package]]
+name = "llama-cpp-python"
+version = "0.3.16"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "diskcache" },
+    { name = "jinja2" },
+    { name = "numpy" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e4/b4/c8cd17629ced0b9644a71d399a91145aedef109c0333443bef015e45b704/llama_cpp_python-0.3.16.tar.gz", hash = "sha256:34ed0f9bd9431af045bb63d9324ae620ad0536653740e9bb163a2e1fcb973be6", size = 50688636, upload-time = "2025-08-15T04:58:29.212Z" }
+
 [[package]]
 name = "llguidance"
 version = "0.7.30"
@@ -6702,6 +6714,7 @@ dependencies = [
     { name = "langchain-ollama" },
     { name = "langchain-openai" },
     { name = "langgraph" },
+    { name = "llama-cpp-python" },
     { name = "pandas" },
     { name = "parsel" },
     { name = "praw" },
@@ -6740,6 +6753,7 @@ requires-dist = [
     { name = "langchain-ollama", specifier = ">=0.3.6" },
     { name = "langchain-openai", specifier = ">=0.3.23" },
     { name = "langgraph", specifier = ">=0.4.8" },
+    { name = "llama-cpp-python", specifier = ">=0.3.16" },
     { name = "pandas", specifier = ">=2.3.0" },
     { name = "parsel", specifier = ">=1.10.0" },
     { name = "praw", specifier = ">=7.8.1" },