From 20132214993d2760ae52a494c16fdf696878a32b Mon Sep 17 00:00:00 2001
From: Ivan Lee <84584280+ivanleekk@users.noreply.github.com>
Date: Mon, 18 Aug 2025 22:53:08 +0800
Subject: [PATCH] feat: llama.cpp

---
 .gitignore                           |  3 ++-
 main.py                              | 10 +++++++---
 pyproject.toml                       |  1 +
 tradingagents/graph/trading_graph.py | 19 ++++++++++++++++---
 uv.lock                              | 14 ++++++++++++++
 5 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5cd1c383..5023074d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,5 @@ eval_data/
 .env
 *.log
 results
-.idea
\ No newline at end of file
+.idea
+models
\ No newline at end of file
diff --git a/main.py b/main.py
index be471397..057781cd 100644
--- a/main.py
+++ b/main.py
@@ -7,10 +7,14 @@ load_dotenv()
 
 # Create a custom config
 config = DEFAULT_CONFIG.copy()
-config["llm_provider"] = "vllm"  # Use a different model
+config["llm_provider"] = "llamacpp"  # Use a different model
 config["backend_url"] = "http://localhost:8000/v1"  # Use a different backend
-config["deep_think_llm"] = "zai-org/GLM-4.5"  # Use a different model
-config["quick_think_llm"] = "zai-org/GLM-4.5"  # Use a different model
+config["deep_think_llm"] = (
+    "models/Qwen3-4B-Thinking-2507-UD-Q8_K_XL.gguf"  # Use a different model
+)
+config["quick_think_llm"] = (
+    "models/Qwen3-4B-Thinking-2507-UD-Q8_K_XL.gguf"  # Use a different model
+)
 config["max_debate_rounds"] = 1  # Increase debate rounds
 config["online_tools"] = True  # Increase debate rounds
 
diff --git a/pyproject.toml b/pyproject.toml
index fff006df..72057b81 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,6 +21,7 @@ dependencies = [
     "langchain-ollama>=0.3.6",
     "langchain-openai>=0.3.23",
     "langgraph>=0.4.8",
+    "llama-cpp-python>=0.3.16",
     "pandas>=2.3.0",
     "parsel>=1.10.0",
     "praw>=7.8.1",
diff --git a/tradingagents/graph/trading_graph.py b/tradingagents/graph/trading_graph.py
index fe9b9f6c..b3b00a72 100644
--- a/tradingagents/graph/trading_graph.py
+++ b/tradingagents/graph/trading_graph.py
@@ -10,7 +10,10 @@ from langchain_openai import ChatOpenAI
 from langchain_anthropic import ChatAnthropic
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_ollama import ChatOllama
-from langchain_community.llms import VLLM
+from langchain_community.llms import VLLM, VLLMOpenAI
+import multiprocessing
+
+from langchain_community.chat_models import ChatLlamaCpp
 
 from langgraph.prebuilt import ToolNode
 
@@ -91,16 +94,26 @@ class TradingAgentsGraph:
                 model=self.config["quick_think_llm"]
             )
         elif self.config["llm_provider"].lower() == "vllm":
-            self.deep_thinking_llm = VLLM(
+            self.deep_thinking_llm = VLLMOpenAI(
                 model=self.config["deep_think_llm"],
                 trust_remote_code=True,
                 tensor_parallel_size=1,
             )
-            self.quick_thinking_llm = VLLM(
+            self.quick_thinking_llm = VLLMOpenAI(
                 model=self.config["quick_think_llm"],
                 trust_remote_code=True,
                 tensor_parallel_size=1,
             )
+        elif self.config["llm_provider"].lower() == "llamacpp":
+            self.deep_thinking_llm = ChatLlamaCpp(
+                model_path=self.config["deep_think_llm"],
+                n_ctx=8192,
+                n_batch=512,
+                n_gpu_layers=20,
+                n_threads=multiprocessing.cpu_count() - 1,
+                verbose=True,
+            )
+            self.quick_thinking_llm = self.deep_thinking_llm
         else:
             raise ValueError(f"Unsupported LLM provider: {self.config['llm_provider']}")
 
diff --git a/uv.lock b/uv.lock
index 53132759..be996cfa 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2440,6 +2440,18 @@ dependencies = [
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/7e/c1/7bd34ad0ae6cfd99512f8a40b28b9624c3b1f4e1d40c9038eabc2f870b15/literalai-0.1.201.tar.gz", hash = "sha256:29e4ccadd9d68bfea319a7f0b4fc32611b081990d9195f98e5e97a14d24d3713", size = 67832, upload-time = "2025-03-24T10:01:51.559Z" }
 
+[[package]]
+name = "llama-cpp-python"
+version = "0.3.16"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "diskcache" },
+    { name = "jinja2" },
+    { name = "numpy" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e4/b4/c8cd17629ced0b9644a71d399a91145aedef109c0333443bef015e45b704/llama_cpp_python-0.3.16.tar.gz", hash = "sha256:34ed0f9bd9431af045bb63d9324ae620ad0536653740e9bb163a2e1fcb973be6", size = 50688636, upload-time = "2025-08-15T04:58:29.212Z" }
+
 [[package]]
 name = "llguidance"
 version = "0.7.30"
@@ -6702,6 +6714,7 @@ dependencies = [
     { name = "langchain-ollama" },
     { name = "langchain-openai" },
     { name = "langgraph" },
+    { name = "llama-cpp-python" },
     { name = "pandas" },
     { name = "parsel" },
     { name = "praw" },
@@ -6740,6 +6753,7 @@ requires-dist = [
     { name = "langchain-ollama", specifier = ">=0.3.6" },
     { name = "langchain-openai", specifier = ">=0.3.23" },
     { name = "langgraph", specifier = ">=0.4.8" },
+    { name = "llama-cpp-python", specifier = ">=0.3.16" },
     { name = "pandas", specifier = ">=2.3.0" },
     { name = "parsel", specifier = ">=1.10.0" },
     { name = "praw", specifier = ">=7.8.1" },