feat: llama.cpp

This commit is contained in:
Ivan Lee 2025-08-18 22:53:08 +08:00
parent 6418d1a906
commit 2013221499
5 changed files with 40 additions and 7 deletions

3
.gitignore vendored
View File

@@ -9,4 +9,5 @@ eval_data/
.env
*.log
results
.idea
.idea
models

10
main.py
View File

@@ -7,10 +7,14 @@ load_dotenv()
# Create a custom config
config = DEFAULT_CONFIG.copy()
config["llm_provider"] = "vllm" # Use a different model
config["llm_provider"] = "llamacpp" # Use a different model
config["backend_url"] = "http://localhost:8000/v1" # Use a different backend
config["deep_think_llm"] = "zai-org/GLM-4.5" # Use a different model
config["quick_think_llm"] = "zai-org/GLM-4.5" # Use a different model
config["deep_think_llm"] = (
"models/Qwen3-4B-Thinking-2507-UD-Q8_K_XL.gguf" # Use a different model
)
config["quick_think_llm"] = (
"models/Qwen3-4B-Thinking-2507-UD-Q8_K_XL.gguf" # Use a different model
)
config["max_debate_rounds"] = 1 # Increase debate rounds
config["online_tools"] = True # Increase debate rounds

View File

@@ -21,6 +21,7 @@ dependencies = [
"langchain-ollama>=0.3.6",
"langchain-openai>=0.3.23",
"langgraph>=0.4.8",
"llama-cpp-python>=0.3.16",
"pandas>=2.3.0",
"parsel>=1.10.0",
"praw>=7.8.1",

View File

@@ -10,7 +10,10 @@ from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_ollama import ChatOllama
from langchain_community.llms import VLLM
from langchain_community.llms import VLLM, VLLMOpenAI
import multiprocessing
from langchain_community.chat_models import ChatLlamaCpp
from langgraph.prebuilt import ToolNode
@@ -91,16 +94,26 @@ class TradingAgentsGraph:
model=self.config["quick_think_llm"]
)
elif self.config["llm_provider"].lower() == "vllm":
self.deep_thinking_llm = VLLM(
self.deep_thinking_llm = VLLMOpenAI(
model=self.config["deep_think_llm"],
trust_remote_code=True,
tensor_parallel_size=1,
)
self.quick_thinking_llm = VLLM(
self.quick_thinking_llm = VLLMOpenAI(
model=self.config["quick_think_llm"],
trust_remote_code=True,
tensor_parallel_size=1,
)
elif self.config["llm_provider"].lower() == "llamacpp":
self.deep_thinking_llm = ChatLlamaCpp(
model_path=self.config["deep_think_llm"],
n_ctx=8192,
n_batch=512,
n_gpu_layers=20,
n_threads=multiprocessing.cpu_count() - 1,
verbose=True,
)
self.quick_thinking_llm = self.deep_thinking_llm
else:
raise ValueError(f"Unsupported LLM provider: {self.config['llm_provider']}")

14
uv.lock
View File

@@ -2440,6 +2440,18 @@ dependencies = [
]
sdist = { url = "https://files.pythonhosted.org/packages/7e/c1/7bd34ad0ae6cfd99512f8a40b28b9624c3b1f4e1d40c9038eabc2f870b15/literalai-0.1.201.tar.gz", hash = "sha256:29e4ccadd9d68bfea319a7f0b4fc32611b081990d9195f98e5e97a14d24d3713", size = 67832, upload-time = "2025-03-24T10:01:51.559Z" }
[[package]]
name = "llama-cpp-python"
version = "0.3.16"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "diskcache" },
{ name = "jinja2" },
{ name = "numpy" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e4/b4/c8cd17629ced0b9644a71d399a91145aedef109c0333443bef015e45b704/llama_cpp_python-0.3.16.tar.gz", hash = "sha256:34ed0f9bd9431af045bb63d9324ae620ad0536653740e9bb163a2e1fcb973be6", size = 50688636, upload-time = "2025-08-15T04:58:29.212Z" }
[[package]]
name = "llguidance"
version = "0.7.30"
@@ -6702,6 +6714,7 @@ dependencies = [
{ name = "langchain-ollama" },
{ name = "langchain-openai" },
{ name = "langgraph" },
{ name = "llama-cpp-python" },
{ name = "pandas" },
{ name = "parsel" },
{ name = "praw" },
@@ -6740,6 +6753,7 @@ requires-dist = [
{ name = "langchain-ollama", specifier = ">=0.3.6" },
{ name = "langchain-openai", specifier = ">=0.3.23" },
{ name = "langgraph", specifier = ">=0.4.8" },
{ name = "llama-cpp-python", specifier = ">=0.3.16" },
{ name = "pandas", specifier = ">=2.3.0" },
{ name = "parsel", specifier = ">=1.10.0" },
{ name = "praw", specifier = ">=7.8.1" },