feat: llama.cpp

This commit is contained in:
Ivan Lee 2025-08-18 22:53:08 +08:00
parent 6418d1a906
commit 2013221499
5 changed files with 40 additions and 7 deletions

3
.gitignore vendored
View File

@@ -9,4 +9,5 @@ eval_data/
.env
*.log
results
.idea
.idea
models

10
main.py
View File

@@ -7,10 +7,14 @@ load_dotenv()
# Create a custom config
config = DEFAULT_CONFIG.copy()
config["llm_provider"] = "vllm" # Use a different model
config["llm_provider"] = "llamacpp" # Use a different model
config["backend_url"] = "http://localhost:8000/v1" # Use a different backend
config["deep_think_llm"] = "zai-org/GLM-4.5" # Use a different model
config["quick_think_llm"] = "zai-org/GLM-4.5" # Use a different model
config["deep_think_llm"] = (
"models/Qwen3-4B-Thinking-2507-UD-Q8_K_XL.gguf" # Use a different model
)
config["quick_think_llm"] = (
"models/Qwen3-4B-Thinking-2507-UD-Q8_K_XL.gguf" # Use a different model
)
config["max_debate_rounds"] = 1 # Increase debate rounds
config["online_tools"] = True # Increase debate rounds

View File

@@ -21,6 +21,7 @@ dependencies = [
"langchain-ollama>=0.3.6",
"langchain-openai>=0.3.23",
"langgraph>=0.4.8",
"llama-cpp-python>=0.3.16",
"pandas>=2.3.0",
"parsel>=1.10.0",
"praw>=7.8.1",

View File

@@ -10,7 +10,10 @@ from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_ollama import ChatOllama
from langchain_community.llms import VLLM
from langchain_community.llms import VLLM, VLLMOpenAI
import multiprocessing
from langchain_community.chat_models import ChatLlamaCpp
from langgraph.prebuilt import ToolNode
@@ -91,16 +94,26 @@ class TradingAgentsGraph:
model=self.config["quick_think_llm"]
)
elif self.config["llm_provider"].lower() == "vllm":
self.deep_thinking_llm = VLLM(
self.deep_thinking_llm = VLLMOpenAI(
model=self.config["deep_think_llm"],
trust_remote_code=True,
tensor_parallel_size=1,
)
self.quick_thinking_llm = VLLM(
self.quick_thinking_llm = VLLMOpenAI(
model=self.config["quick_think_llm"],
trust_remote_code=True,
tensor_parallel_size=1,
)
elif self.config["llm_provider"].lower() == "llamacpp":
self.deep_thinking_llm = ChatLlamaCpp(
model_path=self.config["deep_think_llm"],
n_ctx=8192,
n_batch=512,
n_gpu_layers=20,
n_threads=multiprocessing.cpu_count() - 1,
verbose=True,
)
self.quick_thinking_llm = self.deep_thinking_llm
else:
raise ValueError(f"Unsupported LLM provider: {self.config['llm_provider']}")

14
uv.lock
View File

@@ -2440,6 +2440,18 @@ dependencies = [
]
sdist = { url = "https://files.pythonhosted.org/packages/7e/c1/7bd34ad0ae6cfd99512f8a40b28b9624c3b1f4e1d40c9038eabc2f870b15/literalai-0.1.201.tar.gz", hash = "sha256:29e4ccadd9d68bfea319a7f0b4fc32611b081990d9195f98e5e97a14d24d3713", size = 67832, upload-time = "2025-03-24T10:01:51.559Z" }
[[package]]
name = "llama-cpp-python"
version = "0.3.16"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "diskcache" },
{ name = "jinja2" },
{ name = "numpy" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e4/b4/c8cd17629ced0b9644a71d399a91145aedef109c0333443bef015e45b704/llama_cpp_python-0.3.16.tar.gz", hash = "sha256:34ed0f9bd9431af045bb63d9324ae620ad0536653740e9bb163a2e1fcb973be6", size = 50688636, upload-time = "2025-08-15T04:58:29.212Z" }
[[package]]
name = "llguidance"
version = "0.7.30"
@@ -6702,6 +6714,7 @@ dependencies = [
{ name = "langchain-ollama" },
{ name = "langchain-openai" },
{ name = "langgraph" },
{ name = "llama-cpp-python" },
{ name = "pandas" },
{ name = "parsel" },
{ name = "praw" },
@@ -6740,6 +6753,7 @@ requires-dist = [
{ name = "langchain-ollama", specifier = ">=0.3.6" },
{ name = "langchain-openai", specifier = ">=0.3.23" },
{ name = "langgraph", specifier = ">=0.4.8" },
{ name = "llama-cpp-python", specifier = ">=0.3.16" },
{ name = "pandas", specifier = ">=2.3.0" },
{ name = "parsel", specifier = ">=1.10.0" },
{ name = "praw", specifier = ">=7.8.1" },