feat: llama.cpp working with TradingAgents

2025-08-20 12:12:51 +08:00 · 2025-08-20 12:12:51 +08:00 · 1eda6ce34a
parent d40c0dd000
commit 1eda6ce34a
4 changed files with 14 additions and 14 deletions
--- a/main.py
+++ b/main.py
@@ -10,10 +10,10 @@ config = DEFAULT_CONFIG.copy()
 config["llm_provider"] = "llamacpp"  # Use a different model
 config["backend_url"] = "http://localhost:8080/v1"  # Use a different backend
 config["deep_think_llm"] = (
-    "models/Qwen3-4B-Thinking-2507-UD-Q8_K_XL.gguf"  # Use a different model
+    "models/Llama-3.3-70B-Instruct.Q5_K_M.gguf"  # Use a different model
 )
 config["quick_think_llm"] = (
-    "models/Qwen3-4B-Thinking-2507-UD-Q8_K_XL.gguf"  # Use a different model
+    "models/Llama-3.3-70B-Instruct.Q5_K_M.gguf"  # Use a different model
 )
 config["max_debate_rounds"] = 1  # Increase debate rounds
 config["online_tools"] = True  # Increase debate rounds
--- a/main.slurm
+++ b/main.slurm
@@ -1,13 +1,13 @@
 #!/bin/bash
 #SBATCH --job-name=tradingAgents
-#SBATCH --output=tradingAgents%j.out
-#SBATCH --error=tradingAgents%j.err
-#SBATCH --time=01:00:00
+#SBATCH --output=runs/%j/run_%j.out
+#SBATCH --error=runs/%j/run_%j.err
+#SBATCH --time=02:00:00
 #SBATCH --partition=gpu
 #SBATCH --ntasks=1
 #SBATCH --cpus-per-task=4
-#SBATCH --gpus=h100-96:2
-#SBATCH --mem=96G
+#SBATCH --gpus=h100-96:1
+#SBATCH --mem=700G

 # Run the script
 uv sync
--- a/tradingagents/agents/utils/memory.py
+++ b/tradingagents/agents/utils/memory.py
@@ -23,18 +23,18 @@ class FinancialSituationMemory:
            # instantiate the LlamaCpp embeddings wrapper
            self.embeddings_model = LlamaCppEmbeddings(
                model_path=self.embedding,
-                n_ctx=2048,
+                n_ctx=512,
                n_parts=-1,
                seed=0,
                f16_kv=True,
                logits_all=False,
                vocab_only=False,
                use_mlock=False,
-                n_threads=4,
+                n_threads=16,
                n_batch=512,
-                n_gpu_layers=0,
+                n_gpu_layers=10,
                verbose=False,
-                device="cpu",
+                device="cuda",
            )
            self.client = None
        else:
--- a/tradingagents/graph/trading_graph.py
+++ b/tradingagents/graph/trading_graph.py
@@ -109,9 +109,9 @@ class TradingAgentsGraph:
        elif self.config["llm_provider"].lower() == "llamacpp":
            self.deep_thinking_llm = ChatLlamaCpp(
                model_path=self.config["deep_think_llm"],
-                n_ctx=8192,
-                n_batch=512,
-                n_gpu_layers=20,
+                n_ctx=65536,
+                n_batch=1024,
+                n_gpu_layers=80,
                n_threads=multiprocessing.cpu_count() - 1,
                verbose=True,
            )