diff --git a/main.py b/main.py
index 84c90366..039fe111 100644
--- a/main.py
+++ b/main.py
@@ -10,10 +10,10 @@ config = DEFAULT_CONFIG.copy()
 config["llm_provider"] = "llamacpp" # Use a different model
 config["backend_url"] = "http://localhost:8080/v1" # Use a different backend
 config["deep_think_llm"] = (
-    "models/Qwen3-4B-Thinking-2507-UD-Q8_K_XL.gguf" # Use a different model
+    "models/Llama-3.3-70B-Instruct.Q5_K_M.gguf" # Use a different model
 )
 config["quick_think_llm"] = (
-    "models/Qwen3-4B-Thinking-2507-UD-Q8_K_XL.gguf" # Use a different model
+    "models/Llama-3.3-70B-Instruct.Q5_K_M.gguf" # Use a different model
 )
 config["max_debate_rounds"] = 1 # Increase debate rounds
 config["online_tools"] = True # Increase debate rounds
diff --git a/main.slurm b/main.slurm
index 2d348927..6c34f8fb 100644
--- a/main.slurm
+++ b/main.slurm
@@ -1,13 +1,13 @@
 #!/bin/bash
 #SBATCH --job-name=tradingAgents
-#SBATCH --output=tradingAgents%j.out
-#SBATCH --error=tradingAgents%j.err
-#SBATCH --time=01:00:00
+#SBATCH --output=runs/%j/run_%j.out
+#SBATCH --error=runs/%j/run_%j.err
+#SBATCH --time=02:00:00
 #SBATCH --partition=gpu
 #SBATCH --ntasks=1
 #SBATCH --cpus-per-task=4
-#SBATCH --gpus=h100-96:2
-#SBATCH --mem=96G
+#SBATCH --gpus=h100-96:1
+#SBATCH --mem=700G
 
 # Run the script
 uv sync
diff --git a/tradingagents/agents/utils/memory.py b/tradingagents/agents/utils/memory.py
index 3751ee4f..c1430215 100644
--- a/tradingagents/agents/utils/memory.py
+++ b/tradingagents/agents/utils/memory.py
@@ -23,18 +23,18 @@ class FinancialSituationMemory:
             # instantiate the LlamaCpp embeddings wrapper
             self.embeddings_model = LlamaCppEmbeddings(
                 model_path=self.embedding,
-                n_ctx=2048,
+                n_ctx=512,
                 n_parts=-1,
                 seed=0,
                 f16_kv=True,
                 logits_all=False,
                 vocab_only=False,
                 use_mlock=False,
-                n_threads=4,
+                n_threads=16,
                 n_batch=512,
-                n_gpu_layers=0,
+                n_gpu_layers=10,
                 verbose=False,
-                device="cpu",
+                device="cuda",
             )
             self.client = None
         else:
diff --git a/tradingagents/graph/trading_graph.py b/tradingagents/graph/trading_graph.py
index 38fef3f5..05daf7b7 100644
--- a/tradingagents/graph/trading_graph.py
+++ b/tradingagents/graph/trading_graph.py
@@ -109,9 +109,9 @@ class TradingAgentsGraph:
         elif self.config["llm_provider"].lower() == "llamacpp":
             self.deep_thinking_llm = ChatLlamaCpp(
                 model_path=self.config["deep_think_llm"],
-                n_ctx=8192,
-                n_batch=512,
-                n_gpu_layers=20,
+                n_ctx=65536,
+                n_batch=1024,
+                n_gpu_layers=80,
                 n_threads=multiprocessing.cpu_count() - 1,
                 verbose=True,
             )