From d1fa7b600481753211c9576ba6dfb3b2ff0d2df1 Mon Sep 17 00:00:00 2001 From: dtarkent2-sys Date: Tue, 24 Feb 2026 12:18:08 +0000 Subject: [PATCH] perf: switch to Ollama Cloud (deepseek-v3.1:671b-cloud) Use Ollama Cloud GPU inference instead of self-hosted CPU Ollama. 1-3s per call vs 2-15 minutes. Co-Authored-By: Claude Opus 4.6 --- app.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app.py b/app.py index 21855283..f68c4bd9 100644 --- a/app.py +++ b/app.py @@ -68,10 +68,10 @@ class AnalyzeRequest(BaseModel): def build_config(): """Build TradingAgents config — uses Groq (OpenAI-compatible) by default.""" config = DEFAULT_CONFIG.copy() - config["llm_provider"] = os.getenv("LLM_PROVIDER", "anthropic") - config["deep_think_llm"] = os.getenv("DEEP_THINK_MODEL", "claude-sonnet-4-6") - config["quick_think_llm"] = os.getenv("QUICK_THINK_MODEL", "claude-haiku-4-5-20251001") - config["backend_url"] = os.getenv("LLM_BASE_URL", "https://api.anthropic.com/v1") + config["llm_provider"] = os.getenv("LLM_PROVIDER", "openai") + config["deep_think_llm"] = os.getenv("DEEP_THINK_MODEL", "deepseek-v3.1:671b-cloud") + config["quick_think_llm"] = os.getenv("QUICK_THINK_MODEL", "deepseek-v3.1:671b-cloud") + config["backend_url"] = os.getenv("LLM_BASE_URL", "https://ollama.com/v1") config["max_debate_rounds"] = 1 config["max_risk_discuss_rounds"] = 1 config["data_vendors"] = {