Add support for NVIDIA LLM provider and update configuration settings

2026-03-24 14:22:48 +00:00 · 2026-03-24 14:22:48 +00:00 · 1f92c3d8cd
parent d880468047
commit 1f92c3d8cd
4 changed files with 70 additions and 78 deletions
--- a/cli/utils.py
+++ b/cli/utils.py
@ -10,6 +10,49 @@ ANALYST_ORDER = [
    ("Fundamentals Analyst", AnalystType.FUNDAMENTALS),
 ]

+AGENT_OPTIONS = {  # provider key -> list of (menu label, model name) pairs
+    "openai": [
+        ("GPT-4o-mini - Fast and efficient for quick tasks", "gpt-4o-mini"),
+        ("GPT-4.1-nano - Ultra-lightweight model for basic operations", "gpt-4.1-nano"),
+        ("GPT-4.1-mini - Compact model with good performance", "gpt-4.1-mini"),
+        ("GPT-4o - Standard model with solid capabilities", "gpt-4o"),
+    ],
+    "anthropic": [
+        ("Claude Haiku 3.5 - Fast inference and standard capabilities", "claude-3-5-haiku-latest"),
+        ("Claude Sonnet 3.5 - Highly capable standard model", "claude-3-5-sonnet-latest"),
+        ("Claude Sonnet 3.7 - Exceptional hybrid reasoning and agentic capabilities", "claude-3-7-sonnet-latest"),
+        ("Claude Sonnet 4 - High performance and excellent reasoning", "claude-sonnet-4-0"),
+        ("CCR", "openai/gpt-oss-20b"),
+    ],
+    "google": [
+        ("Gemini 2.0 Flash-Lite - Cost efficiency and low latency", "gemini-2.0-flash-lite"),
+        ("Gemini 2.0 Flash - Next generation features, speed, and thinking", "gemini-2.0-flash"),
+        ("Gemini 2.5 Flash - Adaptive thinking, cost efficiency", "gemini-2.5-flash-preview-05-20"),
+    ],
+    "openrouter": [
+        ("Meta: Llama 4 Scout", "meta-llama/llama-4-scout:free"),
+        ("Meta: Llama 3.3 8B Instruct - A lightweight and ultra-fast variant of Llama 3.3 70B", "meta-llama/llama-3.3-8b-instruct:free"),
+        ("google/gemini-2.0-flash-exp:free - Gemini Flash 2.0 offers a significantly faster time to first token", "google/gemini-2.0-flash-exp:free"),
+    ],
+    "ollama": [
+        ("llama3.1 local", "llama3.1"),
+        ("llama3.2 local", "llama3.2"),
+    ],
+    "lmstudio": [
+        ("LMStudio Qwen 4b Thinking", "qwen/qwen3-4b-thinking-2507"),
+        ("LMStudio GLM 4.6v", "zai-org/glm-4.6v-flash"),
+        ("LMStudio GLM 4.7", "glm-4.7-reap-218b-a32b"),
+        ("LMStudio OSS 20b", "openai/gpt-oss-20b"),
+        ("LMStudio Minimax 2.1", "minimax-m2.1-reap-30-mlx"),
+    ],
+    "nvidia": [
+        ("MiniMax M2.1", "minimaxai/minimax-m2.1"),
+        ("Moonshot K2", "moonshotai/kimi-k2-thinking"),
+        ("Z.AI GLM4.7", "z-ai/glm4.7"),
+        ("Deepseek v3.2", "deepseek-ai/deepseek-v3.2"),
+    ],
+}
+

 def get_ticker() -> str:
    """Prompt the user to enter a ticker symbol."""
@ -126,41 +169,8 @@ def select_shallow_thinking_agent(provider) -> str:
    """Select shallow thinking llm engine using an interactive selection."""

    # Define shallow thinking llm engine options with their corresponding model names
-    SHALLOW_AGENT_OPTIONS = {
-        "openai": [
-            ("GPT-4o-mini - Fast and efficient for quick tasks", "gpt-4o-mini"),
-            ("GPT-4.1-nano - Ultra-lightweight model for basic operations", "gpt-4.1-nano"),
-            ("GPT-4.1-mini - Compact model with good performance", "gpt-4.1-mini"),
-            ("GPT-4o - Standard model with solid capabilities", "gpt-4o"),
-        ],
-        "anthropic": [
-            ("Claude Haiku 3.5 - Fast inference and standard capabilities", "claude-3-5-haiku-latest"),
-            ("Claude Sonnet 3.5 - Highly capable standard model", "claude-3-5-sonnet-latest"),
-            ("Claude Sonnet 3.7 - Exceptional hybrid reasoning and agentic capabilities", "claude-3-7-sonnet-latest"),
-            ("Claude Sonnet 4 - High performance and excellent reasoning", "claude-sonnet-4-0"),
-            ("CCR", "openai/gpt-oss-20b"),
-        ],
-        "google": [
-            ("Gemini 2.0 Flash-Lite - Cost efficiency and low latency", "gemini-2.0-flash-lite"),
-            ("Gemini 2.0 Flash - Next generation features, speed, and thinking", "gemini-2.0-flash"),
-            ("Gemini 2.5 Flash - Adaptive thinking, cost efficiency", "gemini-2.5-flash-preview-05-20"),
-        ],
-        "openrouter": [
-            ("Meta: Llama 4 Scout", "meta-llama/llama-4-scout:free"),
-            ("Meta: Llama 3.3 8B Instruct - A lightweight and ultra-fast variant of Llama 3.3 70B", "meta-llama/llama-3.3-8b-instruct:free"),
-            ("google/gemini-2.0-flash-exp:free - Gemini Flash 2.0 offers a significantly faster time to first token", "google/gemini-2.0-flash-exp:free"),
-        ],
-        "ollama": [
-            ("llama3.1 local", "llama3.1"),
-            ("llama3.2 local", "llama3.2"),
-        ],
-        "lmstudio": [
-            ("LMStudio Qwen 4b Thinking","qwen/qwen3-4b-thinking-2507"),
-            ("LMStudio GLM 4.6v", "zai-org/glm-4.6v-flash"),
-            ("LMStudio OSS 20b","openai/gpt-oss-20b"),
-            ("LMStudio Minimax 2.1","minimax-m2.1-reap-30-mlx"),
-        ]
-    }
+    SHALLOW_AGENT_OPTIONS = AGENT_OPTIONS
+    

    choice = questionary.select(
        "Select Your [Quick-Thinking LLM Engine]:",
@ -191,44 +201,7 @@ def select_deep_thinking_agent(provider) -> str:
    """Select deep thinking llm engine using an interactive selection."""

    # Define deep thinking llm engine options with their corresponding model names
-    DEEP_AGENT_OPTIONS = {
-        "openai": [
-            ("GPT-4.1-nano - Ultra-lightweight model for basic operations", "gpt-4.1-nano"),
-            ("GPT-4.1-mini - Compact model with good performance", "gpt-4.1-mini"),
-            ("GPT-4o - Standard model with solid capabilities", "gpt-4o"),
-            ("o4-mini - Specialized reasoning model (compact)", "o4-mini"),
-            ("o3-mini - Advanced reasoning model (lightweight)", "o3-mini"),
-            ("o3 - Full advanced reasoning model", "o3"),
-            ("o1 - Premier reasoning and problem-solving model", "o1"),
-        ],
-        "anthropic": [
-            ("Claude Haiku 3.5 - Fast inference and standard capabilities", "claude-3-5-haiku-latest"),
-            ("Claude Sonnet 3.5 - Highly capable standard model", "claude-3-5-sonnet-latest"),
-            ("Claude Sonnet 3.7 - Exceptional hybrid reasoning and agentic capabilities", "claude-3-7-sonnet-latest"),
-            ("Claude Sonnet 4 - High performance and excellent reasoning", "claude-sonnet-4-0"),
-            ("Claude Opus 4 - Most powerful Anthropic model", "	claude-opus-4-0"),
-        ],
-        "google": [
-            ("Gemini 2.0 Flash-Lite - Cost efficiency and low latency", "gemini-2.0-flash-lite"),
-            ("Gemini 2.0 Flash - Next generation features, speed, and thinking", "gemini-2.0-flash"),
-            ("Gemini 2.5 Flash - Adaptive thinking, cost efficiency", "gemini-2.5-flash-preview-05-20"),
-            ("Gemini 2.5 Pro", "gemini-2.5-pro-preview-06-05"),
-        ],
-        "openrouter": [
-            ("DeepSeek V3 - a 685B-parameter, mixture-of-experts model", "deepseek/deepseek-chat-v3-0324:free"),
-            ("Deepseek - latest iteration of the flagship chat model family from the DeepSeek team.", "deepseek/deepseek-chat-v3-0324:free"),
-        ],
-        "ollama": [
-            ("llama3.1 local", "llama3.1"),
-            ("qwen3", "qwen3"),
-        ],
-        "lmstudio": [
-            ("LMStudio Qwen 4b Thinking","qwen/qwen3-4b-thinking-2507"),
-            ("LMStudio GLM 4.6v", "zai-org/glm-4.6v-flash"),
-            ("LMStudio OSS 20b","openai/gpt-oss-20b"),
-            ("LMStudio Minimax 2.1","minimax-m2.1-reap-30-mlx"),
-        ]
-    }
+    DEEP_AGENT_OPTIONS = AGENT_OPTIONS  # NOTE(review): now the same table as the quick-thinking menu, so the former deep-only models (o4-mini, o3-mini, o3, o1, Claude Opus 4, Gemini 2.5 Pro, ...) are no longer offered - confirm this is intended
    
    choice = questionary.select(
        "Select Your [Deep-Thinking LLM Engine]:",
@ -257,12 +230,12 @@ def select_llm_provider() -> tuple[str, str]:
    # Define OpenAI api options with their corresponding endpoints
    BASE_URLS = [
        ("LMStudio", "http://192.168.0.20:1234/v1"),
-        ("OpenAI Local", "http://192.168.0.20:1234/v1"),
        ("OpenAI", "https://api.openai.com/v1"),
        ("Anthropic", "https://api.anthropic.com/"),
        ("Google", "https://generativelanguage.googleapis.com/v1"),
        ("Openrouter", "https://openrouter.ai/api/v1"),
        ("Ollama", "http://localhost:11434/v1"),
+        ("Nvidia", "https://integrate.api.nvidia.com/v1"),
    ]
    
    choice = questionary.select(
--- a/tradingagents/agents/utils/memory.py
+++ b/tradingagents/agents/utils/memory.py
@ -14,12 +14,21 @@ class FinancialSituationMemory:
            self.embedding = "nomic-embed-text"
        elif config["backend_url"] == "http://192.168.0.20:1234/v1":
            self.embedding = "text-embedding-nomic-embed-text-v2-moe"
+        # Any other backend_url falls through to the default embedding model
+        # chosen in the `else` branch below.
        else:
            self.embedding = "text-embedding-nomic-embed-text-v2-moe"
                
-        self.client = OpenAI(base_url=config["backend_url"])
+        self.client = OpenAI(base_url=config["local_url"])
        self.chroma_client = chromadb.Client(Settings(allow_reset=True))
        self.situation_collection = self.chroma_client.create_collection(name=name)
+
+        # When NVIDIA is the LLM provider, embeddings are still requested from
+        # the local endpoint (config["local_url"]) with the local embedding
+        # model; the NVIDIA backend URL is not used for embeddings here.
+        if config["llm_provider"].lower() == "nvidia":
+            self.embedding = "text-embedding-nomic-embed-text-v2-moe"
+            self.client = OpenAI(base_url=config["local_url"])

    def get_embedding(self, text):
        """Get OpenAI embedding for a text"""
--- a/tradingagents/default_config.py
+++ b/tradingagents/default_config.py
@ -3,7 +3,8 @@ import os
 DEFAULT_CONFIG = {
    "project_dir": os.path.abspath(os.path.join(os.path.dirname(__file__), ".")),
    "results_dir": os.getenv("TRADINGAGENTS_RESULTS_DIR", "./results"),
-    "data_dir": "/Users/yluo/Documents/Code/ScAI/FR1-data",
+    # Resolved from this file's location (same base as "project_dir"), not a literal "project_dir" folder.
+    "data_dir": os.path.join(os.path.abspath(os.path.dirname(__file__)), "data"),
    "data_cache_dir": os.path.join(
        os.path.abspath(os.path.join(os.path.dirname(__file__), ".")),
        "dataflows/data_cache",
@ -16,7 +17,7 @@ DEFAULT_CONFIG = {
    # Debate and discussion settings
    "max_debate_rounds": 1,
    "max_risk_discuss_rounds": 1,
-    "max_recur_limit": 100,
+    "max_recur_limit": 500,  # raised from 100
    # Data vendor configuration
    # Category-level configuration (default for all tools in category)
    
@ -24,8 +25,14 @@ DEFAULT_CONFIG = {
    "llm_provider": "lmstudio",
    "deep_think_llm": "glm-4.7-reap-50-mixed-3-4-bits",
    "quick_think_llm": "qwen/qwen3-vl-30b",
-    "backend_url": "http://192.168.0.20/v1",
+    "backend_url": "http://192.168.0.20:1234/v1",
    "api_key": "blablabla",
+
+    "local_url": "http://192.168.0.20:1234/v1",
+    "ollama_url": "http://192.168.0.20:11434/v1",
+    # NVIDIA credentials are read from the environment; never commit API keys.
+    "nvidia_backend_url": "https://integrate.api.nvidia.com/v1",
+    "nvidia_api_key": os.getenv("NVIDIA_API_KEY", ""),
    
    
    "data_vendors": {
--- a/tradingagents/graph/trading_graph.py
+++ b/tradingagents/graph/trading_graph.py
@ -75,6 +75,9 @@ class TradingAgentsGraph:
        if self.config["llm_provider"].lower() == "openai" or self.config["llm_provider"] == "ollama" or self.config["llm_provider"] == "lmstudio" or self.config["llm_provider"] == "openrouter":
            self.deep_thinking_llm = ChatOpenAI(model=self.config["deep_think_llm"], base_url=self.config["backend_url"],api_key=self.config["api_key"])
            self.quick_thinking_llm = ChatOpenAI(model=self.config["quick_think_llm"], base_url=self.config["backend_url"],api_key=self.config["api_key"])
+        elif self.config["llm_provider"].lower() == "nvidia" :  # same ChatOpenAI client as above, but pointed at the NVIDIA-specific URL and API key from the config
+            self.deep_thinking_llm = ChatOpenAI(model=self.config["deep_think_llm"], base_url=self.config["nvidia_backend_url"],api_key=self.config["nvidia_api_key"])
+            self.quick_thinking_llm = ChatOpenAI(model=self.config["quick_think_llm"], base_url=self.config["nvidia_backend_url"],api_key=self.config["nvidia_api_key"])        
        elif self.config["llm_provider"].lower() == "anthropic":
            self.deep_thinking_llm = ChatAnthropic(model=self.config["deep_think_llm"], base_url=self.config["backend_url"])
            self.quick_thinking_llm = ChatAnthropic(model=self.config["quick_think_llm"], base_url=self.config["backend_url"])