Add support for NVIDIA LLM provider and update configuration settings

This commit is contained in:
cemreaytekin 2026-03-24 14:22:48 +00:00
parent d880468047
commit 1f92c3d8cd
4 changed files with 70 additions and 78 deletions

View File

@ -10,6 +10,49 @@ ANALYST_ORDER = [
("Fundamentals Analyst", AnalystType.FUNDAMENTALS),
]
# Catalogue of selectable models, keyed by lower-case provider name.
# Each provider maps to a list of (menu label, model identifier) pairs.
# Both the quick-thinking and the deep-thinking selection prompts alias
# this single table, so an entry added here shows up in both menus.
AGENT_OPTIONS = {
    # --- hosted providers -------------------------------------------------
    "openai": [
        ("GPT-4o-mini - Fast and efficient for quick tasks", "gpt-4o-mini"),
        ("GPT-4.1-nano - Ultra-lightweight model for basic operations", "gpt-4.1-nano"),
        ("GPT-4.1-mini - Compact model with good performance", "gpt-4.1-mini"),
        ("GPT-4o - Standard model with solid capabilities", "gpt-4o"),
    ],
    "anthropic": [
        ("Claude Haiku 3.5 - Fast inference and standard capabilities", "claude-3-5-haiku-latest"),
        ("Claude Sonnet 3.5 - Highly capable standard model", "claude-3-5-sonnet-latest"),
        ("Claude Sonnet 3.7 - Exceptional hybrid reasoning and agentic capabilities", "claude-3-7-sonnet-latest"),
        ("Claude Sonnet 4 - High performance and excellent reasoning", "claude-sonnet-4-0"),
        # NOTE(review): this identifier is namespaced "openai/..." yet sits
        # under the anthropic provider -- confirm that is intentional.
        ("CCR", "openai/gpt-oss-20b"),
    ],
    "google": [
        ("Gemini 2.0 Flash-Lite - Cost efficiency and low latency", "gemini-2.0-flash-lite"),
        ("Gemini 2.0 Flash - Next generation features, speed, and thinking", "gemini-2.0-flash"),
        ("Gemini 2.5 Flash - Adaptive thinking, cost efficiency", "gemini-2.5-flash-preview-05-20"),
    ],
    "openrouter": [
        ("Meta: Llama 4 Scout", "meta-llama/llama-4-scout:free"),
        ("Meta: Llama 3.3 8B Instruct - A lightweight and ultra-fast variant of Llama 3.3 70B", "meta-llama/llama-3.3-8b-instruct:free"),
        ("google/gemini-2.0-flash-exp:free - Gemini Flash 2.0 offers a significantly faster time to first token", "google/gemini-2.0-flash-exp:free"),
    ],
    # --- locally served models ---------------------------------------------
    "ollama": [
        ("llama3.1 local", "llama3.1"),
        ("llama3.2 local", "llama3.2"),
    ],
    "lmstudio": [
        ("LMStudio Qwen 4b Thinking", "qwen/qwen3-4b-thinking-2507"),
        ("LMStudio GLM 4.6v", "zai-org/glm-4.6v-flash"),
        ("LMStudio GLM 4.7", "glm-4.7-reap-218b-a32b"),
        ("LMStudio OSS 20b", "openai/gpt-oss-20b"),
        ("LMStudio Minimax 2.1", "minimax-m2.1-reap-30-mlx"),
    ],
    # --- NVIDIA provider -----------------------------------------------------
    "nvidia": [
        ("MiniMax M2.1", "minimaxai/minimax-m2.1"),
        ("Moonshot K2", "moonshotai/kimi-k2-thinking"),
        ("Z.AI GLM4.7", "z-ai/glm4.7"),
        ("Deepseek v3.2", "deepseek-ai/deepseek-v3.2"),
    ],
}
def get_ticker() -> str:
"""Prompt the user to enter a ticker symbol."""
@ -126,41 +169,8 @@ def select_shallow_thinking_agent(provider) -> str:
"""Select shallow thinking llm engine using an interactive selection."""
# Define shallow thinking llm engine options with their corresponding model names
SHALLOW_AGENT_OPTIONS = {
"openai": [
("GPT-4o-mini - Fast and efficient for quick tasks", "gpt-4o-mini"),
("GPT-4.1-nano - Ultra-lightweight model for basic operations", "gpt-4.1-nano"),
("GPT-4.1-mini - Compact model with good performance", "gpt-4.1-mini"),
("GPT-4o - Standard model with solid capabilities", "gpt-4o"),
],
"anthropic": [
("Claude Haiku 3.5 - Fast inference and standard capabilities", "claude-3-5-haiku-latest"),
("Claude Sonnet 3.5 - Highly capable standard model", "claude-3-5-sonnet-latest"),
("Claude Sonnet 3.7 - Exceptional hybrid reasoning and agentic capabilities", "claude-3-7-sonnet-latest"),
("Claude Sonnet 4 - High performance and excellent reasoning", "claude-sonnet-4-0"),
("CCR", "openai/gpt-oss-20b"),
],
"google": [
("Gemini 2.0 Flash-Lite - Cost efficiency and low latency", "gemini-2.0-flash-lite"),
("Gemini 2.0 Flash - Next generation features, speed, and thinking", "gemini-2.0-flash"),
("Gemini 2.5 Flash - Adaptive thinking, cost efficiency", "gemini-2.5-flash-preview-05-20"),
],
"openrouter": [
("Meta: Llama 4 Scout", "meta-llama/llama-4-scout:free"),
("Meta: Llama 3.3 8B Instruct - A lightweight and ultra-fast variant of Llama 3.3 70B", "meta-llama/llama-3.3-8b-instruct:free"),
("google/gemini-2.0-flash-exp:free - Gemini Flash 2.0 offers a significantly faster time to first token", "google/gemini-2.0-flash-exp:free"),
],
"ollama": [
("llama3.1 local", "llama3.1"),
("llama3.2 local", "llama3.2"),
],
"lmstudio": [
("LMStudio Qwen 4b Thinking","qwen/qwen3-4b-thinking-2507"),
("LMStudio GLM 4.6v", "zai-org/glm-4.6v-flash"),
("LMStudio OSS 20b","openai/gpt-oss-20b"),
("LMStudio Minimax 2.1","minimax-m2.1-reap-30-mlx"),
]
}
SHALLOW_AGENT_OPTIONS = AGENT_OPTIONS
choice = questionary.select(
"Select Your [Quick-Thinking LLM Engine]:",
@ -191,44 +201,7 @@ def select_deep_thinking_agent(provider) -> str:
"""Select deep thinking llm engine using an interactive selection."""
# Define deep thinking llm engine options with their corresponding model names
DEEP_AGENT_OPTIONS = {
"openai": [
("GPT-4.1-nano - Ultra-lightweight model for basic operations", "gpt-4.1-nano"),
("GPT-4.1-mini - Compact model with good performance", "gpt-4.1-mini"),
("GPT-4o - Standard model with solid capabilities", "gpt-4o"),
("o4-mini - Specialized reasoning model (compact)", "o4-mini"),
("o3-mini - Advanced reasoning model (lightweight)", "o3-mini"),
("o3 - Full advanced reasoning model", "o3"),
("o1 - Premier reasoning and problem-solving model", "o1"),
],
"anthropic": [
("Claude Haiku 3.5 - Fast inference and standard capabilities", "claude-3-5-haiku-latest"),
("Claude Sonnet 3.5 - Highly capable standard model", "claude-3-5-sonnet-latest"),
("Claude Sonnet 3.7 - Exceptional hybrid reasoning and agentic capabilities", "claude-3-7-sonnet-latest"),
("Claude Sonnet 4 - High performance and excellent reasoning", "claude-sonnet-4-0"),
("Claude Opus 4 - Most powerful Anthropic model", " claude-opus-4-0"),
],
"google": [
("Gemini 2.0 Flash-Lite - Cost efficiency and low latency", "gemini-2.0-flash-lite"),
("Gemini 2.0 Flash - Next generation features, speed, and thinking", "gemini-2.0-flash"),
("Gemini 2.5 Flash - Adaptive thinking, cost efficiency", "gemini-2.5-flash-preview-05-20"),
("Gemini 2.5 Pro", "gemini-2.5-pro-preview-06-05"),
],
"openrouter": [
("DeepSeek V3 - a 685B-parameter, mixture-of-experts model", "deepseek/deepseek-chat-v3-0324:free"),
("Deepseek - latest iteration of the flagship chat model family from the DeepSeek team.", "deepseek/deepseek-chat-v3-0324:free"),
],
"ollama": [
("llama3.1 local", "llama3.1"),
("qwen3", "qwen3"),
],
"lmstudio": [
("LMStudio Qwen 4b Thinking","qwen/qwen3-4b-thinking-2507"),
("LMStudio GLM 4.6v", "zai-org/glm-4.6v-flash"),
("LMStudio OSS 20b","openai/gpt-oss-20b"),
("LMStudio Minimax 2.1","minimax-m2.1-reap-30-mlx"),
]
}
DEEP_AGENT_OPTIONS = AGENT_OPTIONS
choice = questionary.select(
"Select Your [Deep-Thinking LLM Engine]:",
@ -257,12 +230,12 @@ def select_llm_provider() -> tuple[str, str]:
# Define OpenAI api options with their corresponding endpoints
BASE_URLS = [
("LMStudio", "http://192.168.0.20:1234/v1"),
("OpenAI Local", "http://192.168.0.20:1234/v1"),
("OpenAI", "https://api.openai.com/v1"),
("Anthropic", "https://api.anthropic.com/"),
("Google", "https://generativelanguage.googleapis.com/v1"),
("Openrouter", "https://openrouter.ai/api/v1"),
("Ollama", "http://localhost:11434/v1"),
("Nvidia","https://integrate.api.nvidia.com"),
]
choice = questionary.select(

View File

@ -14,12 +14,21 @@ class FinancialSituationMemory:
self.embedding = "nomic-embed-text"
elif config["backend_url"] == "http://192.168.0.20:1234/v1":
self.embedding = "text-embedding-nomic-embed-text-v2-moe"
elif config["backend_url"] == "http://192.168.0.20:1234/v1":
self.embedding = "text-embedding-nomic-embed-text-v2-moe"
else:
self.embedding = "text-embedding-nomic-embed-text-v2-moe"
self.client = OpenAI(base_url=config["backend_url"])
self.client = OpenAI(base_url=config["local_url"])
self.chroma_client = chromadb.Client(Settings(allow_reset=True))
self.situation_collection = self.chroma_client.create_collection(name=name)
""" If nvidia is selected as the LLM """
if config["llm_provider"].lower() == "nvidia" :
#self.embedding = "nvidia/embed-qa-4"
#self.client = OpenAI(base_url=config["nvidia_backend_url"])
self.embedding = "text-embedding-nomic-embed-text-v2-moe"
self.client = OpenAI(base_url=config["local_url"])
def get_embedding(self, text):
"""Get OpenAI embedding for a text"""

View File

@ -3,7 +3,8 @@ import os
DEFAULT_CONFIG = {
"project_dir": os.path.abspath(os.path.join(os.path.dirname(__file__), ".")),
"results_dir": os.getenv("TRADINGAGENTS_RESULTS_DIR", "./results"),
"data_dir": "/Users/yluo/Documents/Code/ScAI/FR1-data",
#"data_dir": "/Users/yluo/Documents/Code/ScAI/FR1-data",
"data_dir" : os.path.join("project_dir","data",),
"data_cache_dir": os.path.join(
os.path.abspath(os.path.join(os.path.dirname(__file__), ".")),
"dataflows/data_cache",
@ -16,7 +17,7 @@ DEFAULT_CONFIG = {
# Debate and discussion settings
"max_debate_rounds": 1,
"max_risk_discuss_rounds": 1,
"max_recur_limit": 100,
"max_recur_limit": 500, #100
# Data vendor configuration
# Category-level configuration (default for all tools in category)
@ -24,8 +25,14 @@ DEFAULT_CONFIG = {
"llm_provider": "lmstudio",
"deep_think_llm": "glm-4.7-reap-50-mixed-3-4-bits",
"quick_think_llm": "qwen/qwen3-vl-30b",
"backend_url": "http://192.168.0.20/v1",
"backend_url": "http://192.168.0.20:1234/v1",
"api_key": "blablabla",
"local_url": "http://192.168.0.20:1234/v1",
"ollama_url":"http://192.168.0.20:11434/v1",
"nvidia_backend_url" : "https://integrate.api.nvidia.com/v1",
"nvidia_api_key" : "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",
"data_vendors": {

View File

@ -75,6 +75,9 @@ class TradingAgentsGraph:
if self.config["llm_provider"].lower() == "openai" or self.config["llm_provider"] == "ollama" or self.config["llm_provider"] == "lmstudio" or self.config["llm_provider"] == "openrouter":
self.deep_thinking_llm = ChatOpenAI(model=self.config["deep_think_llm"], base_url=self.config["backend_url"],api_key=self.config["api_key"])
self.quick_thinking_llm = ChatOpenAI(model=self.config["quick_think_llm"], base_url=self.config["backend_url"],api_key=self.config["api_key"])
elif self.config["llm_provider"].lower() == "nvidia" :
self.deep_thinking_llm = ChatOpenAI(model=self.config["deep_think_llm"], base_url=self.config["nvidia_backend_url"],api_key=self.config["nvidia_api_key"])
self.quick_thinking_llm = ChatOpenAI(model=self.config["quick_think_llm"], base_url=self.config["nvidia_backend_url"],api_key=self.config["nvidia_api_key"])
elif self.config["llm_provider"].lower() == "anthropic":
self.deep_thinking_llm = ChatAnthropic(model=self.config["deep_think_llm"], base_url=self.config["backend_url"])
self.quick_thinking_llm = ChatAnthropic(model=self.config["quick_think_llm"], base_url=self.config["backend_url"])