feat: add dynamic model fetching and CLI enhancements

Fetch the latest models from provider APIs, add LM Studio support, and improve the provider-selection UX.
MUmarJ 2026-01-16 18:54:43 -05:00
parent 4d41dd6066
commit 7e659dfddf
6 changed files with 432 additions and 28 deletions


@@ -1,2 +1,17 @@
 # Data vendor API keys
 ALPHA_VANTAGE_API_KEY=alpha_vantage_api_key_placeholder
+
+# LLM Provider API keys (set the ones you want to use)
+OPENAI_API_KEY=openai_api_key_placeholder
+ANTHROPIC_API_KEY=anthropic_api_key_placeholder
+GEMINI_API_KEY=gemini_api_key_placeholder
+OPENROUTER_API_KEY=openrouter_api_key_placeholder
+
+# Local LLM provider URLs (optional, defaults shown)
+# OLLAMA_URL=http://localhost:11434
+# LM_STUDIO_URL=http://localhost:1234
+
+# Feature flags
+# Set to "true" to fetch latest models from APIs and use latest web_search tool
+# Set to "false" or leave unset for static model lists and web_search_preview (legacy)
+FETCH_LATEST=true
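
For reference, a minimal sketch of how these settings could be read at startup. It assumes the project loads .env via python-dotenv, which this commit does not show; the snippet is illustrative only.

# Illustrative only: assumes python-dotenv is installed and .env sits in the working directory.
import os
from dotenv import load_dotenv

load_dotenv()  # copy .env entries into os.environ without overwriting existing values
fetch_latest = os.getenv("FETCH_LATEST", "false").lower() in ("true", "1", "yes")
print(f"FETCH_LATEST enabled: {fetch_latest}")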

cli/api_keys.py Normal file

@@ -0,0 +1,100 @@
"""API key and endpoint validation for LLM providers."""
import os
from typing import Optional, Tuple

import httpx

# Map cloud providers to their required environment variables
PROVIDER_API_KEYS = {
    "openai": "OPENAI_API_KEY",
    "anthropic": "ANTHROPIC_API_KEY",
    "google": "GEMINI_API_KEY",
    "openrouter": "OPENROUTER_API_KEY",
}

# Default endpoints for local providers
LOCAL_PROVIDER_DEFAULTS = {
    "ollama": ("OLLAMA_URL", "http://localhost:11434"),
    "lm studio": ("LM_STUDIO_URL", "http://localhost:1234"),
}


def get_api_key(provider: str) -> Optional[str]:
    """Get the API key for a cloud provider; returns None if not set."""
    provider_lower = provider.lower()
    # Special case: OpenRouter can use OPENROUTER_API_KEY, or OPENAI_API_KEY
    # holding a key with the "sk-or-" prefix
    if provider_lower == "openrouter":
        openrouter_key = os.getenv("OPENROUTER_API_KEY")
        if openrouter_key:
            return openrouter_key
        # Check whether OPENAI_API_KEY actually holds an OpenRouter key
        openai_key = os.getenv("OPENAI_API_KEY", "")
        if openai_key.startswith("sk-or-"):
            return openai_key
        return None
    env_var = PROVIDER_API_KEYS.get(provider_lower)
    if env_var is None:
        return None
    return os.getenv(env_var)


def get_local_endpoint(provider: str) -> Optional[str]:
    """Get the endpoint URL for a local provider."""
    provider_lower = provider.lower()
    if provider_lower not in LOCAL_PROVIDER_DEFAULTS:
        return None
    env_var, default_url = LOCAL_PROVIDER_DEFAULTS[provider_lower]
    return os.getenv(env_var, default_url)


def is_local_provider_running(provider: str) -> bool:
    """Check whether a local provider (Ollama/LM Studio) is running by probing its endpoint."""
    endpoint = get_local_endpoint(provider)
    if not endpoint:
        return False
    try:
        # Probe the models endpoint with a short timeout
        response = httpx.get(
            f"{endpoint}/v1/models",
            timeout=1.0
        )
        return response.status_code == 200
    except (httpx.RequestError, httpx.TimeoutException):
        return False


def is_provider_available(provider: str) -> Tuple[bool, str]:
    """
    Check whether a provider is available.

    Returns:
        Tuple of (is_available, reason_if_unavailable)
    """
    provider_lower = provider.lower()
    # Local providers: check whether the endpoint is reachable
    if provider_lower in LOCAL_PROVIDER_DEFAULTS:
        if is_local_provider_running(provider):
            return (True, "")
        return (False, "Not running")
    # Cloud providers: check for an API key
    if get_api_key(provider) is not None:
        return (True, "")
    return (False, "No API key")


def get_all_provider_availability() -> dict:
    """
    Get availability status for all providers.

    Returns:
        Dict mapping provider name to an (is_available, reason) tuple
    """
    all_providers = list(PROVIDER_API_KEYS.keys()) + list(LOCAL_PROVIDER_DEFAULTS.keys())
    return {provider: is_provider_available(provider) for provider in all_providers}
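
A minimal usage sketch for this module (hypothetical, not part of the commit), assuming the package is importable as cli:

# Hypothetical usage of cli/api_keys.py.
from cli.api_keys import get_all_provider_availability

for provider, (available, reason) in get_all_provider_availability().items():
    status = "available" if available else f"unavailable ({reason})"
    print(f"{provider}: {status}")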


@@ -475,13 +475,20 @@ def get_user_selections():
     )
     selected_llm_provider, backend_url = select_llm_provider()

-    # Step 6: Thinking agents
+    # Step 6: Quick-Thinking LLM Engine
     console.print(
         create_question_box(
-            "Step 6: Thinking Agents", "Select your thinking agents for analysis"
+            "Step 6: Quick-Thinking LLM Engine", "Select your quick-thinking model for fast operations"
         )
     )
     selected_shallow_thinker = select_shallow_thinking_agent(selected_llm_provider)
+
+    # Step 7: Deep-Thinking LLM Engine
+    console.print(
+        create_question_box(
+            "Step 7: Deep-Thinking LLM Engine", "Select your deep-thinking model for complex reasoning"
+        )
+    )
     selected_deep_thinker = select_deep_thinking_agent(selected_llm_provider)

     return {

cli/model_fetcher.py Normal file

@@ -0,0 +1,225 @@
"""Dynamic model fetching from LLM provider APIs with caching."""
import os
from typing import List, Tuple, Optional

import httpx

# Cache for fetched models (provider -> list of models)
_model_cache: dict = {}

# Maximum number of models to display (None = no limit, show all)
MAX_MODELS = None


def is_fetch_latest() -> bool:
    """Check whether FETCH_LATEST is enabled in the environment.

    When enabled, models are fetched dynamically from provider APIs.
    When disabled, the CLI falls back to static hardcoded model lists.
    """
    return os.getenv("FETCH_LATEST", "false").lower() in ("true", "1", "yes")


def fetch_openai_models() -> Optional[List[Tuple[str, str]]]:
    """
    Fetch available models from the OpenAI API, sorted by creation date (newest first).

    Returns:
        List of (display_name, model_id) tuples, or None on failure
    """
    if "openai" in _model_cache:
        return _model_cache["openai"]
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key or api_key.startswith("sk-or-"):
        return None
    try:
        response = httpx.get(
            "https://api.openai.com/v1/models",
            headers={"Authorization": f"Bearer {api_key}"},
            timeout=10.0
        )
        response.raise_for_status()
        models_data = response.json().get("data", [])
        # Filter to chat/reasoning models and keep metadata for sorting
        chat_models = []
        for model in models_data:
            model_id = model.get("id", "")
            created = model.get("created", 0)
            # Include GPT models and o-series reasoning models
            # (the "gpt-" prefix already covers gpt-5)
            if model_id.startswith(("gpt-", "o1", "o3", "o4", "o5")):
                # Skip snapshot/dated versions to keep the list clean
                if "-20" not in model_id and "-preview" not in model_id.lower():
                    chat_models.append((model_id, created))
        # Remove duplicates (keep the highest created timestamp for each model_id)
        model_dict = {}
        for model_id, created in chat_models:
            if model_id not in model_dict or created > model_dict[model_id]:
                model_dict[model_id] = created
        # Sort by created timestamp (newest first) and limit ([:None] keeps all)
        sorted_models = sorted(model_dict.items(), key=lambda x: -x[1])[:MAX_MODELS]
        result = [(model_id, model_id) for model_id, _ in sorted_models]
        _model_cache["openai"] = result
        return result
    except Exception:
        return None


def fetch_anthropic_models() -> Optional[List[Tuple[str, str]]]:
    """
    Fetch available models from the Anthropic API, sorted by creation date (newest first).

    Returns:
        List of (display_name, model_id) tuples, or None on failure
    """
    if "anthropic" in _model_cache:
        return _model_cache["anthropic"]
    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        return None
    try:
        response = httpx.get(
            "https://api.anthropic.com/v1/models",
            headers={
                "x-api-key": api_key,
                "anthropic-version": "2023-06-01"
            },
            timeout=10.0
        )
        response.raise_for_status()
        models_data = response.json().get("data", [])
        # Filter to Claude models and keep metadata for sorting
        claude_models = []
        for model in models_data:
            model_id = model.get("id", "")
            # The Anthropic API returns created_at as an ISO string (RFC 3339),
            # so lexicographic comparison matches chronological order
            created_at = model.get("created_at", "")
            display_name = model.get("display_name", "")
            if model_id.startswith("claude-"):
                # Skip dated versions (e.g., claude-3-sonnet-20240229)
                if "-20" not in model_id:
                    # Use display_name if available, otherwise model_id
                    label = display_name if display_name else model_id
                    claude_models.append((model_id, label, created_at))
        # Remove duplicates (keep the latest entry for each model_id)
        model_dict = {}
        for model_id, label, created_at in claude_models:
            if model_id not in model_dict or created_at > model_dict[model_id][1]:
                model_dict[model_id] = (label, created_at)
        # Sort by created_at (newest first) and limit
        sorted_models = sorted(model_dict.items(), key=lambda x: x[1][1], reverse=True)[:MAX_MODELS]
        result = [(label, model_id) for model_id, (label, _) in sorted_models]
        _model_cache["anthropic"] = result
        return result
    except Exception:
        return None


def fetch_google_models() -> Optional[List[Tuple[str, str]]]:
    """
    Fetch available models from the Google Generative AI API.

    Uses displayName for user-friendly labels, sorted as returned by the API
    (typically newest first).

    Returns:
        List of (display_name, model_id) tuples, or None on failure
    """
    if "google" in _model_cache:
        return _model_cache["google"]
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        return None
    try:
        response = httpx.get(
            f"https://generativelanguage.googleapis.com/v1/models?key={api_key}",
            timeout=10.0
        )
        response.raise_for_status()
        models_data = response.json().get("models", [])
        # Filter to Gemini models that support generateContent
        gemini_models = []
        for model in models_data:
            model_name = model.get("name", "")
            display_name = model.get("displayName", "")
            supported_methods = model.get("supportedGenerationMethods", [])
            # Extract the model ID from the "models/gemini-..." format
            if model_name.startswith("models/"):
                model_id = model_name.replace("models/", "")
            else:
                model_id = model_name
            # Only include Gemini models that support content generation
            if model_id.startswith("gemini") and "generateContent" in supported_methods:
                # Use displayName if available, otherwise model_id
                label = display_name if display_name else model_id
                gemini_models.append((label, model_id))
        # The API returns models in a reasonable order; just dedupe and limit
        seen = set()
        unique_models = []
        for label, model_id in gemini_models:
            if model_id not in seen:
                seen.add(model_id)
                unique_models.append((label, model_id))
        result = unique_models[:MAX_MODELS]
        _model_cache["google"] = result
        return result
    except Exception:
        return None


def fetch_models_for_provider(provider: str) -> Optional[List[Tuple[str, str]]]:
    """
    Fetch models for a given provider.

    Only fetches dynamically if FETCH_LATEST is enabled; otherwise returns None
    to trigger fallback to static model lists.

    Args:
        provider: Provider name (openai, anthropic, google)

    Returns:
        List of (display_name, model_id) tuples, or None if not supported/failed
    """
    # Return None if FETCH_LATEST is not enabled; static lists will be used
    if not is_fetch_latest():
        return None
    provider_lower = provider.lower()
    if provider_lower == "openai":
        return fetch_openai_models()
    elif provider_lower == "anthropic":
        return fetch_anthropic_models()
    elif provider_lower == "google":
        return fetch_google_models()
    return None


def clear_cache():
    """Clear the model cache."""
    _model_cache.clear()
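
A small usage sketch for the fetcher (hypothetical, not part of the commit). With FETCH_LATEST unset the call returns None, signalling the caller to fall back to its static lists:

# Hypothetical usage of cli/model_fetcher.py.
import os
from cli.model_fetcher import clear_cache, fetch_models_for_provider

os.environ["FETCH_LATEST"] = "true"  # enable dynamic fetching for this process
models = fetch_models_for_provider("openai")  # needs a valid OPENAI_API_KEY
if models is None:
    print("Dynamic fetch disabled or failed; use the static model list")
else:
    for display, model_id in models[:5]:  # newest five
        print(f"{display} -> {model_id}")
clear_cache()  # force a re-fetch on the next call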


@@ -1,7 +1,12 @@
 import questionary
 from typing import List, Optional, Tuple, Dict
+from rich.console import Console

 from cli.models import AnalystType
+from cli.api_keys import is_provider_available
+from cli.model_fetcher import fetch_models_for_provider
+
+console = Console()

 ANALYST_ORDER = [
     ("Market Analyst", AnalystType.MARKET),
@@ -125,7 +130,7 @@ def select_research_depth() -> int:
 def select_shallow_thinking_agent(provider) -> str:
     """Select shallow thinking llm engine using an interactive selection."""
-    # Define shallow thinking llm engine options with their corresponding model names
+    # Static fallback options for each provider
     SHALLOW_AGENT_OPTIONS = {
         "openai": [
             ("GPT-4o-mini - Fast and efficient for quick tasks", "gpt-4o-mini"),
@@ -142,24 +147,43 @@ def select_shallow_thinking_agent(provider) -> str:
         "google": [
             ("Gemini 2.0 Flash-Lite - Cost efficiency and low latency", "gemini-2.0-flash-lite"),
             ("Gemini 2.0 Flash - Next generation features, speed, and thinking", "gemini-2.0-flash"),
-            ("Gemini 2.5 Flash - Adaptive thinking, cost efficiency", "gemini-2.5-flash-preview-05-20"),
+            ("Gemini 2.5 Flash-Lite - Lightweight and cost efficient", "gemini-2.5-flash-lite"),
+            ("Gemini 2.5 Flash - Adaptive thinking, cost efficiency", "gemini-2.5-flash"),
+            ("Gemini 3 Flash Preview - Latest generation flash model", "gemini-3-flash-preview"),
         ],
         "openrouter": [
+            ("Xiaomi MiMo V2 Flash - Fast and efficient multimodal model", "xiaomi/mimo-v2-flash:free"),
             ("Meta: Llama 4 Scout", "meta-llama/llama-4-scout:free"),
             ("Meta: Llama 3.3 8B Instruct - A lightweight and ultra-fast variant of Llama 3.3 70B", "meta-llama/llama-3.3-8b-instruct:free"),
             ("google/gemini-2.0-flash-exp:free - Gemini Flash 2.0 offers a significantly faster time to first token", "google/gemini-2.0-flash-exp:free"),
         ],
         "ollama": [
-            ("llama3.1 local", "llama3.1"),
-            ("llama3.2 local", "llama3.2"),
+            ("llama3.2:3b local", "llama3.2:3b"),
+            ("phi3.5 local", "phi3.5:latest"),
+        ],
+        "lm studio": [
+            ("Local Model (default)", "local-model"),
         ]
     }
+
+    provider_lower = provider.lower()
+
+    # Try dynamic fetch for supported providers (OpenAI, Anthropic, Google)
+    model_options = None
+    if provider_lower in ["openai", "anthropic", "google"]:
+        dynamic_models = fetch_models_for_provider(provider_lower)
+        if dynamic_models:
+            model_options = dynamic_models
+
+    # Fall back to static list if dynamic fetch failed or not supported
+    if model_options is None:
+        model_options = SHALLOW_AGENT_OPTIONS.get(provider_lower, [])

     choice = questionary.select(
         "Select Your [Quick-Thinking LLM Engine]:",
         choices=[
             questionary.Choice(display, value=value)
-            for display, value in SHALLOW_AGENT_OPTIONS[provider.lower()]
+            for display, value in model_options
         ],
         instruction="\n- Use arrow keys to navigate\n- Press Enter to select",
         style=questionary.Style(
@@ -183,7 +207,7 @@ def select_shallow_thinking_agent(provider) -> str:
 def select_deep_thinking_agent(provider) -> str:
     """Select deep thinking llm engine using an interactive selection."""
-    # Define deep thinking llm engine options with their corresponding model names
+    # Static fallback options for each provider
     DEEP_AGENT_OPTIONS = {
         "openai": [
             ("GPT-4.1-nano - Ultra-lightweight model for basic operations", "gpt-4.1-nano"),
@@ -199,29 +223,47 @@ def select_deep_thinking_agent(provider) -> str:
             ("Claude Sonnet 3.5 - Highly capable standard model", "claude-3-5-sonnet-latest"),
             ("Claude Sonnet 3.7 - Exceptional hybrid reasoning and agentic capabilities", "claude-3-7-sonnet-latest"),
             ("Claude Sonnet 4 - High performance and excellent reasoning", "claude-sonnet-4-0"),
-            ("Claude Opus 4 - Most powerful Anthropic model", " claude-opus-4-0"),
+            ("Claude Opus 4 - Most powerful Anthropic model", "claude-opus-4-0"),
         ],
         "google": [
             ("Gemini 2.0 Flash-Lite - Cost efficiency and low latency", "gemini-2.0-flash-lite"),
             ("Gemini 2.0 Flash - Next generation features, speed, and thinking", "gemini-2.0-flash"),
-            ("Gemini 2.5 Flash - Adaptive thinking, cost efficiency", "gemini-2.5-flash-preview-05-20"),
-            ("Gemini 2.5 Pro", "gemini-2.5-pro-preview-06-05"),
+            ("Gemini 2.5 Flash-Lite - Lightweight and cost efficient", "gemini-2.5-flash-lite"),
+            ("Gemini 2.5 Flash - Adaptive thinking, cost efficiency", "gemini-2.5-flash"),
+            ("Gemini 3 Flash Preview - Latest generation flash model", "gemini-3-flash-preview"),
         ],
         "openrouter": [
+            ("Xiaomi MiMo V2 Flash - Fast and efficient multimodal model", "xiaomi/mimo-v2-flash:free"),
             ("DeepSeek V3 - a 685B-parameter, mixture-of-experts model", "deepseek/deepseek-chat-v3-0324:free"),
             ("Deepseek - latest iteration of the flagship chat model family from the DeepSeek team.", "deepseek/deepseek-chat-v3-0324:free"),
         ],
         "ollama": [
-            ("llama3.1 local", "llama3.1"),
-            ("qwen3", "qwen3"),
+            ("llama3.2:3b local", "llama3.2:3b"),
+            ("phi3.5 local", "phi3.5:latest"),
+        ],
+        "lm studio": [
+            ("Local Model (default)", "local-model"),
         ]
     }
+
+    provider_lower = provider.lower()
+
+    # Try dynamic fetch for supported providers (OpenAI, Anthropic, Google)
+    model_options = None
+    if provider_lower in ["openai", "anthropic", "google"]:
+        dynamic_models = fetch_models_for_provider(provider_lower)
+        if dynamic_models:
+            model_options = dynamic_models
+
+    # Fall back to static list if dynamic fetch failed or not supported
+    if model_options is None:
+        model_options = DEEP_AGENT_OPTIONS.get(provider_lower, [])

     choice = questionary.select(
         "Select Your [Deep-Thinking LLM Engine]:",
         choices=[
             questionary.Choice(display, value=value)
-            for display, value in DEEP_AGENT_OPTIONS[provider.lower()]
+            for display, value in model_options
         ],
         instruction="\n- Use arrow keys to navigate\n- Press Enter to select",
         style=questionary.Style(
@@ -240,22 +282,35 @@ def select_deep_thinking_agent(provider) -> str:
     return choice


 def select_llm_provider() -> tuple[str, str]:
-    """Select the OpenAI api url using interactive selection."""
-    # Define OpenAI api options with their corresponding endpoints
+    """Select the LLM provider using interactive selection with availability checks."""
+    # Define provider options with their corresponding endpoints
     BASE_URLS = [
         ("OpenAI", "https://api.openai.com/v1"),
         ("Anthropic", "https://api.anthropic.com/"),
         ("Google", "https://generativelanguage.googleapis.com/v1"),
         ("Openrouter", "https://openrouter.ai/api/v1"),
         ("Ollama", "http://localhost:11434/v1"),
+        ("LM Studio", "http://localhost:1234/v1"),
     ]

+    # Build choices with availability status
+    choices = []
+    for display, url in BASE_URLS:
+        available, reason = is_provider_available(display)
+        if available:
+            choices.append(questionary.Choice(display, value=(display, url)))
+        else:
+            # Show disabled option with reason
+            disabled_label = f"{display} ({reason})"
+            choices.append(questionary.Choice(
+                disabled_label,
+                value=(display, url),
+                disabled=reason
+            ))
+
     choice = questionary.select(
         "Select your LLM Provider:",
-        choices=[
-            questionary.Choice(display, value=(display, value))
-            for display, value in BASE_URLS
-        ],
+        choices=choices,
         instruction="\n- Use arrow keys to navigate\n- Press Enter to select",
         style=questionary.Style(
             [
@@ -267,7 +322,7 @@ def select_llm_provider() -> tuple[str, str]:
     ).ask()

     if choice is None:
-        console.print("\n[red]no OpenAI backend selected. Exiting...[/red]")
+        console.print("\n[red]No LLM provider selected. Exiting...[/red]")
         exit(1)

     display_name, url = choice
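
For reference, a minimal standalone sketch of the disabled-choice pattern used above; questionary renders disabled entries greyed out and unselectable, showing the reason string. Values here are illustrative:

# Illustrative standalone example of questionary's disabled choices.
import questionary

choices = [
    questionary.Choice("OpenAI", value="openai"),
    questionary.Choice("Ollama (Not running)", value="ollama", disabled="Not running"),
]
answer = questionary.select("Select your LLM Provider:", choices=choices).ask()
print(answer)  # "openai", or None if the prompt was cancelled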


@@ -24,3 +24,5 @@ rich
 questionary
 langchain_anthropic
 langchain-google-genai
+playwright
+markdown2