feat: add dynamic model fetching and CLI enhancements

Fetch latest models from provider APIs, add LM Studio support, improve provider selection UX
2026-01-16 18:54:43 -05:00 · 2026-01-16 18:54:43 -05:00 · 7e659dfddf
parent 4d41dd6066
commit 7e659dfddf
6 changed files with 432 additions and 28 deletions
--- a/.env.example
+++ b/.env.example
@ -1,2 +1,17 @@
+# Data vendor API keys
 ALPHA_VANTAGE_API_KEY=alpha_vantage_api_key_placeholder
-OPENAI_API_KEY=openai_api_key_placeholder
+
+# LLM Provider API keys (set the ones you want to use)
+OPENAI_API_KEY=openai_api_key_placeholder
+ANTHROPIC_API_KEY=anthropic_api_key_placeholder
+GEMINI_API_KEY=gemini_api_key_placeholder
+OPENROUTER_API_KEY=openrouter_api_key_placeholder
+
+# Local LLM provider URLs (optional, defaults shown)
+# OLLAMA_URL=http://localhost:11434
+# LM_STUDIO_URL=http://localhost:1234
+
+# Feature flags
+# Set to "true" (also accepted: "1" or "yes", case-insensitive) to fetch the latest models from provider APIs and use the latest web_search tool
+# Set to "false" or leave unset to use the static model lists and web_search_preview (legacy)
+FETCH_LATEST=true
--- a/cli/api_keys.py
+++ b/cli/api_keys.py
@ -0,0 +1,100 @@
+"""API key and endpoint validation for LLM providers."""
+
+import os
+from typing import Optional, Tuple
+import httpx
+
+
+# Map each cloud provider to the environment variable that holds its API key.
+# Keys are lowercase because the lookups in this module lowercase the
+# provider name before consulting the table.
+PROVIDER_API_KEYS = {
+    "openai": "OPENAI_API_KEY",
+    "anthropic": "ANTHROPIC_API_KEY",
+    "google": "GEMINI_API_KEY",
+    "openrouter": "OPENROUTER_API_KEY",
+}
+
+# Local providers (lowercase name) mapped to a pair of
+# (environment variable that overrides the URL, default base URL).
+# The base URLs carry no path; the availability probe in this module
+# appends "/v1/models" to them.
+LOCAL_PROVIDER_DEFAULTS = {
+    "ollama": ("OLLAMA_URL", "http://localhost:11434"),
+    "lm studio": ("LM_STUDIO_URL", "http://localhost:1234"),
+}
+
+
+def get_api_key(provider: str) -> Optional[str]:
+    """Return the API key configured for a cloud provider.
+
+    The provider name is matched case-insensitively against
+    PROVIDER_API_KEYS.
+
+    Args:
+        provider: Provider name, e.g. "openai" or "OpenRouter".
+
+    Returns:
+        The key read from the provider's environment variable, or None when
+        the provider is unknown or the variable is unset or empty.
+    """
+    provider_lower = provider.lower()
+
+    # Special case: OpenRouter can use OPENROUTER_API_KEY, or an
+    # OPENAI_API_KEY that carries the "sk-or-" prefix.
+    if provider_lower == "openrouter":
+        openrouter_key = os.getenv("OPENROUTER_API_KEY")
+        if openrouter_key:
+            return openrouter_key
+        openai_key = os.getenv("OPENAI_API_KEY", "")
+        return openai_key if openai_key.startswith("sk-or-") else None
+
+    env_var = PROVIDER_API_KEYS.get(provider_lower)
+    if env_var is None:
+        return None
+    # A blank value (e.g. a bare `OPENAI_API_KEY=` line in .env) is not a
+    # usable key; report it as missing, just like the OpenRouter branch does.
+    return os.getenv(env_var) or None
+
+
+def get_local_endpoint(provider: str) -> Optional[str]:
+    """Return the base URL configured for a local provider.
+
+    Args:
+        provider: Provider name, matched case-insensitively against
+            LOCAL_PROVIDER_DEFAULTS (e.g. "Ollama", "LM Studio").
+
+    Returns:
+        The URL from the provider's override environment variable, or the
+        built-in default when that variable is unset or blank. Trailing
+        slashes are removed. None when the provider is not a local provider.
+    """
+    entry = LOCAL_PROVIDER_DEFAULTS.get(provider.lower())
+    if entry is None:
+        return None
+
+    env_var, default_url = entry
+    # A variable that is set but blank must not shadow the default, and a
+    # trailing slash would turn the probe path into "//v1/models".
+    configured = os.getenv(env_var, "").strip()
+    return (configured or default_url).rstrip("/")
+
+
+def is_local_provider_running(provider: str) -> bool:
+    """Check if a local provider (Ollama/LM Studio) is running by probing its endpoint.
+
+    Args:
+        provider: Local provider name as accepted by get_local_endpoint.
+
+    Returns:
+        True only when GET <endpoint>/v1/models answers with HTTP 200.
+        False when the provider has no endpoint, the request fails or times
+        out, the configured URL is malformed, or any other status is returned.
+    """
+    endpoint = get_local_endpoint(provider)
+    if not endpoint:
+        return False
+
+    try:
+        # One-second timeout so an unreachable endpoint does not stall the caller
+        response = httpx.get(f"{endpoint}/v1/models", timeout=1.0)
+    except (httpx.RequestError, httpx.InvalidURL):
+        # RequestError already covers connection failures and timeouts.
+        # InvalidURL is a separate exception type, raised for a malformed
+        # URL such as a bad override taken from the environment.
+        return False
+    return response.status_code == 200
+
+
+def is_provider_available(provider: str) -> Tuple[bool, str]:
+    """
+    Check if a provider is available.
+
+    Providers listed in LOCAL_PROVIDER_DEFAULTS are available when their
+    endpoint answers the probe; every other provider is available when a
+    non-empty API key is configured for it.
+
+    Args:
+        provider: Provider name (case-insensitive).
+
+    Returns:
+        Tuple of (is_available, reason_if_unavailable). The reason is ""
+        when available, otherwise "Not running" or "No API key".
+    """
+    # Local providers: check if endpoint is reachable
+    if provider.lower() in LOCAL_PROVIDER_DEFAULTS:
+        if is_local_provider_running(provider):
+            return (True, "")
+        return (False, "Not running")
+
+    # Cloud providers: require a usable key. Truthiness rather than
+    # `is not None`, because a variable set to an empty string is not a key.
+    if get_api_key(provider):
+        return (True, "")
+    return (False, "No API key")
+
+
+def get_all_provider_availability() -> dict:
+    """
+    Report availability for every known cloud and local provider.
+
+    Returns:
+        Dict keyed by provider name (cloud providers first, then local
+        ones), each value being the (is_available, reason) tuple produced
+        by is_provider_available.
+    """
+    availability = {}
+    for name in (*PROVIDER_API_KEYS, *LOCAL_PROVIDER_DEFAULTS):
+        availability[name] = is_provider_available(name)
+    return availability
--- a/cli/main.py
+++ b/cli/main.py
@ -475,13 +475,20 @@ def get_user_selections():
    )
    selected_llm_provider, backend_url = select_llm_provider()
    
-    # Step 6: Thinking agents
+    # Step 6: Quick-Thinking LLM Engine
    console.print(
        create_question_box(
-            "Step 6: Thinking Agents", "Select your thinking agents for analysis"
+            "Step 6: Quick-Thinking LLM Engine", "Select your quick-thinking model for fast operations"
        )
    )
    selected_shallow_thinker = select_shallow_thinking_agent(selected_llm_provider)
+
+    # Step 7: Deep-Thinking LLM Engine
+    console.print(
+        create_question_box(
+            "Step 7: Deep-Thinking LLM Engine", "Select your deep-thinking model for complex reasoning"
+        )
+    )
    selected_deep_thinker = select_deep_thinking_agent(selected_llm_provider)

    return {
--- a/cli/model_fetcher.py
+++ b/cli/model_fetcher.py
@ -0,0 +1,225 @@
+"""Dynamic model fetching from LLM provider APIs with caching."""
+
+import os
+from typing import List, Tuple, Optional
+import httpx
+
+# Cache for fetched models (provider name -> list of (display_name, model_id)
+# tuples). Filled by the fetch_*_models functions after a successful fetch and
+# emptied by clear_cache().
+_model_cache: dict = {}
+
+# Maximum number of models to display. The fetchers apply it as a slice bound
+# (`[:MAX_MODELS]`), so None means no limit and every model is shown.
+MAX_MODELS = None
+
+
+def is_fetch_latest() -> bool:
+    """Report whether the FETCH_LATEST environment flag is switched on.
+
+    The flag counts as enabled when its value, compared case-insensitively,
+    is "true", "1" or "yes". Any other value, or an unset variable, means
+    disabled, in which case the static hardcoded model lists are used
+    instead of fetching models from the provider APIs.
+    """
+    flag = os.getenv("FETCH_LATEST", "false")
+    return flag.lower() in {"true", "1", "yes"}
+
+
+# Model-id prefixes treated as chat/reasoning models: the GPT family
+# ("gpt-" also covers "gpt-5...") and the o-series.
+_OPENAI_CHAT_PREFIXES = ("gpt-", "o1", "o3", "o4", "o5")
+
+
+def _select_openai_chat_models(models_data) -> List[Tuple[str, str]]:
+    """Reduce raw /v1/models entries to (display_name, model_id) pairs, newest first."""
+    newest_created = {}
+    for model in models_data:
+        model_id = model.get("id", "")
+        # A missing or null timestamp counts as 0 so one odd entry cannot
+        # break the numeric sort below and discard the whole list.
+        created = model.get("created") or 0
+
+        if not model_id.startswith(_OPENAI_CHAT_PREFIXES):
+            continue
+        # Skip snapshot/dated versions and previews to keep the list clean
+        if "-20" in model_id or "-preview" in model_id.lower():
+            continue
+        # Keep the highest created timestamp if an id appears more than once
+        if model_id not in newest_created or created > newest_created[model_id]:
+            newest_created[model_id] = created
+
+    # Sort by created timestamp (newest first) and limit
+    ordered = sorted(newest_created.items(), key=lambda item: -item[1])[:MAX_MODELS]
+    return [(model_id, model_id) for model_id, _ in ordered]
+
+
+def fetch_openai_models() -> Optional[List[Tuple[str, str]]]:
+    """
+    Fetch available models from OpenAI API, sorted by creation date (newest first).
+
+    A successful result is stored in the module cache and returned from there
+    on later calls. Failures are not cached, so the next call retries.
+
+    Returns:
+        List of (display_name, model_id) tuples (the display name is the
+        model id itself), or None when OPENAI_API_KEY is unset, holds an
+        "sk-or-" key, or the request or response parsing fails.
+    """
+    if "openai" in _model_cache:
+        return _model_cache["openai"]
+
+    api_key = os.getenv("OPENAI_API_KEY")
+    # An "sk-or-" key is treated elsewhere in this project as an OpenRouter
+    # key, so it is not sent to api.openai.com.
+    if not api_key or api_key.startswith("sk-or-"):
+        return None
+
+    try:
+        response = httpx.get(
+            "https://api.openai.com/v1/models",
+            headers={"Authorization": f"Bearer {api_key}"},
+            timeout=10.0,
+        )
+        response.raise_for_status()
+        result = _select_openai_chat_models(response.json().get("data", []))
+    except Exception:
+        # Deliberate best effort: any network, HTTP or parsing problem means
+        # "no dynamic list available" and is reported as None.
+        return None
+
+    _model_cache["openai"] = result
+    return result
+
+
+def _select_anthropic_models(models_data) -> List[Tuple[str, str]]:
+    """Reduce raw /v1/models entries to (display_name, model_id) pairs, newest first."""
+    claude_models = []
+    for model in models_data:
+        model_id = model.get("id", "")
+        if not model_id.startswith("claude-"):
+            continue
+        # created_at is an ISO (RFC 3339) string; a null value becomes "" so
+        # the string comparisons below cannot raise.
+        created_at = model.get("created_at") or ""
+        # Use display_name if available, otherwise model_id
+        label = model.get("display_name") or model_id
+        claude_models.append((model_id, label, created_at))
+
+    # Prefer undated ids (skip e.g. claude-3-sonnet-20240229).
+    undated = [entry for entry in claude_models if "-20" not in entry[0]]
+    # NOTE(review): the Models API appears to list mostly dated snapshot ids
+    # (confirm against the API reference). If the filter would leave nothing,
+    # offer the dated ids instead of returning an empty list.
+    candidates = undated or claude_models
+
+    # Remove duplicates (keep latest for each model_id)
+    latest = {}
+    for model_id, label, created_at in candidates:
+        if model_id not in latest or created_at > latest[model_id][1]:
+            latest[model_id] = (label, created_at)
+
+    # Sort by created_at (newest first) and limit; assumes the timestamps
+    # share one format so that string order matches chronological order.
+    ordered = sorted(latest.items(), key=lambda item: item[1][1], reverse=True)[:MAX_MODELS]
+    return [(label, model_id) for model_id, (label, _) in ordered]
+
+
+def fetch_anthropic_models() -> Optional[List[Tuple[str, str]]]:
+    """
+    Fetch available models from Anthropic API, sorted by creation date (newest first).
+
+    A successful result is stored in the module cache and returned from there
+    on later calls. Failures are not cached, so the next call retries.
+
+    Returns:
+        List of (display_name, model_id) tuples, or None when
+        ANTHROPIC_API_KEY is unset or the request or response parsing fails.
+    """
+    if "anthropic" in _model_cache:
+        return _model_cache["anthropic"]
+
+    api_key = os.getenv("ANTHROPIC_API_KEY")
+    if not api_key:
+        return None
+
+    try:
+        response = httpx.get(
+            "https://api.anthropic.com/v1/models",
+            headers={
+                "x-api-key": api_key,
+                "anthropic-version": "2023-06-01",
+            },
+            timeout=10.0,
+        )
+        response.raise_for_status()
+        result = _select_anthropic_models(response.json().get("data", []))
+    except Exception:
+        # Deliberate best effort: any network, HTTP or parsing problem means
+        # "no dynamic list available" and is reported as None.
+        return None
+
+    _model_cache["anthropic"] = result
+    return result
+
+
+def _select_gemini_models(models_data) -> List[Tuple[str, str]]:
+    """Reduce raw models.list entries to unique (display_name, model_id) pairs in API order."""
+    prefix = "models/"
+    seen = set()
+    unique_models = []
+    for model in models_data:
+        model_name = model.get("name", "")
+        # Extract model ID from "models/gemini-..." format. Only the leading
+        # prefix is removed; a later "models/" inside the name is kept.
+        model_id = model_name[len(prefix):] if model_name.startswith(prefix) else model_name
+
+        # Only include Gemini models that support content generation
+        if not model_id.startswith("gemini"):
+            continue
+        if "generateContent" not in model.get("supportedGenerationMethods", []):
+            continue
+        if model_id in seen:
+            continue
+        seen.add(model_id)
+        # Use displayName if available, otherwise model_id
+        unique_models.append((model.get("displayName") or model_id, model_id))
+
+    return unique_models[:MAX_MODELS]
+
+
+def fetch_google_models() -> Optional[List[Tuple[str, str]]]:
+    """
+    Fetch available models from Google Generative AI API.
+    Uses displayName for user-friendly labels and keeps the order returned by the API.
+
+    A successful result is stored in the module cache and returned from there
+    on later calls. Failures are not cached, so the next call retries.
+
+    Returns:
+        List of (display_name, model_id) tuples, or None when GEMINI_API_KEY
+        is unset or the request or response parsing fails.
+    """
+    if "google" in _model_cache:
+        return _model_cache["google"]
+
+    api_key = os.getenv("GEMINI_API_KEY")
+    if not api_key:
+        return None
+
+    try:
+        response = httpx.get(
+            "https://generativelanguage.googleapis.com/v1/models",
+            # Send the key as a header instead of a `?key=` query parameter
+            # so it does not show up in logged request URLs or in the text
+            # of HTTP error exceptions.
+            headers={"x-goog-api-key": api_key},
+            timeout=10.0,
+        )
+        response.raise_for_status()
+        result = _select_gemini_models(response.json().get("models", []))
+    except Exception:
+        # Deliberate best effort: any network, HTTP or parsing problem means
+        # "no dynamic list available" and is reported as None.
+        return None
+
+    _model_cache["google"] = result
+    return result
+
+
+def fetch_models_for_provider(provider: str) -> Optional[List[Tuple[str, str]]]:
+    """
+    Look up the dynamic model list for one provider.
+
+    Nothing is fetched unless FETCH_LATEST is enabled; in that case None is
+    returned so the caller falls back to its static model list.
+
+    Args:
+        provider: Provider name (openai, anthropic, google), case-insensitive
+
+    Returns:
+        List of (display_name, model_id) tuples, or None when fetching is
+        disabled, the provider has no fetcher, or the fetch failed
+    """
+    if not is_fetch_latest():
+        return None
+
+    # Dispatch table: lowercase provider name -> fetcher function
+    fetchers = {
+        "openai": fetch_openai_models,
+        "anthropic": fetch_anthropic_models,
+        "google": fetch_google_models,
+    }
+    fetcher = fetchers.get(provider.lower())
+    return fetcher() if fetcher is not None else None
+
+
+def clear_cache():
+    """Clear the model cache.
+
+    Empties the module-level cache in place, so the next fetch for any
+    provider queries its API again instead of returning a stored list.
+    """
+    _model_cache.clear()
--- a/cli/utils.py
+++ b/cli/utils.py
@ -1,7 +1,12 @@
 import questionary
 from typing import List, Optional, Tuple, Dict
+from rich.console import Console

 from cli.models import AnalystType
+from cli.api_keys import is_provider_available
+from cli.model_fetcher import fetch_models_for_provider
+
+console = Console()

 ANALYST_ORDER = [
    ("Market Analyst", AnalystType.MARKET),
@ -125,7 +130,7 @@ def select_research_depth() -> int:
 def select_shallow_thinking_agent(provider) -> str:
    """Select shallow thinking llm engine using an interactive selection."""

-    # Define shallow thinking llm engine options with their corresponding model names
+    # Static fallback options for each provider
    SHALLOW_AGENT_OPTIONS = {
        "openai": [
            ("GPT-4o-mini - Fast and efficient for quick tasks", "gpt-4o-mini"),
@ -142,24 +147,43 @@ def select_shallow_thinking_agent(provider) -> str:
        "google": [
            ("Gemini 2.0 Flash-Lite - Cost efficiency and low latency", "gemini-2.0-flash-lite"),
            ("Gemini 2.0 Flash - Next generation features, speed, and thinking", "gemini-2.0-flash"),
-            ("Gemini 2.5 Flash - Adaptive thinking, cost efficiency", "gemini-2.5-flash-preview-05-20"),
+            ("Gemini 2.5 Flash-Lite - Lightweight and cost efficient", "gemini-2.5-flash-lite"),
+            ("Gemini 2.5 Flash - Adaptive thinking, cost efficiency", "gemini-2.5-flash"),
+            ("Gemini 3 Flash Preview - Latest generation flash model", "gemini-3-flash-preview"),
        ],
        "openrouter": [
+            ("Xiaomi MiMo V2 Flash - Fast and efficient multimodal model", "xiaomi/mimo-v2-flash:free"),
            ("Meta: Llama 4 Scout", "meta-llama/llama-4-scout:free"),
            ("Meta: Llama 3.3 8B Instruct - A lightweight and ultra-fast variant of Llama 3.3 70B", "meta-llama/llama-3.3-8b-instruct:free"),
            ("google/gemini-2.0-flash-exp:free - Gemini Flash 2.0 offers a significantly faster time to first token", "google/gemini-2.0-flash-exp:free"),
        ],
        "ollama": [
-            ("llama3.1 local", "llama3.1"),
-            ("llama3.2 local", "llama3.2"),
+            ("llama3.2:3b local", "llama3.2:3b"),
+            ("phi3.5 local", "phi3.5:latest"),
+        ],
+        "lm studio": [
+            ("Local Model (default)", "local-model"),
        ]
    }

+    provider_lower = provider.lower()
+
+    # Try dynamic fetch for supported providers (OpenAI, Anthropic, Google)
+    model_options = None
+    if provider_lower in ["openai", "anthropic", "google"]:
+        dynamic_models = fetch_models_for_provider(provider_lower)
+        if dynamic_models:
+            model_options = dynamic_models
+
+    # Fall back to static list if dynamic fetch failed or not supported
+    if model_options is None:
+        model_options = SHALLOW_AGENT_OPTIONS.get(provider_lower, [])
+
    choice = questionary.select(
        "Select Your [Quick-Thinking LLM Engine]:",
        choices=[
            questionary.Choice(display, value=value)
-            for display, value in SHALLOW_AGENT_OPTIONS[provider.lower()]
+            for display, value in model_options
        ],
        instruction="\n- Use arrow keys to navigate\n- Press Enter to select",
        style=questionary.Style(
@ -183,7 +207,7 @@ def select_shallow_thinking_agent(provider) -> str:
 def select_deep_thinking_agent(provider) -> str:
    """Select deep thinking llm engine using an interactive selection."""

-    # Define deep thinking llm engine options with their corresponding model names
+    # Static fallback options for each provider
    DEEP_AGENT_OPTIONS = {
        "openai": [
            ("GPT-4.1-nano - Ultra-lightweight model for basic operations", "gpt-4.1-nano"),
@ -199,29 +223,47 @@ def select_deep_thinking_agent(provider) -> str:
            ("Claude Sonnet 3.5 - Highly capable standard model", "claude-3-5-sonnet-latest"),
            ("Claude Sonnet 3.7 - Exceptional hybrid reasoning and agentic capabilities", "claude-3-7-sonnet-latest"),
            ("Claude Sonnet 4 - High performance and excellent reasoning", "claude-sonnet-4-0"),
-            ("Claude Opus 4 - Most powerful Anthropic model", "	claude-opus-4-0"),
+            ("Claude Opus 4 - Most powerful Anthropic model", "claude-opus-4-0"),
        ],
        "google": [
            ("Gemini 2.0 Flash-Lite - Cost efficiency and low latency", "gemini-2.0-flash-lite"),
            ("Gemini 2.0 Flash - Next generation features, speed, and thinking", "gemini-2.0-flash"),
-            ("Gemini 2.5 Flash - Adaptive thinking, cost efficiency", "gemini-2.5-flash-preview-05-20"),
-            ("Gemini 2.5 Pro", "gemini-2.5-pro-preview-06-05"),
+            ("Gemini 2.5 Flash-Lite - Lightweight and cost efficient", "gemini-2.5-flash-lite"),
+            ("Gemini 2.5 Flash - Adaptive thinking, cost efficiency", "gemini-2.5-flash"),
+            ("Gemini 3 Flash Preview - Latest generation flash model", "gemini-3-flash-preview"),
        ],
        "openrouter": [
+            ("Xiaomi MiMo V2 Flash - Fast and efficient multimodal model", "xiaomi/mimo-v2-flash:free"),
            ("DeepSeek V3 - a 685B-parameter, mixture-of-experts model", "deepseek/deepseek-chat-v3-0324:free"),
            ("Deepseek - latest iteration of the flagship chat model family from the DeepSeek team.", "deepseek/deepseek-chat-v3-0324:free"),
        ],
        "ollama": [
-            ("llama3.1 local", "llama3.1"),
-            ("qwen3", "qwen3"),
+            ("llama3.2:3b local", "llama3.2:3b"),
+            ("phi3.5 local", "phi3.5:latest"),
+        ],
+        "lm studio": [
+            ("Local Model (default)", "local-model"),
        ]
    }
-    
+
+    provider_lower = provider.lower()
+
+    # Try dynamic fetch for supported providers (OpenAI, Anthropic, Google)
+    model_options = None
+    if provider_lower in ["openai", "anthropic", "google"]:
+        dynamic_models = fetch_models_for_provider(provider_lower)
+        if dynamic_models:
+            model_options = dynamic_models
+
+    # Fall back to static list if dynamic fetch failed or not supported
+    if model_options is None:
+        model_options = DEEP_AGENT_OPTIONS.get(provider_lower, [])
+
    choice = questionary.select(
        "Select Your [Deep-Thinking LLM Engine]:",
        choices=[
            questionary.Choice(display, value=value)
-            for display, value in DEEP_AGENT_OPTIONS[provider.lower()]
+            for display, value in model_options
        ],
        instruction="\n- Use arrow keys to navigate\n- Press Enter to select",
        style=questionary.Style(
@ -240,22 +282,35 @@ def select_deep_thinking_agent(provider) -> str:
    return choice

 def select_llm_provider() -> tuple[str, str]:
-    """Select the OpenAI api url using interactive selection."""
-    # Define OpenAI api options with their corresponding endpoints
+    """Select the LLM provider using interactive selection with availability checks."""
+    # Define provider options with their corresponding endpoints
    BASE_URLS = [
        ("OpenAI", "https://api.openai.com/v1"),
        ("Anthropic", "https://api.anthropic.com/"),
        ("Google", "https://generativelanguage.googleapis.com/v1"),
        ("Openrouter", "https://openrouter.ai/api/v1"),
-        ("Ollama", "http://localhost:11434/v1"),        
+        ("Ollama", "http://localhost:11434/v1"),
+        ("LM Studio", "http://localhost:1234/v1"),
    ]
-    
+
+    # Build choices with availability status
+    choices = []
+    for display, url in BASE_URLS:
+        available, reason = is_provider_available(display)
+        if available:
+            choices.append(questionary.Choice(display, value=(display, url)))
+        else:
+            # Show disabled option with reason
+            disabled_label = f"{display} ({reason})"
+            choices.append(questionary.Choice(
+                disabled_label,
+                value=(display, url),
+                disabled=reason
+            ))
+
    choice = questionary.select(
        "Select your LLM Provider:",
-        choices=[
-            questionary.Choice(display, value=(display, value))
-            for display, value in BASE_URLS
-        ],
+        choices=choices,
        instruction="\n- Use arrow keys to navigate\n- Press Enter to select",
        style=questionary.Style(
            [
@ -265,12 +320,12 @@ def select_llm_provider() -> tuple[str, str]:
            ]
        ),
    ).ask()
-    
+
    if choice is None:
-        console.print("\n[red]no OpenAI backend selected. Exiting...[/red]")
+        console.print("\n[red]No LLM provider selected. Exiting...[/red]")
        exit(1)
-    
+
    display_name, url = choice
    print(f"You selected: {display_name}\tURL: {url}")
-    
+
    return display_name, url
--- a/requirements.txt
+++ b/requirements.txt
@ -24,3 +24,5 @@ rich
 questionary
 langchain_anthropic
 langchain-google-genai
+playwright
+markdown2