feat: add llama.cpp local LLM support via .env configuration

Add 'llamacpp' as a new provider for running TradingAgents fully
offline with a local llama-server (llama.cpp).

Changes:
- factory.py: register 'llamacpp' provider alongside openai/ollama
- validators.py: accept any model name for llamacpp (like ollama)
- openai_client.py: llamacpp branch sets base_url from env/config,
  uses placeholder api_key so no auth error is raised
- default_config.py: load .env via python-dotenv (optional dep);
  LLM_PROVIDER, BACKEND_URL, DEEP_THINK_LLM, QUICK_THINK_LLM are
  all overridable via environment variables
- .env.example: document llamacpp setup alongside cloud providers
- .gitignore: ensure .env is ignored, .env.example is tracked

Fully backward-compatible: OpenAI remains the default when no
.env is present. Also works for LM Studio, vLLM, or any other
OpenAI-compatible local server via BACKEND_URL + LLM_PROVIDER=openai.

Tested with: llama.cpp llama-server + Qwen3.5-35B-A3B-Q3_K_M
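
For context, a minimal sketch of how the env-driven config is meant to be picked up
end to end. The TradingAgentsGraph import path, constructor, and propagate() call are
assumptions about the existing repo layout, not something this commit adds:

    # .env: LLM_PROVIDER=llamacpp, BACKEND_URL=http://localhost:8081/v1,
    #       DEEP_THINK_LLM / QUICK_THINK_LLM set to the served model name
    from tradingagents.default_config import DEFAULT_CONFIG            # loads .env on import
    from tradingagents.graph.trading_graph import TradingAgentsGraph   # assumed entry point

    ta = TradingAgentsGraph(debug=True, config=DEFAULT_CONFIG.copy())
    _, decision = ta.propagate("NVDA", "2026-03-20")  # hypothetical ticker/date
    print(decision)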
OpenClaw Assistant 2026-03-21 10:26:48 +01:00
parent f362a160c3
commit 3e509bfa32
6 changed files with 34 additions and 7 deletions

.env.example

@@ -1,6 +1,22 @@
-# LLM Providers (set the one you use)
+# ─── Cloud Providers ──────────────────────────────────────────────────────────
 OPENAI_API_KEY=
 GOOGLE_API_KEY=
 ANTHROPIC_API_KEY=
 XAI_API_KEY=
 OPENROUTER_API_KEY=
+# ─── Local LLM via llama.cpp ──────────────────────────────────────────────────
+# 1. Start llama-server:
+#    llama-server --model ~/models/my-model.gguf --port 8081 --host 0.0.0.0
+# 2. Copy this file to .env and uncomment + fill in the values below
+# 3. Find your model ID with: curl http://localhost:8081/v1/models
+# LLM_PROVIDER=llamacpp
+# BACKEND_URL=http://localhost:8081/v1
+# DEEP_THINK_LLM=your-model-name-here
+# QUICK_THINK_LLM=your-model-name-here
+# ─── Any OpenAI-compatible local server (LM Studio, vLLM, etc.) ───────────────
+# LLM_PROVIDER=openai
+# BACKEND_URL=http://localhost:1234/v1
+# OPENAI_API_KEY=dummy
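
A quick sanity check of the values above (a sketch: it assumes the openai Python
package is installed and llama-server is listening on port 8081 as in the example):

    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8081/v1", api_key="no-key-needed")
    print([m.id for m in client.models.list().data])  # should list the loaded GGUF model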

.gitignore

@@ -217,3 +217,4 @@ __marimo__/
 # Cache
 **/data_cache/
+!.env.example

default_config.py

@@ -1,5 +1,11 @@
 import os
+try:
+    from dotenv import load_dotenv
+    load_dotenv()
+except ImportError:
+    pass  # python-dotenv optional — falls back to environment variables
 DEFAULT_CONFIG = {
     "project_dir": os.path.abspath(os.path.join(os.path.dirname(__file__), ".")),
     "results_dir": os.getenv("TRADINGAGENTS_RESULTS_DIR", "./results"),
@@ -8,10 +14,10 @@ DEFAULT_CONFIG = {
         "dataflows/data_cache",
     ),
     # LLM settings
-    "llm_provider": "openai",
-    "deep_think_llm": "gpt-5.2",
-    "quick_think_llm": "gpt-5-mini",
-    "backend_url": "https://api.openai.com/v1",
+    "llm_provider": os.environ.get("LLM_PROVIDER", "openai"),
+    "deep_think_llm": os.environ.get("DEEP_THINK_LLM", "gpt-5.2"),
+    "quick_think_llm": os.environ.get("QUICK_THINK_LLM", "gpt-5-mini"),
+    "backend_url": os.environ.get("BACKEND_URL", os.environ.get("LLAMACPP_BASE_URL", "https://api.openai.com/v1")),
     # Provider-specific thinking configuration
     "google_thinking_level": None,  # "high", "minimal", etc.
     "openai_reasoning_effort": None,  # "medium", "high", "low"

factory.py

@@ -34,7 +34,7 @@ def create_llm_client(
     """
     provider_lower = provider.lower()
-    if provider_lower in ("openai", "ollama", "openrouter"):
+    if provider_lower in ("openai", "ollama", "openrouter", "llamacpp"):
         return OpenAIClient(model, base_url, provider=provider_lower, **kwargs)
     if provider_lower == "xai":

openai_client.py

@@ -56,6 +56,10 @@ class OpenAIClient(BaseLLMClient):
         elif self.provider == "ollama":
             llm_kwargs["base_url"] = "http://localhost:11434/v1"
             llm_kwargs["api_key"] = "ollama"  # Ollama doesn't require auth
+        elif self.provider == "llamacpp":
+            base_url = self.base_url or os.environ.get("LLAMACPP_BASE_URL", "http://localhost:8080/v1")
+            llm_kwargs["base_url"] = base_url
+            llm_kwargs["api_key"] = "no-key-needed"  # llama-server doesn't require auth
         elif self.base_url:
             llm_kwargs["base_url"] = self.base_url

validators.py

@@ -58,7 +58,7 @@ def validate_model(provider: str, model: str) -> bool:
     """
     provider_lower = provider.lower()
-    if provider_lower in ("ollama", "openrouter"):
+    if provider_lower in ("ollama", "openrouter", "llamacpp"):
         return True
     if provider_lower not in VALID_MODELS:
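
A quick check of the new validator behavior, assuming validate_model is importable
from this module:

    assert validate_model("llamacpp", "any-local-gguf-name") is True  # any model id accepted, like ollama
    assert validate_model("LlamaCPP", "whatever") is True             # provider match is case-insensitive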