Adapt for llama server

This commit is contained in:
Ashish Jain 2026-04-12 15:57:48 -04:00
parent fd8cc324e1
commit 9109516f7e
2 changed files with 2 additions and 2 deletions

View File

@@ -11,7 +11,7 @@ DEFAULT_CONFIG = {
"llm_provider": "openai",
"deep_think_llm": "qwen-35b",
"quick_think_llm": "qwen-35b",
"backend_url": "https://ai.ghosthouse.uk/v1/chat/completions",
"backend_url": "https://ai.ghosthouse.uk/v1",
# Provider-specific thinking configuration
"google_thinking_level": None, # "high", "minimal", etc.
"openai_reasoning_effort": None, # "medium", "high", "low"

View File

@@ -76,7 +76,7 @@ class OpenAIClient(BaseLLMClient):
# Native OpenAI: use Responses API for consistent behavior across
# all model families. Third-party providers use Chat Completions.
if self.provider == "openai":
if self.provider == "openai" and not self.base_url:
llm_kwargs["use_responses_api"] = True
return NormalizedChatOpenAI(**llm_kwargs)