adapt for llama server
This commit is contained in:
parent
fd8cc324e1
commit
9109516f7e
|
|
@@ -11,7 +11,7 @@ DEFAULT_CONFIG = {
     "llm_provider": "openai",
     "deep_think_llm": "qwen-35b",
     "quick_think_llm": "qwen-35b",
-    "backend_url": "https://ai.ghosthouse.uk/v1/chat/completions",
+    "backend_url": "https://ai.ghosthouse.uk/v1",
     # Provider-specific thinking configuration
     "google_thinking_level": None,  # "high", "minimal", etc.
     "openai_reasoning_effort": None,  # "medium", "high", "low"
|
|
|
|||
|
|
@@ -76,7 +76,7 @@ class OpenAIClient(BaseLLMClient):

         # Native OpenAI: use Responses API for consistent behavior across
         # all model families. Third-party providers use Chat Completions.
-        if self.provider == "openai":
+        if self.provider == "openai" and not self.base_url:
             llm_kwargs["use_responses_api"] = True

         return NormalizedChatOpenAI(**llm_kwargs)
|
|
|||
Loading…
Reference in New Issue