fix: use OpenAI Responses API for native models
Enable use_responses_api for the native OpenAI provider. The Responses API supports reasoning_effort together with function tools across all model families. Remove the UnifiedChatOpenAI subclass workaround. Closes #403
This commit is contained in:
parent
f362a160c3
commit
3ff28f3559
|
|
@ -6,28 +6,28 @@ from langchain_openai import ChatOpenAI
|
||||||
from .base_client import BaseLLMClient
|
from .base_client import BaseLLMClient
|
||||||
from .validators import validate_model
|
from .validators import validate_model
|
||||||
|
|
||||||
|
# Kwargs forwarded from user config to ChatOpenAI
|
||||||
|
_PASSTHROUGH_KWARGS = (
|
||||||
|
"timeout", "max_retries", "reasoning_effort",
|
||||||
|
"api_key", "callbacks", "http_client", "http_async_client",
|
||||||
|
)
|
||||||
|
|
||||||
class UnifiedChatOpenAI(ChatOpenAI):
|
# Provider base URLs and API key env vars
|
||||||
"""ChatOpenAI subclass that strips temperature/top_p for GPT-5 family models.
|
_PROVIDER_CONFIG = {
|
||||||
|
"xai": ("https://api.x.ai/v1", "XAI_API_KEY"),
|
||||||
GPT-5 family models use reasoning natively. temperature/top_p are only
|
"openrouter": ("https://openrouter.ai/api/v1", "OPENROUTER_API_KEY"),
|
||||||
accepted when reasoning.effort is 'none'; with any other effort level
|
"ollama": ("http://localhost:11434/v1", None),
|
||||||
(or for older GPT-5/GPT-5-mini/GPT-5-nano which always reason) the API
|
}
|
||||||
rejects these params. Langchain defaults temperature=0.7, so we must
|
|
||||||
strip it to avoid errors.
|
|
||||||
|
|
||||||
Non-GPT-5 models (GPT-4.1, xAI, Ollama, etc.) are unaffected.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
|
||||||
if "gpt-5" in kwargs.get("model", "").lower():
|
|
||||||
kwargs.pop("temperature", None)
|
|
||||||
kwargs.pop("top_p", None)
|
|
||||||
super().__init__(**kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
class OpenAIClient(BaseLLMClient):
|
class OpenAIClient(BaseLLMClient):
|
||||||
"""Client for OpenAI, Ollama, OpenRouter, and xAI providers."""
|
"""Client for OpenAI, Ollama, OpenRouter, and xAI providers.
|
||||||
|
|
||||||
|
For native OpenAI models, uses the Responses API (/v1/responses) which
|
||||||
|
supports reasoning_effort with function tools across all model families
|
||||||
|
(GPT-4.1, GPT-5). Third-party compatible providers (xAI, OpenRouter,
|
||||||
|
Ollama) use standard Chat Completions.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
|
@ -43,27 +43,30 @@ class OpenAIClient(BaseLLMClient):
|
||||||
"""Return configured ChatOpenAI instance."""
|
"""Return configured ChatOpenAI instance."""
|
||||||
llm_kwargs = {"model": self.model}
|
llm_kwargs = {"model": self.model}
|
||||||
|
|
||||||
if self.provider == "xai":
|
# Provider-specific base URL and auth
|
||||||
llm_kwargs["base_url"] = "https://api.x.ai/v1"
|
if self.provider in _PROVIDER_CONFIG:
|
||||||
api_key = os.environ.get("XAI_API_KEY")
|
base_url, api_key_env = _PROVIDER_CONFIG[self.provider]
|
||||||
|
llm_kwargs["base_url"] = base_url
|
||||||
|
if api_key_env:
|
||||||
|
api_key = os.environ.get(api_key_env)
|
||||||
if api_key:
|
if api_key:
|
||||||
llm_kwargs["api_key"] = api_key
|
llm_kwargs["api_key"] = api_key
|
||||||
elif self.provider == "openrouter":
|
else:
|
||||||
llm_kwargs["base_url"] = "https://openrouter.ai/api/v1"
|
llm_kwargs["api_key"] = "ollama"
|
||||||
api_key = os.environ.get("OPENROUTER_API_KEY")
|
|
||||||
if api_key:
|
|
||||||
llm_kwargs["api_key"] = api_key
|
|
||||||
elif self.provider == "ollama":
|
|
||||||
llm_kwargs["base_url"] = "http://localhost:11434/v1"
|
|
||||||
llm_kwargs["api_key"] = "ollama" # Ollama doesn't require auth
|
|
||||||
elif self.base_url:
|
elif self.base_url:
|
||||||
llm_kwargs["base_url"] = self.base_url
|
llm_kwargs["base_url"] = self.base_url
|
||||||
|
|
||||||
for key in ("timeout", "max_retries", "reasoning_effort", "api_key", "callbacks", "http_client", "http_async_client"):
|
# Forward user-provided kwargs
|
||||||
|
for key in _PASSTHROUGH_KWARGS:
|
||||||
if key in self.kwargs:
|
if key in self.kwargs:
|
||||||
llm_kwargs[key] = self.kwargs[key]
|
llm_kwargs[key] = self.kwargs[key]
|
||||||
|
|
||||||
return UnifiedChatOpenAI(**llm_kwargs)
|
# Native OpenAI: use Responses API for consistent behavior across
|
||||||
|
# all model families. Third-party providers use Chat Completions.
|
||||||
|
if self.provider == "openai":
|
||||||
|
llm_kwargs["use_responses_api"] = True
|
||||||
|
|
||||||
|
return ChatOpenAI(**llm_kwargs)
|
||||||
|
|
||||||
def validate_model(self) -> bool:
|
def validate_model(self) -> bool:
|
||||||
"""Validate model for the provider."""
|
"""Validate model for the provider."""
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue