"""Tests for the Codex LLM provider stack in TradingAgents.

Covers Codex binary resolution, OpenAI-dict/LangChain message normalization,
structured-output schema construction, invocation/tool-call parsing, retry
behavior, and app-server preflight checks, using an in-memory fake session.
"""
import re
import unittest
from collections import deque
from pathlib import Path
from unittest.mock import patch
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
from langchain_core.prompts import ChatPromptTemplate
from tradingagents.llm_clients.codex_app_server import (
CodexAppServerAuthError,
CodexAppServerBinaryError,
CodexInvocationResult,
CodexStructuredOutputError,
)
from tradingagents.llm_clients.codex_message_codec import normalize_input_messages
from tradingagents.llm_clients.codex_binary import resolve_codex_binary
from tradingagents.llm_clients.codex_preflight import run_codex_preflight
from tradingagents.llm_clients.codex_schema import (
build_plain_response_schema,
build_tool_response_schema,
normalize_tools_for_codex,
)
from tradingagents.llm_clients.factory import create_llm_client
def lookup_price(ticker: str) -> str:
    """Return the latest price snapshot for a ticker."""
    # Intentionally body-less stub: only the signature and docstring are used
    # to derive the Codex tool schema (via normalize_tools_for_codex /
    # bind_tools); the function is never actually executed in these tests.
def lookup_volume(ticker: str) -> str:
    """Return the latest volume snapshot for a ticker."""
    # Intentionally body-less stub: provides a second tool so multi-tool
    # schema branches (enum names + arguments_json) can be exercised.
class FakeCodexSession:
    """In-memory double for a Codex app-server session.

    Records the constructor wiring and every ``invoke`` call, and replays a
    FIFO queue of canned final-text responses so tests stay deterministic.
    """

    def __init__(
        self,
        *,
        codex_binary=None,
        request_timeout=0,
        workspace_dir="",
        cleanup_threads=True,
        responses=None,
        account_payload=None,
        models_payload=None,
    ):
        # Wiring arguments are stored verbatim so tests can assert on them.
        self.codex_binary = codex_binary
        self.request_timeout = request_timeout
        self.workspace_dir = workspace_dir
        self.cleanup_threads = cleanup_threads
        # Canned responses are consumed front-to-back by invoke().
        self.responses = deque([] if responses is None else responses)
        # Fall back to a logged-in ChatGPT account unless a payload is given.
        if not account_payload:
            account_payload = {
                "account": {"type": "chatgpt"},
                "requiresOpenaiAuth": False,
            }
        self.account_payload = account_payload
        if not models_payload:
            models_payload = {"data": [{"id": "gpt-5.4", "model": "gpt-5.4"}]}
        self.models_payload = models_payload
        # Lifecycle counters plus a transcript of every invoke() call.
        self.started = 0
        self.closed = 0
        self.invocations = []

    def start(self):
        """Count a session start; no real process is launched."""
        self.started = self.started + 1

    def close(self):
        """Count a session close; there is nothing real to tear down."""
        self.closed = self.closed + 1

    def account_read(self):
        """Return the canned account payload."""
        return self.account_payload

    def model_list(self, include_hidden=True):
        """Return the canned model listing; ``include_hidden`` is ignored."""
        return self.models_payload

    def invoke(
        self,
        *,
        prompt,
        model,
        output_schema,
        reasoning_effort,
        summary,
        personality,
    ):
        """Record the call and replay the next canned response.

        Raises AssertionError when the response queue is exhausted, which
        signals a test consuming more turns than it stubbed.
        """
        record = {
            "prompt": prompt,
            "model": model,
            "output_schema": output_schema,
            "reasoning_effort": reasoning_effort,
            "summary": summary,
            "personality": personality,
        }
        self.invocations.append(record)
        if not self.responses:
            raise AssertionError("No fake Codex responses left.")
        next_text = self.responses.popleft()
        return CodexInvocationResult(final_text=next_text, notifications=[])
class CodexProviderTests(unittest.TestCase):
    """Unit coverage for the Codex provider: binary resolution, message
    normalization, schema construction, invocation parsing, retries, and
    preflight, all driven through FakeCodexSession (no real binary)."""

    def test_resolve_codex_binary_uses_windows_vscode_fallback(self):
        # With nothing on PATH on Windows, resolution should fall back to the
        # VS Code extension install location under the user's home directory.
        fake_home = Path("C:/Users/tester")
        candidate = fake_home / ".vscode/extensions/openai.chatgpt-1.0.0/bin/windows-x86_64/codex.exe"
        with (
            patch("tradingagents.llm_clients.codex_binary.os.name", "nt"),
            patch("tradingagents.llm_clients.codex_binary.Path.home", return_value=fake_home),
            patch("tradingagents.llm_clients.codex_binary.shutil.which", return_value=None),
            patch(
                "tradingagents.llm_clients.codex_binary.Path.glob",
                return_value=[candidate],
            ),
            patch("pathlib.Path.is_file", return_value=True),
            patch("pathlib.Path.exists", return_value=True),
            patch("pathlib.Path.stat") as mocked_stat,
        ):
            # st_mtime feeds newest-first candidate ordering in the resolver.
            mocked_stat.return_value.st_mtime = 1
            resolved = resolve_codex_binary(None)
        self.assertEqual(resolved, str(candidate))

    def test_resolve_codex_binary_skips_unusable_path_alias_on_windows(self):
        # shutil.which returns a WindowsApps store alias that is not actually
        # runnable; resolution must skip it and pick the extension binary.
        fake_home = Path("C:/Users/tester")
        alias_path = "C:/Program Files/WindowsApps/OpenAI.Codex/app/resources/codex.exe"
        candidate = fake_home / ".vscode/extensions/openai.chatgpt-1.0.0/bin/windows-x86_64/codex.exe"
        with (
            patch("tradingagents.llm_clients.codex_binary.os.name", "nt"),
            patch("tradingagents.llm_clients.codex_binary.Path.home", return_value=fake_home),
            patch("tradingagents.llm_clients.codex_binary.shutil.which", return_value=alias_path),
            patch(
                "tradingagents.llm_clients.codex_binary.Path.glob",
                return_value=[candidate],
            ),
            patch("pathlib.Path.is_file", return_value=True),
            patch("pathlib.Path.exists", return_value=True),
            patch("pathlib.Path.stat") as mocked_stat,
            patch(
                "tradingagents.llm_clients.codex_binary._is_usable_codex_binary",
                # Usable for everything except the broken alias.
                side_effect=lambda path: path != alias_path,
            ),
        ):
            mocked_stat.return_value.st_mtime = 1
            resolved = resolve_codex_binary(None)
        self.assertEqual(resolved, str(candidate))

    def test_resolve_codex_binary_uses_env_override(self):
        # The CODEX_BINARY environment variable should win when PATH lookup
        # returns nothing.
        with (
            patch("tradingagents.llm_clients.codex_binary.os.name", "nt"),
            patch("tradingagents.llm_clients.codex_binary.shutil.which", return_value=None),
            patch.dict("os.environ", {"CODEX_BINARY": "C:/custom/codex.exe"}, clear=False),
            patch("pathlib.Path.is_file", return_value=True),
            patch(
                "tradingagents.llm_clients.codex_binary._is_usable_codex_binary",
                return_value=True,
            ),
        ):
            resolved = resolve_codex_binary(None)
        self.assertEqual(Path(resolved), Path("C:/custom/codex.exe"))

    def test_resolve_codex_binary_checks_explicit_binary_usability(self):
        # NOTE(review): despite the name, this asserts the explicitly supplied
        # binary is returned even when the usability probe reports False —
        # presumably an explicit path is trusted as-is; confirm against the
        # resolver implementation.
        with (
            patch("tradingagents.llm_clients.codex_binary.os.name", "nt"),
            patch("pathlib.Path.is_file", return_value=True),
            patch(
                "tradingagents.llm_clients.codex_binary._is_usable_codex_binary",
                return_value=False,
            ),
        ):
            resolved = resolve_codex_binary("C:/custom/codex.exe")
        self.assertEqual(Path(resolved), Path("C:/custom/codex.exe"))

    def test_message_normalization_supports_str_messages_and_openai_dicts(self):
        # OpenAI-style role dicts (including assistant tool calls and tool
        # results) should map onto the matching LangChain message classes.
        normalized = normalize_input_messages(
            [
                {"role": "system", "content": "system"},
                {"role": "user", "content": "user"},
                {
                    "role": "assistant",
                    "content": "",
                    "tool_calls": [
                        {
                            "id": "call_123",
                            "type": "function",
                            "function": {
                                "name": "lookup_price",
                                "arguments": '{"ticker":"NVDA"}',
                            },
                        }
                    ],
                },
                {"role": "tool", "tool_call_id": "call_123", "content": "42"},
            ]
        )
        self.assertIsInstance(normalized[0], SystemMessage)
        self.assertIsInstance(normalized[1], HumanMessage)
        self.assertIsInstance(normalized[2], AIMessage)
        # JSON-string arguments are decoded into a dict on the AIMessage.
        self.assertEqual(normalized[2].tool_calls[0]["name"], "lookup_price")
        self.assertEqual(normalized[2].tool_calls[0]["args"], {"ticker": "NVDA"})
        self.assertIsInstance(normalized[3], ToolMessage)

    def test_output_schema_construction_builds_exact_tool_branches(self):
        # A single tool yields a const tool-name branch; multiple tools fall
        # back to an enum name plus a JSON-string arguments_json field.
        tool_schemas = normalize_tools_for_codex([lookup_price])
        schema = build_tool_response_schema(tool_schemas)
        required_schema = build_tool_response_schema(tool_schemas, allow_final=False)
        plain_schema = build_plain_response_schema()
        self.assertEqual(plain_schema["required"], ["answer"])
        self.assertEqual(schema["properties"]["mode"]["enum"], ["final", "tool_calls"])
        tool_branch = schema["properties"]["tool_calls"]["items"]
        self.assertEqual(tool_branch["properties"]["name"]["const"], "lookup_price")
        self.assertIn("arguments", tool_branch["required"])
        # allow_final=False pins the mode to tool_calls only.
        self.assertEqual(required_schema["properties"]["mode"]["const"], "tool_calls")
        generic_schema = build_tool_response_schema(
            normalize_tools_for_codex([lookup_price, lookup_volume])
        )
        generic_items = generic_schema["properties"]["tool_calls"]["items"]
        self.assertEqual(generic_items["properties"]["name"]["type"], "string")
        self.assertIn("enum", generic_items["properties"]["name"])
        self.assertEqual(generic_items["properties"]["arguments_json"]["type"], "string")

    def test_plain_final_response_parsing(self):
        # A plain {"answer": ...} payload becomes the message content, and the
        # fake session is started exactly once.
        session = FakeCodexSession(
            responses=['{"answer":"Final decision"}'],
        )
        llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            session_factory=lambda **kwargs: session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()
        result = llm.invoke("Give me the final answer.")
        self.assertEqual(result.content, "Final decision")
        self.assertEqual(session.started, 1)

    def test_invoke_accepts_openai_style_message_dicts(self):
        # Role dicts should be rendered into the [System]/[Human] transcript
        # that is sent to the Codex session.
        session = FakeCodexSession(
            responses=['{"answer":"From dict transcript"}'],
        )
        llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            session_factory=lambda **kwargs: session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()
        result = llm.invoke(
            [
                {"role": "system", "content": "system"},
                {"role": "user", "content": "user"},
            ]
        )
        self.assertEqual(result.content, "From dict transcript")
        self.assertIn("[System]\nsystem", session.invocations[0]["prompt"])
        self.assertIn("[Human]\nuser", session.invocations[0]["prompt"])

    def test_invoke_accepts_langchain_message_sequences(self):
        # BaseMessage sequences take the same transcript path as role dicts.
        session = FakeCodexSession(
            responses=['{"answer":"From BaseMessage transcript"}'],
        )
        llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            session_factory=lambda **kwargs: session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()
        result = llm.invoke(
            [
                SystemMessage(content="system"),
                HumanMessage(content="user"),
            ]
        )
        self.assertEqual(result.content, "From BaseMessage transcript")
        self.assertIn("[System]\nsystem", session.invocations[0]["prompt"])
        self.assertIn("[Human]\nuser", session.invocations[0]["prompt"])

    def test_tool_call_response_parsing_populates_ai_message_tool_calls(self):
        # A tool_calls-mode payload is parsed into AIMessage.tool_calls with
        # a generated call id of the form call_<32 hex chars>.
        session = FakeCodexSession(
            responses=[
                '{"mode":"tool_calls","content":"Need data first","tool_calls":[{"name":"lookup_price","arguments":{"ticker":"NVDA"}}]}'
            ],
        )
        llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            session_factory=lambda **kwargs: session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()
        prompt = ChatPromptTemplate.from_messages(
            [("system", "Use tools if needed."), ("human", "Analyze NVDA")]
        )
        result = (prompt | llm.bind_tools([lookup_price])).invoke({})
        self.assertEqual(result.content, "Need data first")
        self.assertEqual(result.tool_calls[0]["name"], "lookup_price")
        self.assertEqual(result.tool_calls[0]["args"], {"ticker": "NVDA"})
        self.assertRegex(result.tool_calls[0]["id"], r"^call_[0-9a-f]{32}$")

    def test_multi_tool_response_parses_arguments_json(self):
        # With multiple bound tools, arguments arrive as a JSON string in
        # arguments_json and must be decoded into the args dict.
        session = FakeCodexSession(
            responses=[
                '{"mode":"tool_calls","content":"","tool_calls":[{"name":"lookup_price","arguments_json":"{\\"ticker\\":\\"NVDA\\"}"}]}'
            ],
        )
        llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            session_factory=lambda **kwargs: session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()
        result = llm.bind_tools([lookup_price, lookup_volume]).invoke("Analyze NVDA")
        self.assertEqual(result.tool_calls[0]["name"], "lookup_price")
        self.assertEqual(result.tool_calls[0]["args"], {"ticker": "NVDA"})

    def test_bind_tools_honors_required_and_named_tool_choice(self):
        # tool_choice="required" must pin the schema mode to tool_calls and
        # add a "must respond with tool calls" instruction to the prompt.
        required_session = FakeCodexSession(
            responses=[
                '{"mode":"tool_calls","content":"Calling tool","tool_calls":[{"name":"lookup_price","arguments":{"ticker":"NVDA"}}]}'
            ],
        )
        required_llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            session_factory=lambda **kwargs: required_session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()
        required_result = required_llm.bind_tools([lookup_price], tool_choice="required").invoke(
            "Analyze NVDA"
        )
        self.assertTrue(required_result.tool_calls)
        self.assertEqual(
            required_session.invocations[0]["output_schema"]["properties"]["mode"]["const"],
            "tool_calls",
        )
        self.assertIn(
            "must respond with one or more tool calls",
            required_session.invocations[0]["prompt"].lower(),
        )
        # A named tool choice must constrain the schema to that tool's const
        # name and name it explicitly in the prompt instruction.
        named_session = FakeCodexSession(
            responses=[
                '{"mode":"tool_calls","content":"Calling named tool","tool_calls":[{"name":"lookup_price","arguments":{"ticker":"MSFT"}}]}'
            ],
        )
        named_llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            session_factory=lambda **kwargs: named_session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()
        named_result = named_llm.bind_tools(
            [lookup_price],
            tool_choice={"type": "function", "function": {"name": "lookup_price"}},
        ).invoke("Analyze MSFT")
        self.assertEqual(named_result.tool_calls[0]["name"], "lookup_price")
        tool_item = named_session.invocations[0]["output_schema"]["properties"]["tool_calls"]["items"]
        self.assertEqual(tool_item["properties"]["name"]["const"], "lookup_price")
        self.assertIn(
            "must call the tool named `lookup_price`",
            named_session.invocations[0]["prompt"].lower(),
        )

    def test_malformed_json_retries_and_surfaces_error_when_exhausted(self):
        # First response is not JSON; with codex_max_retries=1 the client
        # retries once (with a validation-failure note in the prompt) and
        # recovers.
        session = FakeCodexSession(
            responses=["not json", '{"answer":"Recovered"}'],
        )
        llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            codex_max_retries=1,
            session_factory=lambda **kwargs: session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()
        result = llm.invoke("Recover after malformed JSON.")
        self.assertEqual(result.content, "Recovered")
        self.assertEqual(len(session.invocations), 2)
        self.assertIn(
            "previous response did not satisfy tradingagents validation",
            session.invocations[1]["prompt"].lower(),
        )
        # When every attempt is malformed, the structured-output error must
        # surface instead of being swallowed.
        failing_session = FakeCodexSession(
            responses=["still bad", "still bad again"],
        )
        failing_llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            codex_max_retries=1,
            session_factory=lambda **kwargs: failing_session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()
        with self.assertRaises(CodexStructuredOutputError):
            failing_llm.invoke("This should fail.")

    def test_runtime_errors_do_not_retry_as_json_failures(self):
        # Transport-level exceptions must propagate immediately rather than
        # being retried like malformed-JSON responses.
        class FailingSession(FakeCodexSession):
            def invoke(self, **kwargs):
                raise RuntimeError("transport exploded")

        session = FailingSession()
        llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            codex_max_retries=2,
            session_factory=lambda **kwargs: session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()
        with self.assertRaisesRegex(RuntimeError, "transport exploded"):
            llm.invoke("fail fast")

    def test_provider_codex_smoke_covers_bind_tools_and_direct_invoke_paths(self):
        # Smoke test: one tool-call turn through bind_tools, then one plain
        # final-answer turn, against the same session.
        session = FakeCodexSession(
            responses=[
                '{"mode":"tool_calls","content":"Fetching market data","tool_calls":[{"name":"lookup_price","arguments":{"ticker":"NVDA"}}]}',
                '{"answer":"Rating: Buy\\nExecutive Summary: Add gradually."}',
            ],
        )
        llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            session_factory=lambda **kwargs: session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()
        analyst_prompt = ChatPromptTemplate.from_messages(
            [("system", "Use tools when you need extra data."), ("human", "Analyze NVDA.")]
        )
        market_result = (analyst_prompt | llm.bind_tools([lookup_price])).invoke({})
        self.assertTrue(market_result.tool_calls)
        self.assertEqual(market_result.tool_calls[0]["name"], "lookup_price")
        decision = llm.invoke("Produce the final trade decision.")
        self.assertIn("Rating: Buy", decision.content)
        self.assertEqual(len(session.invocations), 2)

    def test_preflight_detects_missing_auth_and_missing_binary(self):
        # Preflight succeeds with a logged-in ChatGPT account, raises an auth
        # error when no account is present, and a binary error when the codex
        # binary cannot be resolved.
        valid_factory = lambda **kwargs: FakeCodexSession(
            account_payload={
                "account": {"type": "chatgpt", "email": "user@example.com"},
                "requiresOpenaiAuth": True,
            }
        )
        result = run_codex_preflight(
            codex_binary="C:\\fake\\codex.exe",
            model="gpt-5.4",
            request_timeout=10.0,
            workspace_dir="C:/tmp/codex-workspace",
            cleanup_threads=True,
            session_factory=valid_factory,
        )
        self.assertEqual(result.account["type"], "chatgpt")
        authless_factory = lambda **kwargs: FakeCodexSession(
            account_payload={"account": None, "requiresOpenaiAuth": True}
        )
        with self.assertRaises(CodexAppServerAuthError):
            run_codex_preflight(
                codex_binary="C:\\fake\\codex.exe",
                model="gpt-5.4",
                request_timeout=10.0,
                workspace_dir="C:/tmp/codex-workspace",
                cleanup_threads=True,
                session_factory=authless_factory,
            )
        with patch(
            "tradingagents.llm_clients.codex_preflight.resolve_codex_binary",
            return_value=None,
        ):
            with self.assertRaises(CodexAppServerBinaryError):
                run_codex_preflight(
                    codex_binary="definitely-missing-codex-binary",
                    model="gpt-5.4",
                    request_timeout=10.0,
                    workspace_dir="C:/tmp/codex-workspace",
                    cleanup_threads=True,
                )

    def test_preflight_uses_resolved_binary_path(self):
        # The session factory must receive the resolved binary path rather
        # than the raw codex_binary argument (None here).
        captured = {}

        def factory(**kwargs):
            captured["codex_binary"] = kwargs["codex_binary"]
            return FakeCodexSession(**kwargs)

        with patch(
            "tradingagents.llm_clients.codex_preflight.resolve_codex_binary",
            return_value="C:/resolved/codex.exe",
        ):
            run_codex_preflight(
                codex_binary=None,
                model="gpt-5.4",
                request_timeout=10.0,
                workspace_dir="C:/tmp/codex-workspace",
                cleanup_threads=True,
                session_factory=factory,
            )
        self.assertEqual(captured["codex_binary"], "C:/resolved/codex.exe")
if __name__ == "__main__":
    # Support direct execution: `python test_codex_provider.py`.
    unittest.main()