import re
import unittest
from collections import deque
from pathlib import Path
from unittest.mock import patch

from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
from langchain_core.prompts import ChatPromptTemplate

from tradingagents.llm_clients.codex_app_server import (
    CodexAppServerAuthError,
    CodexAppServerBinaryError,
    CodexInvocationResult,
    CodexStructuredOutputError,
)
from tradingagents.llm_clients.codex_message_codec import normalize_input_messages
from tradingagents.llm_clients.codex_binary import resolve_codex_binary
from tradingagents.llm_clients.codex_preflight import run_codex_preflight
from tradingagents.llm_clients.codex_schema import (
    build_plain_response_schema,
    build_tool_response_schema,
    normalize_tools_for_codex,
)
from tradingagents.llm_clients.factory import create_llm_client


def lookup_price(ticker: str) -> str:
    """Return the latest price snapshot for a ticker."""


def lookup_volume(ticker: str) -> str:
    """Return the latest volume snapshot for a ticker."""


class FakeCodexSession:
    """In-memory stand-in for a Codex session used by the tests below.

    It replays a queue of canned response strings, records every ``invoke``
    call in ``invocations`` and counts ``start``/``close`` calls, so tests can
    assert on what the client under test sent and how often.
    """

    def __init__(
        self,
        *,
        codex_binary=None,
        request_timeout=0,
        workspace_dir="",
        cleanup_threads=True,
        responses=None,
        account_payload=None,
        models_payload=None,
    ):
        self.codex_binary = codex_binary
        self.request_timeout = request_timeout
        self.workspace_dir = workspace_dir
        self.cleanup_threads = cleanup_threads
        # Canned final texts, consumed left-to-right, one per invoke() call.
        self.responses = deque(responses or [])
        self.account_payload = account_payload or {
            "account": {"type": "chatgpt"},
            "requiresOpenaiAuth": False,
        }
        self.models_payload = models_payload or {
            "data": [{"id": "gpt-5.4", "model": "gpt-5.4"}]
        }
        self.started = 0
        self.closed = 0
        self.invocations = []

    def start(self):
        """Count a session start."""
        self.started += 1

    def close(self):
        """Count a session close."""
        self.closed += 1

    def account_read(self):
        """Return the configured account payload."""
        return self.account_payload

    def model_list(self, include_hidden=True):
        """Return the configured model listing; ``include_hidden`` is ignored."""
        return self.models_payload

    def invoke(
        self,
        *,
        prompt,
        model,
        output_schema,
        reasoning_effort,
        summary,
        personality,
    ):
        """Record the call and return the next canned response.

        Raises:
            AssertionError: if the queue of canned responses is exhausted.
        """
        self.invocations.append(
            {
                "prompt": prompt,
                "model": model,
                "output_schema": output_schema,
                "reasoning_effort": reasoning_effort,
                "summary": summary,
                "personality": personality,
            }
        )
        if not self.responses:
            raise AssertionError("No fake Codex responses left.")
        return CodexInvocationResult(final_text=self.responses.popleft(), notifications=[])


def _build_codex_llm(session, **client_kwargs):
    """Create a Codex chat model wired to ``session`` with a no-op preflight.

    Extra keyword arguments (for example ``codex_max_retries``) are forwarded
    to ``create_llm_client`` unchanged.
    """
    return create_llm_client(
        "codex",
        "gpt-5.4",
        codex_binary="C:/fake/codex",
        codex_workspace_dir="C:/tmp/codex-workspace",
        session_factory=lambda **kwargs: session,
        preflight_runner=lambda **kwargs: None,
        **client_kwargs,
    ).get_llm()


class CodexProviderTests(unittest.TestCase):
    """Tests for the Codex provider: binary resolution, schemas, invoke, preflight."""

    # NOTE: in the binary-resolution tests the final assertions run after the
    # ``with`` block, i.e. once the patches (notably ``os.name``) are undone.
    # Building ``pathlib.Path`` objects while ``os.name`` is patched to "nt"
    # can fail on non-Windows hosts on some Python versions.

    def test_resolve_codex_binary_uses_windows_vscode_fallback(self):
        fake_home = Path("C:/Users/tester")
        candidate = fake_home / ".vscode/extensions/openai.chatgpt-1.0.0/bin/windows-x86_64/codex.exe"
        with (
            patch("tradingagents.llm_clients.codex_binary.os.name", "nt"),
            patch("tradingagents.llm_clients.codex_binary.Path.home", return_value=fake_home),
            patch("tradingagents.llm_clients.codex_binary.shutil.which", return_value=None),
            patch(
                "tradingagents.llm_clients.codex_binary.Path.glob",
                return_value=[candidate],
            ),
            patch("pathlib.Path.is_file", return_value=True),
            patch("pathlib.Path.exists", return_value=True),
            patch("pathlib.Path.stat") as mocked_stat,
        ):
            mocked_stat.return_value.st_mtime = 1
            resolved = resolve_codex_binary(None)

        self.assertEqual(resolved, str(candidate))

    def test_resolve_codex_binary_skips_unusable_path_alias_on_windows(self):
        fake_home = Path("C:/Users/tester")
        alias_path = "C:/Program Files/WindowsApps/OpenAI.Codex/app/resources/codex.exe"
        candidate = fake_home / ".vscode/extensions/openai.chatgpt-1.0.0/bin/windows-x86_64/codex.exe"
        with (
            patch("tradingagents.llm_clients.codex_binary.os.name", "nt"),
            patch("tradingagents.llm_clients.codex_binary.Path.home", return_value=fake_home),
            patch("tradingagents.llm_clients.codex_binary.shutil.which", return_value=alias_path),
            patch(
                "tradingagents.llm_clients.codex_binary.Path.glob",
                return_value=[candidate],
            ),
            patch("pathlib.Path.is_file", return_value=True),
            patch("pathlib.Path.exists", return_value=True),
            patch("pathlib.Path.stat") as mocked_stat,
            patch(
                "tradingagents.llm_clients.codex_binary._is_usable_codex_binary",
                # Only the PATH alias is reported as unusable.
                side_effect=lambda path: path != alias_path,
            ),
        ):
            mocked_stat.return_value.st_mtime = 1
            resolved = resolve_codex_binary(None)

        self.assertEqual(resolved, str(candidate))

    def test_resolve_codex_binary_uses_env_override(self):
        with (
            patch("tradingagents.llm_clients.codex_binary.os.name", "nt"),
            patch("tradingagents.llm_clients.codex_binary.shutil.which", return_value=None),
            patch.dict("os.environ", {"CODEX_BINARY": "C:/custom/codex.exe"}, clear=False),
            patch("pathlib.Path.is_file", return_value=True),
            patch(
                "tradingagents.llm_clients.codex_binary._is_usable_codex_binary",
                return_value=True,
            ),
        ):
            resolved = resolve_codex_binary(None)

        self.assertEqual(Path(resolved), Path("C:/custom/codex.exe"))

    def test_resolve_codex_binary_checks_explicit_binary_usability(self):
        with (
            patch("tradingagents.llm_clients.codex_binary.os.name", "nt"),
            patch("pathlib.Path.is_file", return_value=True),
            patch(
                "tradingagents.llm_clients.codex_binary._is_usable_codex_binary",
                return_value=False,
            ),
        ):
            resolved = resolve_codex_binary("C:/custom/codex.exe")

        self.assertEqual(Path(resolved), Path("C:/custom/codex.exe"))

    def test_message_normalization_supports_str_messages_and_openai_dicts(self):
        normalized = normalize_input_messages(
            [
                {"role": "system", "content": "system"},
                {"role": "user", "content": "user"},
                {
                    "role": "assistant",
                    "content": "",
                    "tool_calls": [
                        {
                            "id": "call_123",
                            "type": "function",
                            "function": {
                                "name": "lookup_price",
                                "arguments": '{"ticker":"NVDA"}',
                            },
                        }
                    ],
                },
                {"role": "tool", "tool_call_id": "call_123", "content": "42"},
            ]
        )

        self.assertIsInstance(normalized[0], SystemMessage)
        self.assertIsInstance(normalized[1], HumanMessage)
        self.assertIsInstance(normalized[2], AIMessage)
        self.assertEqual(normalized[2].tool_calls[0]["name"], "lookup_price")
        self.assertEqual(normalized[2].tool_calls[0]["args"], {"ticker": "NVDA"})
        self.assertIsInstance(normalized[3], ToolMessage)

    def test_output_schema_construction_builds_exact_tool_branches(self):
        tool_schemas = normalize_tools_for_codex([lookup_price])
        schema = build_tool_response_schema(tool_schemas)
        required_schema = build_tool_response_schema(tool_schemas, allow_final=False)
        plain_schema = build_plain_response_schema()

        self.assertEqual(plain_schema["required"], ["answer"])
        self.assertEqual(schema["properties"]["mode"]["enum"], ["final", "tool_calls"])
        tool_branch = schema["properties"]["tool_calls"]["items"]
        self.assertEqual(tool_branch["properties"]["name"]["const"], "lookup_price")
        self.assertIn("arguments", tool_branch["required"])
        self.assertEqual(required_schema["properties"]["mode"]["const"], "tool_calls")

        # With more than one tool the schema is expected to use a generic item
        # shape: an enum of names plus a JSON-encoded argument string.
        generic_schema = build_tool_response_schema(
            normalize_tools_for_codex([lookup_price, lookup_volume])
        )
        generic_items = generic_schema["properties"]["tool_calls"]["items"]
        self.assertEqual(generic_items["properties"]["name"]["type"], "string")
        self.assertIn("enum", generic_items["properties"]["name"])
        self.assertEqual(generic_items["properties"]["arguments_json"]["type"], "string")

    def test_plain_final_response_parsing(self):
        session = FakeCodexSession(
            responses=['{"answer":"Final decision"}'],
        )
        llm = _build_codex_llm(session)

        result = llm.invoke("Give me the final answer.")

        self.assertEqual(result.content, "Final decision")
        self.assertEqual(session.started, 1)

    def test_invoke_accepts_openai_style_message_dicts(self):
        session = FakeCodexSession(
            responses=['{"answer":"From dict transcript"}'],
        )
        llm = _build_codex_llm(session)

        result = llm.invoke(
            [
                {"role": "system", "content": "system"},
                {"role": "user", "content": "user"},
            ]
        )

        self.assertEqual(result.content, "From dict transcript")
        self.assertIn("[System]\nsystem", session.invocations[0]["prompt"])
        self.assertIn("[Human]\nuser", session.invocations[0]["prompt"])

    def test_invoke_accepts_langchain_message_sequences(self):
        session = FakeCodexSession(
            responses=['{"answer":"From BaseMessage transcript"}'],
        )
        llm = _build_codex_llm(session)

        result = llm.invoke(
            [
                SystemMessage(content="system"),
                HumanMessage(content="user"),
            ]
        )

        self.assertEqual(result.content, "From BaseMessage transcript")
        self.assertIn("[System]\nsystem", session.invocations[0]["prompt"])
        self.assertIn("[Human]\nuser", session.invocations[0]["prompt"])

    def test_tool_call_response_parsing_populates_ai_message_tool_calls(self):
        session = FakeCodexSession(
            responses=[
                '{"mode":"tool_calls","content":"Need data first","tool_calls":[{"name":"lookup_price","arguments":{"ticker":"NVDA"}}]}'
            ],
        )
        llm = _build_codex_llm(session)
        prompt = ChatPromptTemplate.from_messages(
            [("system", "Use tools if needed."), ("human", "Analyze NVDA")]
        )

        result = (prompt | llm.bind_tools([lookup_price])).invoke({})

        self.assertEqual(result.content, "Need data first")
        self.assertEqual(result.tool_calls[0]["name"], "lookup_price")
        self.assertEqual(result.tool_calls[0]["args"], {"ticker": "NVDA"})
        self.assertRegex(result.tool_calls[0]["id"], r"^call_[0-9a-f]{32}$")

    def test_multi_tool_response_parses_arguments_json(self):
        session = FakeCodexSession(
            responses=[
                '{"mode":"tool_calls","content":"","tool_calls":[{"name":"lookup_price","arguments_json":"{\\"ticker\\":\\"NVDA\\"}"}]}'
            ],
        )
        llm = _build_codex_llm(session)

        result = llm.bind_tools([lookup_price, lookup_volume]).invoke("Analyze NVDA")

        self.assertEqual(result.tool_calls[0]["name"], "lookup_price")
        self.assertEqual(result.tool_calls[0]["args"], {"ticker": "NVDA"})

    def test_bind_tools_honors_required_and_named_tool_choice(self):
        # tool_choice="required": the schema pins mode to "tool_calls" and the
        # prompt carries the matching instruction.
        required_session = FakeCodexSession(
            responses=[
                '{"mode":"tool_calls","content":"Calling tool","tool_calls":[{"name":"lookup_price","arguments":{"ticker":"NVDA"}}]}'
            ],
        )
        required_llm = _build_codex_llm(required_session)

        required_result = required_llm.bind_tools([lookup_price], tool_choice="required").invoke(
            "Analyze NVDA"
        )

        self.assertTrue(required_result.tool_calls)
        self.assertEqual(
            required_session.invocations[0]["output_schema"]["properties"]["mode"]["const"],
            "tool_calls",
        )
        self.assertIn(
            "must respond with one or more tool calls",
            required_session.invocations[0]["prompt"].lower(),
        )

        # Named tool choice: the schema pins the tool name and the prompt
        # names the tool explicitly.
        named_session = FakeCodexSession(
            responses=[
                '{"mode":"tool_calls","content":"Calling named tool","tool_calls":[{"name":"lookup_price","arguments":{"ticker":"MSFT"}}]}'
            ],
        )
        named_llm = _build_codex_llm(named_session)

        named_result = named_llm.bind_tools(
            [lookup_price],
            tool_choice={"type": "function", "function": {"name": "lookup_price"}},
        ).invoke("Analyze MSFT")

        self.assertEqual(named_result.tool_calls[0]["name"], "lookup_price")
        tool_item = named_session.invocations[0]["output_schema"]["properties"]["tool_calls"]["items"]
        self.assertEqual(tool_item["properties"]["name"]["const"], "lookup_price")
        self.assertIn(
            "must call the tool named `lookup_price`",
            named_session.invocations[0]["prompt"].lower(),
        )

    def test_malformed_json_retries_and_surfaces_error_when_exhausted(self):
        # One malformed response followed by a valid one: a single retry recovers.
        session = FakeCodexSession(
            responses=["not json", '{"answer":"Recovered"}'],
        )
        llm = _build_codex_llm(session, codex_max_retries=1)

        result = llm.invoke("Recover after malformed JSON.")

        self.assertEqual(result.content, "Recovered")
        self.assertEqual(len(session.invocations), 2)
        self.assertIn(
            "previous response did not satisfy tradingagents validation",
            session.invocations[1]["prompt"].lower(),
        )

        # Every response malformed: the retry budget runs out and the
        # structured-output error surfaces.
        failing_session = FakeCodexSession(
            responses=["still bad", "still bad again"],
        )
        failing_llm = _build_codex_llm(failing_session, codex_max_retries=1)

        with self.assertRaises(CodexStructuredOutputError):
            failing_llm.invoke("This should fail.")
        # Same attempt count as the recovery case above (initial call + 1 retry).
        self.assertEqual(len(failing_session.invocations), 2)

    def test_runtime_errors_do_not_retry_as_json_failures(self):
        class FailingSession(FakeCodexSession):
            def invoke(self, **kwargs):
                # Record the attempt so the test can verify nothing was retried.
                self.invocations.append(kwargs)
                raise RuntimeError("transport exploded")

        session = FailingSession()
        llm = _build_codex_llm(session, codex_max_retries=2)

        with self.assertRaisesRegex(RuntimeError, "transport exploded"):
            llm.invoke("fail fast")
        # A retry budget is configured, yet the runtime error must not consume it.
        self.assertEqual(len(session.invocations), 1)

    def test_provider_codex_smoke_covers_bind_tools_and_direct_invoke_paths(self):
        session = FakeCodexSession(
            responses=[
                '{"mode":"tool_calls","content":"Fetching market data","tool_calls":[{"name":"lookup_price","arguments":{"ticker":"NVDA"}}]}',
                '{"answer":"Rating: Buy\\nExecutive Summary: Add gradually."}',
            ],
        )
        llm = _build_codex_llm(session)
        analyst_prompt = ChatPromptTemplate.from_messages(
            [("system", "Use tools when you need extra data."), ("human", "Analyze NVDA.")]
        )

        market_result = (analyst_prompt | llm.bind_tools([lookup_price])).invoke({})

        self.assertTrue(market_result.tool_calls)
        self.assertEqual(market_result.tool_calls[0]["name"], "lookup_price")

        decision = llm.invoke("Produce the final trade decision.")

        self.assertIn("Rating: Buy", decision.content)
        self.assertEqual(len(session.invocations), 2)

    def test_preflight_detects_missing_auth_and_missing_binary(self):
        def valid_factory(**kwargs):
            # Account present: preflight is expected to succeed.
            return FakeCodexSession(
                account_payload={
                    "account": {"type": "chatgpt", "email": "user@example.com"},
                    "requiresOpenaiAuth": True,
                }
            )

        result = run_codex_preflight(
            codex_binary="C:\\fake\\codex.exe",
            model="gpt-5.4",
            request_timeout=10.0,
            workspace_dir="C:/tmp/codex-workspace",
            cleanup_threads=True,
            session_factory=valid_factory,
        )
        self.assertEqual(result.account["type"], "chatgpt")

        def authless_factory(**kwargs):
            # No account while auth is required: preflight must raise.
            return FakeCodexSession(
                account_payload={"account": None, "requiresOpenaiAuth": True}
            )

        with self.assertRaises(CodexAppServerAuthError):
            run_codex_preflight(
                codex_binary="C:\\fake\\codex.exe",
                model="gpt-5.4",
                request_timeout=10.0,
                workspace_dir="C:/tmp/codex-workspace",
                cleanup_threads=True,
                session_factory=authless_factory,
            )

        # Binary resolution yields nothing: preflight must raise a binary error.
        with patch(
            "tradingagents.llm_clients.codex_preflight.resolve_codex_binary",
            return_value=None,
        ):
            with self.assertRaises(CodexAppServerBinaryError):
                run_codex_preflight(
                    codex_binary="definitely-missing-codex-binary",
                    model="gpt-5.4",
                    request_timeout=10.0,
                    workspace_dir="C:/tmp/codex-workspace",
                    cleanup_threads=True,
                )

    def test_preflight_uses_resolved_binary_path(self):
        captured = {}

        def factory(**kwargs):
            # Capture the binary path preflight hands to the session factory.
            captured["codex_binary"] = kwargs["codex_binary"]
            return FakeCodexSession(**kwargs)

        with patch(
            "tradingagents.llm_clients.codex_preflight.resolve_codex_binary",
            return_value="C:/resolved/codex.exe",
        ):
            run_codex_preflight(
                codex_binary=None,
                model="gpt-5.4",
                request_timeout=10.0,
                workspace_dir="C:/tmp/codex-workspace",
                cleanup_threads=True,
                session_factory=factory,
            )

        self.assertEqual(captured["codex_binary"], "C:/resolved/codex.exe")


if __name__ == "__main__":
    unittest.main()