# Unit tests for the Codex app-server LLM client stack: binary resolution,
# message normalization, structured-output schemas, retries, and preflight.
import re  # NOTE(review): `re` appears unused in this module — confirm before removing.
import unittest
from collections import deque
from pathlib import Path
from unittest.mock import patch

from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
from langchain_core.prompts import ChatPromptTemplate

from tradingagents.llm_clients.codex_app_server import (
    CodexAppServerAuthError,
    CodexAppServerBinaryError,
    CodexInvocationResult,
    CodexStructuredOutputError,
)
from tradingagents.llm_clients.codex_binary import resolve_codex_binary
from tradingagents.llm_clients.codex_message_codec import normalize_input_messages
from tradingagents.llm_clients.codex_preflight import run_codex_preflight
from tradingagents.llm_clients.codex_schema import (
    build_plain_response_schema,
    build_tool_response_schema,
    normalize_tools_for_codex,
)
from tradingagents.llm_clients.factory import create_llm_client
def lookup_price(ticker: str) -> str:
    """Return the latest price snapshot for a ticker."""
    # Test fixture: only the signature and docstring feed Codex tool-schema
    # generation, but return a string so the `-> str` annotation actually
    # holds if the tool is ever executed (the stub previously returned None).
    return f"{ticker}: price snapshot unavailable (test stub)"
def lookup_volume(ticker: str) -> str:
    """Return the latest volume snapshot for a ticker."""
    # Test fixture: mirrors lookup_price — returns a real string so the
    # declared return type is honored instead of silently yielding None.
    return f"{ticker}: volume snapshot unavailable (test stub)"
class FakeCodexSession:
    """In-memory stand-in for a Codex app-server session.

    Records every call made by the client under test and replays a FIFO
    queue of canned JSON payloads instead of talking to a real ``codex``
    binary.
    """

    def __init__(
        self,
        *,
        codex_binary=None,
        request_timeout=0,
        workspace_dir="",
        cleanup_threads=True,
        responses=None,
        account_payload=None,
        models_payload=None,
    ):
        # Constructor kwargs are captured verbatim so tests can later
        # inspect how the factory wired up the session.
        self.codex_binary = codex_binary
        self.request_timeout = request_timeout
        self.workspace_dir = workspace_dir
        self.cleanup_threads = cleanup_threads
        # One canned final_text payload is consumed per invoke().
        self.responses = deque(responses or [])
        default_account = {
            "account": {"type": "chatgpt"},
            "requiresOpenaiAuth": False,
        }
        default_models = {"data": [{"id": "gpt-5.4", "model": "gpt-5.4"}]}
        self.account_payload = account_payload or default_account
        self.models_payload = models_payload or default_models
        # Lifecycle counters and call log inspected by test assertions.
        self.started = 0
        self.closed = 0
        self.invocations = []

    def start(self):
        """Count lifecycle start() calls."""
        self.started += 1

    def close(self):
        """Count lifecycle close() calls."""
        self.closed += 1

    def account_read(self):
        """Return the canned account payload."""
        return self.account_payload

    def model_list(self, include_hidden=True):
        """Return the canned model listing; ``include_hidden`` is ignored."""
        return self.models_payload

    def invoke(
        self,
        *,
        prompt,
        model,
        output_schema,
        reasoning_effort,
        summary,
        personality,
    ):
        """Record the invocation verbatim and pop the next canned response."""
        call_record = {
            "prompt": prompt,
            "model": model,
            "output_schema": output_schema,
            "reasoning_effort": reasoning_effort,
            "summary": summary,
            "personality": personality,
        }
        self.invocations.append(call_record)
        if not self.responses:
            raise AssertionError("No fake Codex responses left.")
        return CodexInvocationResult(final_text=self.responses.popleft(), notifications=[])
class CodexProviderTests(unittest.TestCase):
    """Unit tests for the Codex-backed LLM client.

    Every test injects :class:`FakeCodexSession` via ``session_factory`` and a
    no-op ``preflight_runner``, so no real ``codex`` binary, auth, or network
    is exercised.
    """

    def test_resolve_codex_binary_uses_windows_vscode_fallback(self):
        """On Windows, fall back to the VS Code extension's bundled codex.exe."""
        fake_home = Path("C:/Users/tester")
        candidate = fake_home / ".vscode/extensions/openai.chatgpt-1.0.0/bin/windows-x86_64/codex.exe"

        with (
            patch("tradingagents.llm_clients.codex_binary.os.name", "nt"),
            patch("tradingagents.llm_clients.codex_binary.Path.home", return_value=fake_home),
            # Nothing on PATH, so resolution must use the extension glob.
            patch("tradingagents.llm_clients.codex_binary.shutil.which", return_value=None),
            patch(
                "tradingagents.llm_clients.codex_binary.Path.glob",
                return_value=[candidate],
            ),
            patch("pathlib.Path.is_file", return_value=True),
            patch("pathlib.Path.exists", return_value=True),
            patch("pathlib.Path.stat") as mocked_stat,
        ):
            # st_mtime is presumably the sort key for picking the newest
            # extension build — TODO confirm against codex_binary.
            mocked_stat.return_value.st_mtime = 1
            resolved = resolve_codex_binary(None)

        self.assertEqual(resolved, str(candidate))

    def test_resolve_codex_binary_skips_unusable_path_alias_on_windows(self):
        """An unusable PATH alias is skipped in favor of a usable fallback."""
        fake_home = Path("C:/Users/tester")
        alias_path = "C:/Program Files/WindowsApps/OpenAI.Codex/app/resources/codex.exe"
        candidate = fake_home / ".vscode/extensions/openai.chatgpt-1.0.0/bin/windows-x86_64/codex.exe"

        with (
            patch("tradingagents.llm_clients.codex_binary.os.name", "nt"),
            patch("tradingagents.llm_clients.codex_binary.Path.home", return_value=fake_home),
            # PATH lookup returns the alias, but it will be deemed unusable.
            patch("tradingagents.llm_clients.codex_binary.shutil.which", return_value=alias_path),
            patch(
                "tradingagents.llm_clients.codex_binary.Path.glob",
                return_value=[candidate],
            ),
            patch("pathlib.Path.is_file", return_value=True),
            patch("pathlib.Path.exists", return_value=True),
            patch("pathlib.Path.stat") as mocked_stat,
            # Only the alias is reported unusable; the VS Code candidate is fine.
            patch(
                "tradingagents.llm_clients.codex_binary._is_usable_codex_binary",
                side_effect=lambda path: path != alias_path,
            ),
        ):
            mocked_stat.return_value.st_mtime = 1
            resolved = resolve_codex_binary(None)

        self.assertEqual(resolved, str(candidate))

    def test_resolve_codex_binary_uses_env_override(self):
        """The CODEX_BINARY env var wins when PATH lookup finds nothing."""
        with (
            patch("tradingagents.llm_clients.codex_binary.os.name", "nt"),
            patch("tradingagents.llm_clients.codex_binary.shutil.which", return_value=None),
            patch.dict("os.environ", {"CODEX_BINARY": "C:/custom/codex.exe"}, clear=False),
            patch("pathlib.Path.is_file", return_value=True),
            patch(
                "tradingagents.llm_clients.codex_binary._is_usable_codex_binary",
                return_value=True,
            ),
        ):
            resolved = resolve_codex_binary(None)

        # Compare as Path objects to sidestep slash-direction differences.
        self.assertEqual(Path(resolved), Path("C:/custom/codex.exe"))

    def test_resolve_codex_binary_checks_explicit_binary_usability(self):
        """An explicitly supplied binary path is returned as-is.

        NOTE(review): despite the test name, the assertion shows the
        usability probe returning False does NOT block an explicit path —
        presumably the caller's explicit choice is trusted; confirm intent.
        """
        with (
            patch("tradingagents.llm_clients.codex_binary.os.name", "nt"),
            patch("pathlib.Path.is_file", return_value=True),
            patch(
                "tradingagents.llm_clients.codex_binary._is_usable_codex_binary",
                return_value=False,
            ),
        ):
            resolved = resolve_codex_binary("C:/custom/codex.exe")

        self.assertEqual(Path(resolved), Path("C:/custom/codex.exe"))

    def test_message_normalization_supports_str_messages_and_openai_dicts(self):
        """OpenAI-style role dicts normalize into LangChain message objects."""
        normalized = normalize_input_messages(
            [
                {"role": "system", "content": "system"},
                {"role": "user", "content": "user"},
                {
                    "role": "assistant",
                    "content": "",
                    "tool_calls": [
                        {
                            "id": "call_123",
                            "type": "function",
                            "function": {
                                "name": "lookup_price",
                                "arguments": '{"ticker":"NVDA"}',
                            },
                        }
                    ],
                },
                {"role": "tool", "tool_call_id": "call_123", "content": "42"},
            ]
        )

        self.assertIsInstance(normalized[0], SystemMessage)
        self.assertIsInstance(normalized[1], HumanMessage)
        self.assertIsInstance(normalized[2], AIMessage)
        # The JSON-string "arguments" must be decoded into an args dict.
        self.assertEqual(normalized[2].tool_calls[0]["name"], "lookup_price")
        self.assertEqual(normalized[2].tool_calls[0]["args"], {"ticker": "NVDA"})
        self.assertIsInstance(normalized[3], ToolMessage)

    def test_output_schema_construction_builds_exact_tool_branches(self):
        """Single-tool schemas pin the tool name; multi-tool schemas stay generic."""
        tool_schemas = normalize_tools_for_codex([lookup_price])
        schema = build_tool_response_schema(tool_schemas)
        required_schema = build_tool_response_schema(tool_schemas, allow_final=False)
        plain_schema = build_plain_response_schema()

        self.assertEqual(plain_schema["required"], ["answer"])
        self.assertEqual(schema["properties"]["mode"]["enum"], ["final", "tool_calls"])
        tool_branch = schema["properties"]["tool_calls"]["items"]
        self.assertEqual(tool_branch["properties"]["name"]["const"], "lookup_price")
        self.assertIn("arguments", tool_branch["required"])
        # allow_final=False collapses "mode" to a const, forcing tool calls.
        self.assertEqual(required_schema["properties"]["mode"]["const"], "tool_calls")

        generic_schema = build_tool_response_schema(
            normalize_tools_for_codex([lookup_price, lookup_volume])
        )
        generic_items = generic_schema["properties"]["tool_calls"]["items"]
        self.assertEqual(generic_items["properties"]["name"]["type"], "string")
        self.assertIn("enum", generic_items["properties"]["name"])
        # With multiple tools, arguments travel as an opaque JSON string.
        self.assertEqual(generic_items["properties"]["arguments_json"]["type"], "string")

    def test_plain_final_response_parsing(self):
        """A plain {"answer": ...} payload becomes the AIMessage content."""
        session = FakeCodexSession(
            responses=['{"answer":"Final decision"}'],
        )
        llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            session_factory=lambda **kwargs: session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()

        result = llm.invoke("Give me the final answer.")

        self.assertEqual(result.content, "Final decision")
        # The client must start its session exactly once per invocation chain.
        self.assertEqual(session.started, 1)

    def test_invoke_accepts_openai_style_message_dicts(self):
        """invoke() accepts role dicts and renders them into the prompt transcript."""
        session = FakeCodexSession(
            responses=['{"answer":"From dict transcript"}'],
        )
        llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            session_factory=lambda **kwargs: session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()

        result = llm.invoke(
            [
                {"role": "system", "content": "system"},
                {"role": "user", "content": "user"},
            ]
        )

        self.assertEqual(result.content, "From dict transcript")
        # Transcript sections are labeled by role in the rendered prompt.
        self.assertIn("[System]\nsystem", session.invocations[0]["prompt"])
        self.assertIn("[Human]\nuser", session.invocations[0]["prompt"])

    def test_invoke_accepts_langchain_message_sequences(self):
        """invoke() accepts BaseMessage sequences with the same transcript rendering."""
        session = FakeCodexSession(
            responses=['{"answer":"From BaseMessage transcript"}'],
        )
        llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            session_factory=lambda **kwargs: session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()

        result = llm.invoke(
            [
                SystemMessage(content="system"),
                HumanMessage(content="user"),
            ]
        )

        self.assertEqual(result.content, "From BaseMessage transcript")
        self.assertIn("[System]\nsystem", session.invocations[0]["prompt"])
        self.assertIn("[Human]\nuser", session.invocations[0]["prompt"])

    def test_tool_call_response_parsing_populates_ai_message_tool_calls(self):
        """A tool_calls-mode payload yields AIMessage.tool_calls with synthetic ids."""
        session = FakeCodexSession(
            responses=[
                '{"mode":"tool_calls","content":"Need data first","tool_calls":[{"name":"lookup_price","arguments":{"ticker":"NVDA"}}]}'
            ],
        )
        llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            session_factory=lambda **kwargs: session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()

        prompt = ChatPromptTemplate.from_messages(
            [("system", "Use tools if needed."), ("human", "Analyze NVDA")]
        )
        result = (prompt | llm.bind_tools([lookup_price])).invoke({})

        self.assertEqual(result.content, "Need data first")
        self.assertEqual(result.tool_calls[0]["name"], "lookup_price")
        self.assertEqual(result.tool_calls[0]["args"], {"ticker": "NVDA"})
        # Synthetic ids mimic OpenAI's call_<32 hex chars> shape.
        self.assertRegex(result.tool_calls[0]["id"], r"^call_[0-9a-f]{32}$")

    def test_multi_tool_response_parses_arguments_json(self):
        """String-encoded arguments_json is decoded into the args dict."""
        session = FakeCodexSession(
            responses=[
                '{"mode":"tool_calls","content":"","tool_calls":[{"name":"lookup_price","arguments_json":"{\\"ticker\\":\\"NVDA\\"}"}]}'
            ],
        )
        llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            session_factory=lambda **kwargs: session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()

        result = llm.bind_tools([lookup_price, lookup_volume]).invoke("Analyze NVDA")

        self.assertEqual(result.tool_calls[0]["name"], "lookup_price")
        self.assertEqual(result.tool_calls[0]["args"], {"ticker": "NVDA"})

    def test_bind_tools_honors_required_and_named_tool_choice(self):
        """tool_choice="required" and named tool_choice shape schema and prompt."""
        required_session = FakeCodexSession(
            responses=[
                '{"mode":"tool_calls","content":"Calling tool","tool_calls":[{"name":"lookup_price","arguments":{"ticker":"NVDA"}}]}'
            ],
        )
        required_llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            session_factory=lambda **kwargs: required_session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()

        required_result = required_llm.bind_tools([lookup_price], tool_choice="required").invoke(
            "Analyze NVDA"
        )
        self.assertTrue(required_result.tool_calls)
        # "required" must force tool_calls mode via a const in the schema...
        self.assertEqual(
            required_session.invocations[0]["output_schema"]["properties"]["mode"]["const"],
            "tool_calls",
        )
        # ...and state the obligation in the rendered prompt.
        self.assertIn(
            "must respond with one or more tool calls",
            required_session.invocations[0]["prompt"].lower(),
        )

        named_session = FakeCodexSession(
            responses=[
                '{"mode":"tool_calls","content":"Calling named tool","tool_calls":[{"name":"lookup_price","arguments":{"ticker":"MSFT"}}]}'
            ],
        )
        named_llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            session_factory=lambda **kwargs: named_session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()

        named_result = named_llm.bind_tools(
            [lookup_price],
            tool_choice={"type": "function", "function": {"name": "lookup_price"}},
        ).invoke("Analyze MSFT")
        self.assertEqual(named_result.tool_calls[0]["name"], "lookup_price")
        # A named tool_choice pins the tool name to a const in the schema.
        tool_item = named_session.invocations[0]["output_schema"]["properties"]["tool_calls"]["items"]
        self.assertEqual(tool_item["properties"]["name"]["const"], "lookup_price")
        self.assertIn(
            "must call the tool named `lookup_price`",
            named_session.invocations[0]["prompt"].lower(),
        )

    def test_malformed_json_retries_and_surfaces_error_when_exhausted(self):
        """Malformed JSON triggers a corrective retry; exhaustion raises."""
        session = FakeCodexSession(
            responses=["not json", '{"answer":"Recovered"}'],
        )
        llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            codex_max_retries=1,
            session_factory=lambda **kwargs: session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()

        result = llm.invoke("Recover after malformed JSON.")
        self.assertEqual(result.content, "Recovered")
        # One original attempt plus exactly one retry.
        self.assertEqual(len(session.invocations), 2)
        # The retry prompt must explain why the previous output was rejected.
        self.assertIn(
            "previous response did not satisfy tradingagents validation",
            session.invocations[1]["prompt"].lower(),
        )

        failing_session = FakeCodexSession(
            responses=["still bad", "still bad again"],
        )
        failing_llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            codex_max_retries=1,
            session_factory=lambda **kwargs: failing_session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()

        # Both attempts malformed -> structured-output error surfaces.
        with self.assertRaises(CodexStructuredOutputError):
            failing_llm.invoke("This should fail.")

    def test_runtime_errors_do_not_retry_as_json_failures(self):
        """Transport-level errors propagate immediately instead of burning retries."""

        class FailingSession(FakeCodexSession):
            def invoke(self, **kwargs):
                raise RuntimeError("transport exploded")

        session = FailingSession()
        llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            codex_max_retries=2,
            session_factory=lambda **kwargs: session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()

        with self.assertRaisesRegex(RuntimeError, "transport exploded"):
            llm.invoke("fail fast")

    def test_provider_codex_smoke_covers_bind_tools_and_direct_invoke_paths(self):
        """Smoke test: tool-calling turn followed by a plain final answer."""
        session = FakeCodexSession(
            responses=[
                '{"mode":"tool_calls","content":"Fetching market data","tool_calls":[{"name":"lookup_price","arguments":{"ticker":"NVDA"}}]}',
                '{"answer":"Rating: Buy\\nExecutive Summary: Add gradually."}',
            ],
        )
        llm = create_llm_client(
            "codex",
            "gpt-5.4",
            codex_binary="C:/fake/codex",
            codex_workspace_dir="C:/tmp/codex-workspace",
            session_factory=lambda **kwargs: session,
            preflight_runner=lambda **kwargs: None,
        ).get_llm()

        analyst_prompt = ChatPromptTemplate.from_messages(
            [("system", "Use tools when you need extra data."), ("human", "Analyze NVDA.")]
        )
        market_result = (analyst_prompt | llm.bind_tools([lookup_price])).invoke({})
        self.assertTrue(market_result.tool_calls)
        self.assertEqual(market_result.tool_calls[0]["name"], "lookup_price")

        decision = llm.invoke("Produce the final trade decision.")
        self.assertIn("Rating: Buy", decision.content)
        # The same session served both turns.
        self.assertEqual(len(session.invocations), 2)

    def test_preflight_detects_missing_auth_and_missing_binary(self):
        """Preflight accepts a logged-in account and raises on missing auth/binary."""
        valid_factory = lambda **kwargs: FakeCodexSession(
            account_payload={
                "account": {"type": "chatgpt", "email": "user@example.com"},
                "requiresOpenaiAuth": True,
            }
        )
        result = run_codex_preflight(
            codex_binary="C:\\fake\\codex.exe",
            model="gpt-5.4",
            request_timeout=10.0,
            workspace_dir="C:/tmp/codex-workspace",
            cleanup_threads=True,
            session_factory=valid_factory,
        )
        self.assertEqual(result.account["type"], "chatgpt")

        # Auth required but no account present -> auth error.
        authless_factory = lambda **kwargs: FakeCodexSession(
            account_payload={"account": None, "requiresOpenaiAuth": True}
        )
        with self.assertRaises(CodexAppServerAuthError):
            run_codex_preflight(
                codex_binary="C:\\fake\\codex.exe",
                model="gpt-5.4",
                request_timeout=10.0,
                workspace_dir="C:/tmp/codex-workspace",
                cleanup_threads=True,
                session_factory=authless_factory,
            )

        # Resolution returning None means no binary could be located.
        with patch(
            "tradingagents.llm_clients.codex_preflight.resolve_codex_binary",
            return_value=None,
        ):
            with self.assertRaises(CodexAppServerBinaryError):
                run_codex_preflight(
                    codex_binary="definitely-missing-codex-binary",
                    model="gpt-5.4",
                    request_timeout=10.0,
                    workspace_dir="C:/tmp/codex-workspace",
                    cleanup_threads=True,
                )

    def test_preflight_uses_resolved_binary_path(self):
        """Preflight passes the resolved (not raw) binary path to the session."""
        captured = {}

        def factory(**kwargs):
            captured["codex_binary"] = kwargs["codex_binary"]
            return FakeCodexSession(**kwargs)

        with patch(
            "tradingagents.llm_clients.codex_preflight.resolve_codex_binary",
            return_value="C:/resolved/codex.exe",
        ):
            run_codex_preflight(
                codex_binary=None,
                model="gpt-5.4",
                request_timeout=10.0,
                workspace_dir="C:/tmp/codex-workspace",
                cleanup_threads=True,
                session_factory=factory,
            )

        self.assertEqual(captured["codex_binary"], "C:/resolved/codex.exe")
if __name__ == "__main__":
    # Allow running this test module directly with `python <file>`.
    unittest.main()