fix: report LLM calls, tool calls, and token usage for claude_agent

ChatClaudeAgent is a plain Runnable rather than a BaseChatModel, so
LangChain's callback system never fired on_chat_model_start / on_llm_end
for it — leaving the CLI TUI stuck on "LLM: 0" and "Tokens: --" during
runs. Pop callbacks out of the LLM kwargs, invoke them manually around
each SDK call, and attach usage_metadata extracted from the SDK's
ResultMessage (input, output, total — including cached input) to the
returned AIMessage so downstream handlers pick it up.

Tool callbacks now also fire through the MCP wrapper: forward the
callback list into each wrapped LangChain tool's invocation config so
StatsCallbackHandler sees on_tool_start/on_tool_end when the SDK loop
calls a tool.

Verified via direct StatsCallbackHandler round-trip on both Shape A
(ChatClaudeAgent.invoke) and Shape B (run_sdk_analyst): llm_calls,
tool_calls, tokens_in, and tokens_out all increment as expected.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Michael Yang 2026-04-14 17:32:11 -04:00
parent 39182785ce
commit 3d8341c104
3 changed files with 126 additions and 15 deletions

View File

@ -23,7 +23,11 @@ from typing import Any, Dict, List
from langchain_core.messages import AIMessage, HumanMessage
from tradingagents.llm_clients.claude_agent_client import ChatClaudeAgent
from tradingagents.llm_clients.claude_agent_client import (
ChatClaudeAgent,
extract_usage,
fire_llm_callbacks,
)
from tradingagents.llm_clients.mcp_tool_adapter import build_mcp_server
@ -109,7 +113,8 @@ async def _run(
lc_tools: List[Any],
server_name: str,
model: str,
) -> str:
callbacks: List[Any],
) -> tuple[str, Dict[str, int]]:
from claude_agent_sdk import (
AssistantMessage,
ClaudeAgentOptions,
@ -119,7 +124,7 @@ async def _run(
_log(f"[{server_name}] building MCP server with {len(lc_tools)} tools: "
f"{[t.name for t in lc_tools]}")
server, allowed = build_mcp_server(server_name, lc_tools)
server, allowed = build_mcp_server(server_name, lc_tools, callbacks=callbacks)
_log(f"[{server_name}] allowed_tools={allowed}")
options = ClaudeAgentOptions(
@ -140,6 +145,7 @@ async def _run(
start = time.monotonic()
text_parts: List[str] = []
final_usage: Dict[str, int] = {}
msg_count = 0
async for msg in query(prompt=user_prompt, options=options):
msg_count += 1
@ -149,11 +155,15 @@ async def _run(
for block in msg.content:
if isinstance(block, TextBlock):
text_parts.append(block.text)
sdk_usage = getattr(msg, "usage", None)
if isinstance(sdk_usage, dict) and sdk_usage:
final_usage = extract_usage(sdk_usage)
elapsed = time.monotonic() - start
_log(f"[{server_name}] query complete after {elapsed:.1f}s, "
f"{msg_count} messages, {sum(len(t) for t in text_parts)} chars")
return "\n".join(text_parts).strip()
f"{msg_count} messages, {sum(len(t) for t in text_parts)} chars, "
f"usage={final_usage}")
return "\n".join(text_parts).strip(), final_usage
def run_sdk_analyst(
@ -169,20 +179,23 @@ def run_sdk_analyst(
_log(f"=== run_sdk_analyst start: server={server_name} report_field={report_field} "
f"ticker={state.get('company_of_interest')!r} date={state.get('trade_date')!r} ===")
try:
report = asyncio.run(
report, usage = asyncio.run(
_run(
system_prompt=system_prompt,
user_prompt=user_prompt,
lc_tools=lc_tools,
server_name=server_name,
model=llm.model,
callbacks=llm.callbacks,
)
)
except Exception as e:
_log(f"[{server_name}] EXCEPTION: {type(e).__name__}: {e}")
raise
_log(f"=== run_sdk_analyst done: {report_field}={len(report)} chars ===")
_log(f"=== run_sdk_analyst done: {report_field}={len(report)} chars usage={usage} ===")
message = AIMessage(content=report, usage_metadata=usage or None)
fire_llm_callbacks(llm.callbacks, message, user_prompt)
return {
"messages": [AIMessage(content=report)],
"messages": [message],
report_field: report,
}

View File

@ -10,9 +10,11 @@ Shape B.
"""
import asyncio
from typing import Any, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple
from uuid import uuid4
from langchain_core.messages import AIMessage, BaseMessage, SystemMessage
from langchain_core.outputs import ChatGeneration, LLMResult
from langchain_core.prompt_values import PromptValue
from langchain_core.runnables import Runnable
@ -69,6 +71,79 @@ def _coerce_input(input: Any) -> Tuple[Optional[str], str]:
return system_prompt, user_prompt
def extract_usage(sdk_usage: Any) -> Dict[str, int]:
    """Normalize the SDK's ``usage`` dict into LangChain's usage_metadata shape.

    Accepts either a plain dict (ResultMessage.usage) or None. Returns a dict
    with ``input_tokens``, ``output_tokens``, ``total_tokens`` keys; a
    non-dict input yields an all-zero result.
    """
    if not isinstance(sdk_usage, dict):
        return {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}

    def _first_truthy(*keys: str) -> int:
        # The SDK mirrors Anthropic usage shape, but key names vary across
        # versions — take the first key holding a truthy count.
        for key in keys:
            count = sdk_usage.get(key)
            if count:
                return count
        return 0

    prompt_side = _first_truthy("input_tokens", "prompt_tokens")
    completion_side = _first_truthy("output_tokens", "completion_tokens")

    # Cached input (reads and cache creation) still counts against the input
    # budget so the TUI reflects real consumption.
    for cache_key in ("cache_read_input_tokens", "cache_creation_input_tokens"):
        prompt_side += sdk_usage.get(cache_key, 0) or 0

    total = prompt_side + completion_side
    return {
        "input_tokens": int(prompt_side),
        "output_tokens": int(completion_side),
        "total_tokens": int(total),
    }
def fire_llm_callbacks(
    callbacks: List[Any],
    message: AIMessage,
    prompt_preview: str,
) -> None:
    """Manually fire on_chat_model_start + on_llm_end on the given handlers.

    ChatClaudeAgent is a plain Runnable, so LangChain does not fire chat-model
    callbacks automatically. We invoke them ourselves so stats handlers
    (StatsCallbackHandler in the CLI, etc.) see LLM calls and token usage.
    """
    if not callbacks:
        return

    run_id = uuid4()

    def _best_effort(handler: Any, hook: str, *args: Any) -> None:
        # Handler signatures vary across LangChain versions: try with run_id
        # first, retry without it on TypeError, and never let a misbehaving
        # handler break the run.
        method = getattr(handler, hook, None)
        if method is None:
            return
        try:
            method(*args, run_id=run_id)
        except TypeError:
            try:
                method(*args)
            except Exception:
                pass
        except Exception:
            pass

    serialized = {"name": "ChatClaudeAgent"}
    chat_batches = [[{"role": "user", "content": prompt_preview}]]
    for handler in callbacks:
        _best_effort(handler, "on_chat_model_start", serialized, chat_batches)

    llm_result = LLMResult(generations=[[ChatGeneration(message=message)]])
    for handler in callbacks:
        _best_effort(handler, "on_llm_end", llm_result)
class ChatClaudeAgent(Runnable):
"""LangChain-compatible Runnable that routes inference through claude-agent-sdk.
@ -78,11 +153,16 @@ class ChatClaudeAgent(Runnable):
def __init__(self, model: str, **kwargs: Any) -> None:
self.model = model
# Pull callbacks out so we can fire them manually around each invoke —
# Runnable doesn't trigger chat-model callbacks the way BaseChatModel does.
self.callbacks: List[Any] = list(kwargs.pop("callbacks", None) or [])
self.kwargs = kwargs
def invoke(self, input: Any, config: Any = None, **kwargs: Any) -> AIMessage:
system_prompt, prompt = _coerce_input(input)
return asyncio.run(self._ainvoke(prompt, system_prompt))
message = asyncio.run(self._ainvoke(prompt, system_prompt))
fire_llm_callbacks(self.callbacks, message, prompt)
return message
async def _ainvoke(self, prompt: str, system_prompt: Optional[str]) -> AIMessage:
from claude_agent_sdk import (
@ -104,13 +184,22 @@ class ChatClaudeAgent(Runnable):
options = ClaudeAgentOptions(**options_kwargs)
text_parts: List[str] = []
final_usage: Dict[str, int] = {}
async for msg in query(prompt=prompt, options=options):
if isinstance(msg, AssistantMessage):
for block in msg.content:
if isinstance(block, TextBlock):
text_parts.append(block.text)
# The ResultMessage at the end carries cumulative usage; prefer it.
# Fall back to AssistantMessage.usage if ResultMessage omits it.
sdk_usage = getattr(msg, "usage", None)
if isinstance(sdk_usage, dict) and sdk_usage:
final_usage = extract_usage(sdk_usage)
return AIMessage(content="\n".join(text_parts))
return AIMessage(
content="\n".join(text_parts),
usage_metadata=final_usage or None,
)
def bind_tools(self, tools: Any, **kwargs: Any) -> Any:
raise NotImplementedError(

View File

@ -6,20 +6,23 @@ dict and returns {"content": [{"type": "text", "text": str}]}.
Used by the SDK-native analyst runner to let Claude Code (authenticated via a
Max/Pro subscription) call the same data tools the legacy analyst path uses.
Callbacks passed in from the graph are forwarded into each tool invocation so
that StatsCallbackHandler (and any other handler) sees on_tool_start/end.
"""
from typing import Any, Dict, List, Tuple
from typing import Any, Dict, List, Optional, Tuple
from claude_agent_sdk import create_sdk_mcp_server, tool
def _wrap_lc_tool(lc_tool: Any):
def _wrap_lc_tool(lc_tool: Any, callbacks: Optional[List[Any]]):
"""Wrap a single LangChain BaseTool as an SDK @tool-decorated async callable."""
schema = (
lc_tool.args_schema.model_json_schema()
if lc_tool.args_schema is not None
else {"type": "object", "properties": {}}
)
config = {"callbacks": callbacks} if callbacks else None
@tool(
name=lc_tool.name,
@ -27,7 +30,8 @@ def _wrap_lc_tool(lc_tool: Any):
input_schema=schema,
)
async def _wrapped(args: Dict[str, Any]) -> Dict[str, Any]:
result = lc_tool.invoke(args)
# Pass callbacks via config so BaseTool fires on_tool_start/on_tool_end.
result = lc_tool.invoke(args, config=config) if config else lc_tool.invoke(args)
return {"content": [{"type": "text", "text": str(result)}]}
return _wrapped
@ -36,13 +40,18 @@ def _wrap_lc_tool(lc_tool: Any):
def build_mcp_server(
    server_name: str,
    lc_tools: List[Any],
    callbacks: Optional[List[Any]] = None,
) -> Tuple[Any, List[str]]:
    """Build an in-process MCP server from LangChain tools.

    Returns the server instance and the list of fully-qualified tool names
    (``mcp__<server>__<tool>``) suitable for passing to ``allowed_tools``.

    ``callbacks`` are forwarded into each tool's LangChain config so that
    on_tool_start/on_tool_end fire on the stats handler during SDK-driven
    tool calls.
    """
    sdk_tools = [_wrap_lc_tool(lc_tool, callbacks) for lc_tool in lc_tools]
    server = create_sdk_mcp_server(name=server_name, version="1.0.0", tools=sdk_tools)
    qualified_prefix = f"mcp__{server_name}__"
    allowed = [qualified_prefix + lc_tool.name for lc_tool in lc_tools]
    return server, allowed