feat: unified report paths, structured observability logging, and memory system update (#22)

* gitignore

* feat: unify report paths under reports/daily/{date}/ hierarchy

All generated artifacts now land under a single reports/ tree:
- reports/daily/{date}/market/ for scan results (was results/macro_scan/)
- reports/daily/{date}/{TICKER}/ for per-ticker analysis (was reports/{TICKER}_{timestamp}/)
- reports/daily/{date}/{TICKER}/eval/ for eval logs (was eval_results/{TICKER}/...)

Adds tradingagents/report_paths.py with centralized path helpers used by
CLI commands, trading graph, and pipeline.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat: structured observability logging for LLM, tool, and vendor calls

Add RunLogger (tradingagents/observability.py) that emits JSON-lines events
for every LLM call (model, agent, tokens in/out, latency), tool invocation
(tool name, args, success, latency), data vendor call (method, vendor,
success/failure, latency), and report save.

Integration points:
- route_to_vendor: log_vendor_call() on every try/catch
- run_tool_loop: log_tool_call() on every tool invoke
- ScannerGraph: new callbacks param, passes RunLogger.callback to all LLM tiers
- pipeline/macro_bridge: picks up RunLogger from thread-local, passes to TradingAgentsGraph
- cli/main.py: one RunLogger per command (analyze/scan/pipeline), write_log()
  at end, summary line printed to console

Log files co-located with reports:
  reports/daily/{date}/{TICKER}/run_log.jsonl   (analyze)
  reports/daily/{date}/market/run_log.jsonl     (scan)
  reports/daily/{date}/run_log.jsonl            (pipeline)

Also fix test_long_response_no_nudge: update "A"*600 → "A"*2100 to match
MIN_REPORT_LENGTH=2000 threshold set in an earlier commit.

Update memory system context files (ARCHITECTURE, COMPONENTS, CONVENTIONS,
GLOSSARY, CURRENT_STATE) to document observability and report path systems.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
ahmet guzererler 2026-03-19 09:06:40 +01:00 committed by GitHub
parent 8fcd58ad0f
commit a90f14c086
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 568 additions and 56 deletions

4
.gitignore vendored
View File

@@ -218,8 +218,10 @@ __marimo__/
# Cache # Cache
**/data_cache/ **/data_cache/
# Scan results and execution plans (generated artifacts) # Generated reports and execution plans
reports/
results/ results/
eval_results/
plans/ plans/
# Backup files # Backup files

View File

@@ -28,12 +28,14 @@ from rich.align import Align
from rich.rule import Rule from rich.rule import Rule
from tradingagents.graph.trading_graph import TradingAgentsGraph from tradingagents.graph.trading_graph import TradingAgentsGraph
from tradingagents.report_paths import get_daily_dir, get_market_dir, get_ticker_dir
from tradingagents.default_config import DEFAULT_CONFIG from tradingagents.default_config import DEFAULT_CONFIG
from cli.models import AnalystType from cli.models import AnalystType
from cli.utils import * from cli.utils import *
from tradingagents.graph.scanner_graph import ScannerGraph from tradingagents.graph.scanner_graph import ScannerGraph
from cli.announcements import fetch_announcements, display_announcements from cli.announcements import fetch_announcements, display_announcements
from cli.stats_handler import StatsCallbackHandler from cli.stats_handler import StatsCallbackHandler
from tradingagents.observability import RunLogger, set_run_logger
console = Console() console = Console()
@ -923,6 +925,8 @@ def run_analysis():
# Create stats callback handler for tracking LLM/tool calls # Create stats callback handler for tracking LLM/tool calls
stats_handler = StatsCallbackHandler() stats_handler = StatsCallbackHandler()
run_logger = RunLogger()
set_run_logger(run_logger)
# Normalize analyst selection to predefined order (selection is a 'set', order is fixed) # Normalize analyst selection to predefined order (selection is a 'set', order is fixed)
selected_set = {analyst.value for analyst in selections["analysts"]} selected_set = {analyst.value for analyst in selections["analysts"]}
@ -933,7 +937,7 @@ def run_analysis():
selected_analyst_keys, selected_analyst_keys,
config=config, config=config,
debug=True, debug=True,
callbacks=[stats_handler], callbacks=[stats_handler, run_logger.callback],
) )
# Initialize message buffer with selected analysts # Initialize message buffer with selected analysts
@ -943,7 +947,7 @@ def run_analysis():
start_time = time.time() start_time = time.time()
# Create result directory # Create result directory
results_dir = Path(config["results_dir"]) / selections["ticker"] / selections["analysis_date"] results_dir = get_ticker_dir(selections["analysis_date"], selections["ticker"])
results_dir.mkdir(parents=True, exist_ok=True) results_dir.mkdir(parents=True, exist_ok=True)
report_dir = results_dir / "reports" report_dir = results_dir / "reports"
report_dir.mkdir(parents=True, exist_ok=True) report_dir.mkdir(parents=True, exist_ok=True)
@ -1156,8 +1160,7 @@ def run_analysis():
# Prompt to save report # Prompt to save report
save_choice = typer.prompt("Save report?", default="Y").strip().upper() save_choice = typer.prompt("Save report?", default="Y").strip().upper()
if save_choice in ("Y", "YES", ""): if save_choice in ("Y", "YES", ""):
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") default_path = get_ticker_dir(selections["analysis_date"], selections["ticker"])
default_path = Path.cwd() / "reports" / f"{selections['ticker']}_{timestamp}"
save_path_str = typer.prompt( save_path_str = typer.prompt(
"Save path (press Enter for default)", "Save path (press Enter for default)",
default=str(default_path) default=str(default_path)
@ -1170,6 +1173,19 @@ def run_analysis():
except Exception as e: except Exception as e:
console.print(f"[red]Error saving report: {e}[/red]") console.print(f"[red]Error saving report: {e}[/red]")
# Write observability log
log_dir = get_ticker_dir(selections["analysis_date"], selections["ticker"])
log_dir.mkdir(parents=True, exist_ok=True)
run_logger.write_log(log_dir / "run_log.jsonl")
summary = run_logger.summary()
console.print(
f"[dim]LLM calls: {summary['llm_calls']} | "
f"Tokens: {summary['tokens_in']}{summary['tokens_out']} | "
f"Tools: {summary['tool_calls']} | "
f"Vendor calls: {summary['vendor_success']}ok/{summary['vendor_fail']}fail[/dim]"
)
set_run_logger(None)
# Prompt to display full report # Prompt to display full report
display_choice = typer.prompt("\nDisplay full report on screen?", default="Y").strip().upper() display_choice = typer.prompt("\nDisplay full report on screen?", default="Y").strip().upper()
if display_choice in ("Y", "YES", ""): if display_choice in ("Y", "YES", ""):
@ -1186,7 +1202,7 @@ def run_scan(date: Optional[str] = None):
scan_date = typer.prompt("Scan date (YYYY-MM-DD)", default=default_date) scan_date = typer.prompt("Scan date (YYYY-MM-DD)", default=default_date)
# Prepare save directory # Prepare save directory
save_dir = Path("results/macro_scan") / scan_date save_dir = get_market_dir(scan_date)
save_dir.mkdir(parents=True, exist_ok=True) save_dir.mkdir(parents=True, exist_ok=True)
console.print(f"[cyan]Running 3-phase macro scanner for {scan_date}...[/cyan]") console.print(f"[cyan]Running 3-phase macro scanner for {scan_date}...[/cyan]")
@ -1194,8 +1210,11 @@ def run_scan(date: Optional[str] = None):
console.print("[dim]Phase 2: Industry Deep Dive[/dim]") console.print("[dim]Phase 2: Industry Deep Dive[/dim]")
console.print("[dim]Phase 3: Macro Synthesis → stocks to investigate[/dim]\n") console.print("[dim]Phase 3: Macro Synthesis → stocks to investigate[/dim]\n")
run_logger = RunLogger()
set_run_logger(run_logger)
try: try:
scanner = ScannerGraph(config=DEFAULT_CONFIG.copy()) scanner = ScannerGraph(config=DEFAULT_CONFIG.copy(), callbacks=[run_logger.callback])
with Live(Spinner("dots", text="Scanning..."), console=console, transient=True): with Live(Spinner("dots", text="Scanning..."), console=console, transient=True):
result = scanner.scan(scan_date) result = scanner.scan(scan_date)
except Exception as e: except Exception as e:
@ -1239,6 +1258,17 @@ def run_scan(date: Optional[str] = None):
pass # Summary wasn't valid JSON — already printed as markdown pass # Summary wasn't valid JSON — already printed as markdown
# Write observability log
run_logger.write_log(save_dir / "run_log.jsonl")
scan_summary = run_logger.summary()
console.print(
f"[dim]LLM calls: {scan_summary['llm_calls']} | "
f"Tokens: {scan_summary['tokens_in']}{scan_summary['tokens_out']} | "
f"Tools: {scan_summary['tool_calls']} | "
f"Vendor calls: {scan_summary['vendor_success']}ok/{scan_summary['vendor_fail']}fail[/dim]"
)
set_run_logger(None)
console.print(f"\n[green]Results saved to {save_dir}[/green]") console.print(f"\n[green]Results saved to {save_dir}[/green]")
@ -1290,7 +1320,10 @@ def run_pipeline():
return return
config = DEFAULT_CONFIG.copy() config = DEFAULT_CONFIG.copy()
output_dir = Path("results/macro_pipeline") output_dir = get_daily_dir(analysis_date)
run_logger = RunLogger()
set_run_logger(run_logger)
console.print(f"\n[cyan]Running TradingAgents for {len(candidates)} tickers...[/cyan]") console.print(f"\n[cyan]Running TradingAgents for {len(candidates)} tickers...[/cyan]")
try: try:
@ -1304,6 +1337,18 @@ def run_pipeline():
save_results(results, macro_context, output_dir) save_results(results, macro_context, output_dir)
# Write observability log
output_dir.mkdir(parents=True, exist_ok=True)
run_logger.write_log(output_dir / "run_log.jsonl")
pipe_summary = run_logger.summary()
console.print(
f"[dim]LLM calls: {pipe_summary['llm_calls']} | "
f"Tokens: {pipe_summary['tokens_in']}{pipe_summary['tokens_out']} | "
f"Tools: {pipe_summary['tool_calls']} | "
f"Vendor calls: {pipe_summary['vendor_success']}ok/{pipe_summary['vendor_fail']}fail[/dim]"
)
set_run_logger(None)
successes = [r for r in results if not r.error] successes = [r for r in results if not r.error]
failures = [r for r in results if r.error] failures = [r for r in results if r.error]
console.print(f"\n[green]Done: {len(successes)} succeeded, {len(failures)} failed[/green]") console.print(f"\n[green]Done: {len(successes)} succeeded, {len(failures)} failed[/green]")

View File

@@ -1,18 +1,28 @@
# Current Milestone # Current Milestone
Opt-in vendor fallback (ADR 011) merged (PR #18). Pipeline CLI command implemented. Memory system v2 being built. Next: PR #20 review (Copilot memory rebuild), structured context files under `docs/agent/context/`. Report path unification complete. Observability logging (data sources, LLM calls, tool calls, token counts) is the active task. Next: `pipeline` CLI command.
# Recent Progress # Recent Progress
- **PR #18 merged**: Fail-fast vendor routing — `FALLBACK_ALLOWED` whitelist for 5 fungible-data methods only. ADR 011 written, ADR 002 superseded. - **PR #21 merged**: Memory system v2 — builder/reader skills, 5 context files, post-commit hook
- **PR #18 merged**: Opt-in vendor fallback — fail-fast by default, `FALLBACK_ALLOWED` whitelist for fungible data only (ADR 011)
- **PR #19 merged**: Merge conflict resolution after PR #18
- **Report path unification** (`80e174c`): All reports now written under `reports/daily/{date}/{ticker}/` for per-ticker analysis and `reports/daily/{date}/market/` for scanner output
- `pipeline` CLI command implemented — scan JSON → filter by conviction → per-ticker deep dive via `MacroBridge` - `pipeline` CLI command implemented — scan JSON → filter by conviction → per-ticker deep dive via `MacroBridge`
- `extract_json()` utility in `agents/utils/json_utils.py` handles DeepSeek R1 `<think>` blocks and markdown fences - `extract_json()` utility in `agents/utils/json_utils.py` handles DeepSeek R1 `<think>` blocks and markdown fences
- All 3 `@dataclass` types defined: `MacroContext`, `StockCandidate`, `TickerResult` in `pipeline/macro_bridge.py`
- 12 pre-existing test failures fixed across 5 files (PR merged to main)
- Memory builder and reader skills created in `.claude/skills/` - Memory builder and reader skills created in `.claude/skills/`
- Structured context files generated under `docs/agent/context/` (ARCHITECTURE, CONVENTIONS, COMPONENTS, TECH_STACK, GLOSSARY) - Structured context files generated under `docs/agent/context/` (ARCHITECTURE, CONVENTIONS, COMPONENTS, TECH_STACK, GLOSSARY)
- 220+ offline tests passing
- 12 pre-existing test failures fixed across 5 files
# In Progress
- **Observability logging**: Structured logging for data source calls (vendor, endpoint, success/failure), LLM requests (model name, agent, token counts), and tool invocations (tool name, duration). Goal: understand what's being called, by whom, and at what cost per run.
# Planned Next
- Report path unification tests (verify new paths in integration tests)
# Active Blockers # Active Blockers
- PR #20 (Copilot memory rebuild) is open/draft — needs review for aspirational deps and model name accuracy before merge - None currently
- Some scanner integration tests lack `@pytest.mark.integration` marker despite making live network calls

View File

@ -1,4 +1,4 @@
<!-- Last verified: 2026-03-18 --> <!-- Last verified: 2026-03-19 -->
# Architecture # Architecture
@ -80,6 +80,39 @@ Scanner JSON output → `MacroBridge.load()` → parse into `MacroContext` + `li
Source: `tradingagents/pipeline/macro_bridge.py` Source: `tradingagents/pipeline/macro_bridge.py`
## Unified Report Paths
All generated artifacts live under `reports/daily/{YYYY-MM-DD}/`:
```
reports/
└── daily/{YYYY-MM-DD}/
├── market/ # scan results (geopolitical_report.md, etc.)
├── {TICKER}/ # per-ticker analysis / pipeline
│ ├── 1_analysts/
│ ├── complete_report.md
│ └── eval/full_states_log.json
└── summary.md # pipeline combined summary
```
Helper functions: `get_daily_dir()`, `get_market_dir()`, `get_ticker_dir()`, `get_eval_dir()`.
Source: `tradingagents/report_paths.py`
## Observability
`RunLogger` accumulates structured events (JSON-lines) for a single run. Four event kinds: `llm` (model, agent, tokens in/out, latency), `tool` (tool name, args, success, latency), `vendor` (method, vendor, success, latency), `report` (path). Thread-safe via `_lock`.
Integration points:
- **LLM calls**: `_LLMCallbackHandler` (LangChain `BaseCallbackHandler`) — attach as callback to LLM constructors or graph invocations. Extracts model name from `invocation_params` / `serialized`, token counts from `usage_metadata`.
- **Vendor calls**: `log_vendor_call()` — called from `route_to_vendor`.
- **Tool calls**: `log_tool_call()` — called from `run_tool_loop()`.
- **Thread-local context**: `set_run_logger()` / `get_run_logger()` for passing logger to vendor/tool layers without changing signatures.
`RunLogger.summary()` returns aggregated stats (total tokens, model breakdown, vendor success/fail counts). `RunLogger.write_log(path)` writes all events + summary to a JSON-lines file.
Source: `tradingagents/observability.py`
## CLI Architecture ## CLI Architecture
3 Typer commands: `analyze` (interactive per-ticker), `scan` (macro scanner), `pipeline` (scan → filter → deep dive). Rich-based live UI with `MessageBuffer` (deque-backed state manager tracking agent status, reports, tool calls, defined in `cli/main.py`) and `StatsCallbackHandler` (token/timing stats, defined in `cli/stats_handler.py`). 7-step interactive questionnaire in `analyze` for provider/model selection. 3 Typer commands: `analyze` (interactive per-ticker), `scan` (macro scanner), `pipeline` (scan → filter → deep dive). Rich-based live UI with `MessageBuffer` (deque-backed state manager tracking agent status, reports, tool calls, defined in `cli/main.py`) and `StatsCallbackHandler` (token/timing stats, defined in `cli/stats_handler.py`). 7-step interactive questionnaire in `analyze` for provider/model selection.
@ -102,4 +135,6 @@ Source: `cli/main.py`, `cli/stats_handler.py`
| `tradingagents/pipeline/macro_bridge.py` | `MacroBridge`, data classes, pipeline orchestration | | `tradingagents/pipeline/macro_bridge.py` | `MacroBridge`, data classes, pipeline orchestration |
| `tradingagents/agents/utils/json_utils.py` | `extract_json()` — handles DeepSeek R1 markdown wrapping | | `tradingagents/agents/utils/json_utils.py` | `extract_json()` — handles DeepSeek R1 markdown wrapping |
| `cli/main.py` | CLI commands, `MessageBuffer`, Rich UI, interactive setup | | `cli/main.py` | CLI commands, `MessageBuffer`, Rich UI, interactive setup |
| `tradingagents/report_paths.py` | Unified report path helpers (`get_market_dir`, `get_ticker_dir`, etc.) |
| `tradingagents/observability.py` | `RunLogger`, `_LLMCallbackHandler`, structured event logging |
| `tradingagents/dataflows/config.py` | `set_config()`, `get_config()`, `initialize_config()` | | `tradingagents/dataflows/config.py` | `set_config()`, `get_config()`, `initialize_config()` |

View File

@ -1,4 +1,4 @@
<!-- Last verified: 2026-03-18 --> <!-- Last verified: 2026-03-19 -->
# Components # Components
@ -8,6 +8,8 @@
tradingagents/ tradingagents/
├── __init__.py ├── __init__.py
├── default_config.py # All config keys, defaults, env var overrides ├── default_config.py # All config keys, defaults, env var overrides
├── report_paths.py # Unified report path helpers (reports/daily/{date}/)
├── observability.py # RunLogger, _LLMCallbackHandler, structured event logging
├── agents/ ├── agents/
│ ├── __init__.py │ ├── __init__.py
│ ├── analysts/ │ ├── analysts/

View File

@ -1,4 +1,4 @@
<!-- Last verified: 2026-03-18 --> <!-- Last verified: 2026-03-19 -->
# Conventions # Conventions
@ -64,7 +64,7 @@
- Typer for command definitions, Rich for live UI. (`cli/main.py`) - Typer for command definitions, Rich for live UI. (`cli/main.py`)
- `MessageBuffer` — deque-based singleton tracking agent statuses, reports, tool calls. Fixed agents grouped by team (`FIXED_AGENTS`), analysts selectable. (`cli/main.py`) - `MessageBuffer` — deque-based singleton tracking agent statuses, reports, tool calls. Fixed agents grouped by team (`FIXED_AGENTS`), analysts selectable. (`cli/main.py`)
- `StatsCallbackHandler` — token and timing statistics for display. (`cli/stats_handler.py`) - `StatsCallbackHandler` — token and timing statistics for display. (`cli/stats_handler.py`)
- Scan results saved as `{key}.md` files to `results/macro_scan/{scan_date}/`. (`cli/main.py`) - All reports go under `reports/daily/{date}/` — use helpers from `report_paths.py`: `get_market_dir(date)` for scan results, `get_ticker_dir(date, ticker)` for per-ticker analysis, `get_eval_dir(date, ticker)` for eval logs. Never hardcode report paths. (`report_paths.py`)
## Pipeline Patterns ## Pipeline Patterns
@ -81,6 +81,14 @@
- Env isolation: always mock env vars before `importlib.reload()``load_dotenv()` leaks real `.env` values otherwise. - Env isolation: always mock env vars before `importlib.reload()``load_dotenv()` leaks real `.env` values otherwise.
- `callable()` returns False on LangChain `@tool` objects — use `hasattr(x, "invoke")` instead. - `callable()` returns False on LangChain `@tool` objects — use `hasattr(x, "invoke")` instead.
## Observability
- Create one `RunLogger` per CLI command (analyze/scan/pipeline). Attach `logger.callback` to LLM constructors. (`observability.py`)
- Call `set_run_logger(logger)` at run start so vendor/tool layers can access it via `get_run_logger()`. (`observability.py`)
- Vendor calls: `log_vendor_call(method, vendor, success, duration_ms)` — called inside `route_to_vendor`. (`observability.py`, `interface.py`)
- Tool calls: `log_tool_call(tool_name, args_summary, success, duration_ms)` — called inside `run_tool_loop`. (`observability.py`, `tool_runner.py`)
- Write the run log at the end of each command: `logger.write_log(report_dir / "run_log.jsonl")`. (`observability.py`)
## Error Handling ## Error Handling
- Fail-fast by default — no silent fallback unless method is in `FALLBACK_ALLOWED`. (ADR 011) - Fail-fast by default — no silent fallback unless method is in `FALLBACK_ALLOWED`. (ADR 011)

View File

@ -1,4 +1,4 @@
<!-- Last verified: 2026-03-18 --> <!-- Last verified: 2026-03-19 -->
# Glossary # Glossary
@ -77,6 +77,25 @@
| FIXED_AGENTS | Dict grouping non-analyst agents by team: Research Team, Trading Team, Risk Management, Portfolio Management | `cli/main.py` | | FIXED_AGENTS | Dict grouping non-analyst agents by team: Research Team, Trading Team, Risk Management, Portfolio Management | `cli/main.py` |
| ANALYST_MAPPING | Dict: `"market"``"Market Analyst"`, `"social"``"Social Analyst"`, etc. | `cli/main.py` | | ANALYST_MAPPING | Dict: `"market"``"Market Analyst"`, `"social"``"Social Analyst"`, etc. | `cli/main.py` |
## Observability
| Term | Definition | Source |
|------|-----------|--------|
| RunLogger | Accumulates structured events (llm, tool, vendor, report) for a single CLI run. Thread-safe. | `observability.py` |
| _LLMCallbackHandler | LangChain `BaseCallbackHandler` that feeds LLM call events (model, tokens, latency) into a `RunLogger` | `observability.py` |
| _Event | @dataclass: `kind`, `ts`, `data` — one JSON-line per event | `observability.py` |
| set_run_logger / get_run_logger | Thread-local context for passing `RunLogger` to vendor/tool layers | `observability.py` |
## Report Paths
| Term | Definition | Source |
|------|-----------|--------|
| REPORTS_ROOT | `Path("reports")` — root for all generated artifacts | `report_paths.py` |
| get_daily_dir | Returns `reports/daily/{date}/` | `report_paths.py` |
| get_market_dir | Returns `reports/daily/{date}/market/` — scan results | `report_paths.py` |
| get_ticker_dir | Returns `reports/daily/{date}/{TICKER}/` — per-ticker analysis | `report_paths.py` |
| get_eval_dir | Returns `reports/daily/{date}/{TICKER}/eval/` — eval logs | `report_paths.py` |
## Constants ## Constants
| Constant | Value | Source | | Constant | Value | Source |

View File

@ -1,4 +1,4 @@
<!-- Last verified: 2026-03-18 --> <!-- Last verified: 2026-03-19 -->
# Tech Stack # Tech Stack

View File

@ -127,7 +127,7 @@ class TestToolLoopNudge:
def test_long_response_no_nudge(self): def test_long_response_no_nudge(self):
"""A long first response (no tool calls) should be returned as-is.""" """A long first response (no tool calls) should be returned as-is."""
long_text = "A" * 600 long_text = "A" * 2100 # must exceed MIN_REPORT_LENGTH (2000)
response = AIMessage(content=long_text, tool_calls=[]) response = AIMessage(content=long_text, tool_calls=[])
chain = self._make_chain([response]) chain = self._make_chain([response])
tool = self._make_tool() tool = self._make_tool()
@ -139,7 +139,7 @@ class TestToolLoopNudge:
def test_short_response_triggers_nudge(self): def test_short_response_triggers_nudge(self):
"""A short first response triggers a nudge, then the LLM is re-invoked.""" """A short first response triggers a nudge, then the LLM is re-invoked."""
short_resp = AIMessage(content="Brief.", tool_calls=[]) short_resp = AIMessage(content="Brief.", tool_calls=[])
long_resp = AIMessage(content="A" * 600, tool_calls=[]) long_resp = AIMessage(content="A" * 2100, tool_calls=[])
chain = self._make_chain([short_resp, long_resp]) chain = self._make_chain([short_resp, long_resp])
tool = self._make_tool() tool = self._make_tool()

View File

@ -79,47 +79,29 @@ class TestScannerEndToEnd:
def test_scan_command_creates_output_files(self): def test_scan_command_creates_output_files(self):
"""Test that the scan command creates all expected output files.""" """Test that the scan command creates all expected output files."""
with tempfile.TemporaryDirectory() as temp_dir: with tempfile.TemporaryDirectory() as temp_dir:
# Set up the test directory structure test_date_dir = Path(temp_dir) / "market"
macro_scan_dir = Path(temp_dir) / "results" / "macro_scan"
test_date_dir = macro_scan_dir / "2026-03-15"
test_date_dir.mkdir(parents=True) test_date_dir.mkdir(parents=True)
# Mock the current working directory to use our temp directory # Mock get_market_dir to redirect output into the temp directory
with patch('cli.main.Path') as mock_path_class: with patch('cli.main.get_market_dir', return_value=test_date_dir):
# Mock Path.cwd() to return our temp directory
mock_path_class.cwd.return_value = Path(temp_dir)
# Mock Path constructor for results/macro_scan/{date}
def mock_path_constructor(*args):
path_obj = Path(*args)
# If this is the results/macro_scan/{date} path, return our test directory
if len(args) >= 3 and args[0] == "results" and args[1] == "macro_scan" and args[2] == "2026-03-15":
return test_date_dir
return path_obj
mock_path_class.side_effect = mock_path_constructor
# Mock the write_text method to capture what gets written # Mock the write_text method to capture what gets written
written_files = {} written_files = {}
def mock_write_text(self, content, encoding=None): def mock_write_text(self, content, encoding=None):
# Store what was written to each file
written_files[str(self)] = content written_files[str(self)] = content
with patch('pathlib.Path.write_text', mock_write_text): with patch('pathlib.Path.write_text', mock_write_text):
# Mock typer.prompt to return our test date
with patch('typer.prompt', return_value='2026-03-15'): with patch('typer.prompt', return_value='2026-03-15'):
try: try:
run_scan() run_scan()
except SystemExit: except SystemExit:
# typer might raise SystemExit, that's ok
pass pass
# Verify that run_scan() uses the correct output file naming convention. # Verify that run_scan() uses the correct output file naming convention.
# #
# run_scan() writes via: (save_dir / f"{key}.md").write_text(content) # run_scan() writes via: (save_dir / f"{key}.md").write_text(content)
# where save_dir = Path("results/macro_scan") / scan_date (relative). # where save_dir = get_market_dir(scan_date).
# pathlib.Path.write_text is mocked, so written_files keys are the # pathlib.Path.write_text is mocked, so written_files keys are the
# str() of those relative Path objects — NOT absolute paths. # str() of those Path objects.
# #
# LLM output is non-deterministic: a phase may produce an empty string, # LLM output is non-deterministic: a phase may produce an empty string,
# causing run_scan()'s `if content:` guard to skip writing that file. # causing run_scan()'s `if content:` guard to skip writing that file.
@ -133,6 +115,7 @@ class TestScannerEndToEnd:
"sector_performance_report.md", "sector_performance_report.md",
"industry_deep_dive_report.md", "industry_deep_dive_report.md",
"macro_scan_summary.md", "macro_scan_summary.md",
"run_log.jsonl",
} }
assert len(written_files) >= 1, ( assert len(written_files) >= 1, (

View File

@ -100,7 +100,7 @@ def test_scanner_integration_with_cli_scan():
# 3. CLI scan command calls get_sector_performance.invoke() # 3. CLI scan command calls get_sector_performance.invoke()
# 4. CLI scan command calls get_industry_performance.invoke() # 4. CLI scan command calls get_industry_performance.invoke()
# 5. CLI scan command calls get_topic_news.invoke() # 5. CLI scan command calls get_topic_news.invoke()
# 6. Results are written to files in results/macro_scan/{date}/ # 6. Results are written to files in reports/daily/{date}/market/
# Since we've verified the individual tools work above, and we've seen # Since we've verified the individual tools work above, and we've seen
# the CLI scan command work manually, we can be confident the integration works. # the CLI scan command work manually, we can be confident the integration works.

View File

@@ -7,6 +7,7 @@ phase — so they need an inline tool-execution loop.
from __future__ import annotations from __future__ import annotations
import time
from typing import Any, List from typing import Any, List
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
@ -79,17 +80,27 @@ def run_tool_loop(
first_round = False first_round = False
# Execute each requested tool call and append ToolMessages # Execute each requested tool call and append ToolMessages
from tradingagents.observability import get_run_logger
rl = get_run_logger()
for tc in result.tool_calls: for tc in result.tool_calls:
tool_name = tc["name"] tool_name = tc["name"]
tool_args = tc["args"] tool_args = tc["args"]
tool_fn = tool_map.get(tool_name) tool_fn = tool_map.get(tool_name)
if tool_fn is None: if tool_fn is None:
tool_output = f"Error: unknown tool '{tool_name}'" tool_output = f"Error: unknown tool '{tool_name}'"
if rl:
rl.log_tool_call(tool_name, str(tool_args)[:120], False, 0, error="unknown tool")
else: else:
t0 = time.time()
try: try:
tool_output = tool_fn.invoke(tool_args) tool_output = tool_fn.invoke(tool_args)
if rl:
rl.log_tool_call(tool_name, str(tool_args)[:120], True, (time.time() - t0) * 1000)
except Exception as e: except Exception as e:
tool_output = f"Error calling {tool_name}: {e}" tool_output = f"Error calling {tool_name}: {e}"
if rl:
rl.log_tool_call(tool_name, str(tool_args)[:120], False, (time.time() - t0) * 1000, error=str(e)[:200])
current_messages.append( current_messages.append(
ToolMessage(content=str(tool_output), tool_call_id=tc["id"]) ToolMessage(content=str(tool_output), tool_call_id=tc["id"])

View File

@@ -1,3 +1,4 @@
import time
from typing import Annotated from typing import Annotated
# Import from vendor-specific modules # Import from vendor-specific modules
@ -239,6 +240,11 @@ def route_to_vendor(method: str, *args, **kwargs):
# Fail-fast: only try configured primary vendor(s) # Fail-fast: only try configured primary vendor(s)
vendors_to_try = primary_vendors vendors_to_try = primary_vendors
from tradingagents.observability import get_run_logger
rl = get_run_logger()
args_summary = ", ".join(str(a)[:80] for a in args) if args else ""
last_error = None last_error = None
tried = [] tried = []
for vendor in vendors_to_try: for vendor in vendors_to_try:
@ -249,9 +255,15 @@ def route_to_vendor(method: str, *args, **kwargs):
vendor_impl = VENDOR_METHODS[method][vendor] vendor_impl = VENDOR_METHODS[method][vendor]
impl_func = vendor_impl[0] if isinstance(vendor_impl, list) else vendor_impl impl_func = vendor_impl[0] if isinstance(vendor_impl, list) else vendor_impl
t0 = time.time()
try: try:
return impl_func(*args, **kwargs) result = impl_func(*args, **kwargs)
if rl:
rl.log_vendor_call(method, vendor, True, (time.time() - t0) * 1000, args_summary=args_summary)
return result
except (AlphaVantageError, FinnhubError, ConnectionError, TimeoutError) as exc: except (AlphaVantageError, FinnhubError, ConnectionError, TimeoutError) as exc:
if rl:
rl.log_vendor_call(method, vendor, False, (time.time() - t0) * 1000, error=str(exc)[:200], args_summary=args_summary)
last_error = exc last_error = exc
continue continue

View File

@ -38,7 +38,7 @@ def _env_int(key: str, default=None):
DEFAULT_CONFIG = { DEFAULT_CONFIG = {
"project_dir": os.path.abspath(os.path.join(os.path.dirname(__file__), ".")), "project_dir": os.path.abspath(os.path.join(os.path.dirname(__file__), ".")),
"results_dir": _env("RESULTS_DIR", "./results"), "results_dir": _env("RESULTS_DIR", "./reports"),
"data_cache_dir": os.path.join( "data_cache_dir": os.path.join(
os.path.abspath(os.path.join(os.path.dirname(__file__), ".")), os.path.abspath(os.path.join(os.path.dirname(__file__), ".")),
"dataflows/data_cache", "dataflows/data_cache",

View File

@@ -1,6 +1,6 @@
"""Scanner graph — orchestrates the 3-phase macro scanner pipeline.""" """Scanner graph — orchestrates the 3-phase macro scanner pipeline."""
from typing import Any from typing import Any, List, Optional
from tradingagents.default_config import DEFAULT_CONFIG from tradingagents.default_config import DEFAULT_CONFIG
from tradingagents.llm_clients import create_llm_client from tradingagents.llm_clients import create_llm_client
@ -22,15 +22,22 @@ class ScannerGraph:
Phase 3: macro_synthesis -> END Phase 3: macro_synthesis -> END
""" """
def __init__(self, config: dict[str, Any] | None = None, debug: bool = False) -> None: def __init__(
self,
config: dict[str, Any] | None = None,
debug: bool = False,
callbacks: Optional[List] = None,
) -> None:
"""Initialize the scanner graph. """Initialize the scanner graph.
Args: Args:
config: Configuration dictionary. Falls back to DEFAULT_CONFIG when None. config: Configuration dictionary. Falls back to DEFAULT_CONFIG when None.
debug: Whether to stream and print intermediate states. debug: Whether to stream and print intermediate states.
callbacks: Optional LangChain callback handlers (e.g. RunLogger.callback).
""" """
self.config = config or DEFAULT_CONFIG.copy() self.config = config or DEFAULT_CONFIG.copy()
self.debug = debug self.debug = debug
self.callbacks = callbacks or []
quick_llm = self._create_llm("quick_think") quick_llm = self._create_llm("quick_think")
mid_llm = self._create_llm("mid_think") mid_llm = self._create_llm("mid_think")
@ -78,6 +85,9 @@ class ScannerGraph:
provider = self.config.get(f"{tier}_llm_provider") or self.config["llm_provider"] provider = self.config.get(f"{tier}_llm_provider") or self.config["llm_provider"]
backend_url = self.config.get(f"{tier}_backend_url") or self.config.get("backend_url") backend_url = self.config.get(f"{tier}_backend_url") or self.config.get("backend_url")
if self.callbacks:
kwargs["callbacks"] = self.callbacks
client = create_llm_client( client = create_llm_client(
provider=provider, provider=provider,
model=model, model=model,

View File

@ -320,11 +320,13 @@ class TradingAgentsGraph:
} }
# Save to file # Save to file
directory = Path(f"eval_results/{self.ticker}/TradingAgentsStrategy_logs/") from tradingagents.report_paths import get_eval_dir
directory = get_eval_dir(str(trade_date), self.ticker)
directory.mkdir(parents=True, exist_ok=True) directory.mkdir(parents=True, exist_ok=True)
with open( with open(
f"eval_results/{self.ticker}/TradingAgentsStrategy_logs/full_states_log_{trade_date}.json", directory / f"full_states_log_{trade_date}.json",
"w", "w",
encoding="utf-8", encoding="utf-8",
) as f: ) as f:

View File

@ -0,0 +1,325 @@
"""Structured observability logging for TradingAgents.
Emits JSON-lines logs capturing:
- LLM calls (model, agent/node, token counts, latency)
- Tool calls (tool name, args summary, success/failure, latency)
- Data vendor calls (method, vendor, success/failure, fallback chain)
- Report saves
Usage:
from tradingagents.observability import RunLogger, get_run_logger
logger = RunLogger() # one per run
# pass logger.callback to LangChain as a callback handler
# pass logger to route_to_vendor / run_tool_loop via context
All events are written as JSON lines to a file and also to Python's
``logging`` module at DEBUG level for console visibility.
"""
from __future__ import annotations
import json
import logging
import threading
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.messages import AIMessage
from langchain_core.outputs import LLMResult
_py_logger = logging.getLogger("tradingagents.observability")
# ──────────────────────────────────────────────────────────────────────────────
# Event dataclass — each logged event becomes one JSON line
# ──────────────────────────────────────────────────────────────────────────────
@dataclass
class _Event:
kind: str # "llm", "tool", "vendor", "report"
ts: float # time.time()
data: dict # kind-specific payload
def to_dict(self) -> dict:
return {"kind": self.kind, "ts": self.ts, **self.data}
# ──────────────────────────────────────────────────────────────────────────────
# RunLogger — accumulates events and writes a JSON-lines log file
# ──────────────────────────────────────────────────────────────────────────────
class RunLogger:
    """Accumulates structured events for a single run (analyze / scan / pipeline).

    Attributes:
        callback: A LangChain ``BaseCallbackHandler`` that can be passed to
            LLM constructors or graph invocations.
        events: List of all recorded events; guarded by an internal lock.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self.events: list[_Event] = []
        self.callback = _LLMCallbackHandler(self)
        self._start = time.time()

    # -- public helpers to record events from non-callback code ----------------
    def log_vendor_call(
        self,
        method: str,
        vendor: str,
        success: bool,
        duration_ms: float,
        error: str | None = None,
        args_summary: str = "",
    ) -> None:
        """Record a data-vendor call (called from ``route_to_vendor``).

        Args:
            method: Logical data-access method name.
            vendor: Vendor that served (or failed) the call.
            success: Whether the vendor call returned without raising.
            duration_ms: Wall-clock duration of the call in milliseconds.
            error: Truncated error text when the call failed.
            args_summary: Short human-readable rendering of the call args.
        """
        evt = _Event(
            kind="vendor",
            ts=time.time(),
            data={
                "method": method,
                "vendor": vendor,
                "success": success,
                "duration_ms": round(duration_ms, 1),
                "error": error,
                "args": args_summary,
            },
        )
        self._append(evt)

    def log_tool_call(
        self,
        tool_name: str,
        args_summary: str,
        success: bool,
        duration_ms: float,
        error: str | None = None,
    ) -> None:
        """Record a tool invocation (called from ``run_tool_loop``).

        Args:
            tool_name: Name of the invoked tool.
            args_summary: Short human-readable rendering of the tool args.
            success: Whether the tool returned without raising.
            duration_ms: Wall-clock duration of the call in milliseconds.
            error: Truncated error text when the call failed.
        """
        evt = _Event(
            kind="tool",
            ts=time.time(),
            data={
                "tool": tool_name,
                "args": args_summary,
                "success": success,
                "duration_ms": round(duration_ms, 1),
                "error": error,
            },
        )
        self._append(evt)

    def log_report_save(self, path: str) -> None:
        """Record that a report file was written to *path*."""
        evt = _Event(kind="report", ts=time.time(), data={"path": path})
        self._append(evt)

    # -- summary ---------------------------------------------------------------
    def summary(self) -> dict:
        """Return an aggregated summary suitable for ``run_summary.json``.

        Aggregates token totals, per-model LLM call counts, tool
        success/failure counts, and per-vendor ok/fail counts.
        """
        with self._lock:
            events = list(self.events)
        llm_events = [e for e in events if e.kind == "llm"]
        tool_events = [e for e in events if e.kind == "tool"]
        vendor_events = [e for e in events if e.kind == "vendor"]
        total_tokens_in = sum(e.data.get("tokens_in", 0) for e in llm_events)
        total_tokens_out = sum(e.data.get("tokens_out", 0) for e in llm_events)
        vendor_ok = sum(1 for e in vendor_events if e.data["success"])
        vendor_fail = len(vendor_events) - vendor_ok
        # Group LLM calls by model.
        model_counts: dict[str, int] = {}
        for e in llm_events:
            m = e.data.get("model", "unknown")
            model_counts[m] = model_counts.get(m, 0) + 1
        # Group vendor calls into ok/fail buckets per vendor.
        vendor_counts: dict[str, dict] = {}
        for e in vendor_events:
            bucket = vendor_counts.setdefault(e.data["vendor"], {"ok": 0, "fail": 0})
            bucket["ok" if e.data["success"] else "fail"] += 1
        return {
            "elapsed_s": round(time.time() - self._start, 1),
            "llm_calls": len(llm_events),
            "tokens_in": total_tokens_in,
            "tokens_out": total_tokens_out,
            "tokens_total": total_tokens_in + total_tokens_out,
            "models_used": model_counts,
            "tool_calls": len(tool_events),
            "tool_success": sum(1 for e in tool_events if e.data["success"]),
            "tool_fail": sum(1 for e in tool_events if not e.data["success"]),
            "vendor_calls": len(vendor_events),
            "vendor_success": vendor_ok,
            "vendor_fail": vendor_fail,
            "vendors_used": vendor_counts,
        }

    def write_log(self, path: Path) -> None:
        """Write all events as JSON lines + a summary block to *path*.

        The summary line is appended last so consumers can tail the file.
        """
        path.parent.mkdir(parents=True, exist_ok=True)
        with self._lock:
            events = list(self.events)
        # Build the lines outside the lock: summary() re-acquires self._lock,
        # and threading.Lock is not reentrant.
        lines = [json.dumps(e.to_dict()) for e in events]
        lines.append(json.dumps({"kind": "summary", **self.summary()}))
        # Fix: pin the encoding — write_text() otherwise uses the locale's
        # preferred encoding, which is platform-dependent.
        path.write_text("\n".join(lines) + "\n", encoding="utf-8")
        _py_logger.info("Run log written to %s", path)

    # -- internals -------------------------------------------------------------
    def _append(self, evt: _Event) -> None:
        """Thread-safely record *evt* and mirror it to the Python logger."""
        with self._lock:
            self.events.append(evt)
        _py_logger.debug("%s | %s", evt.kind, json.dumps(evt.data))
# ──────────────────────────────────────────────────────────────────────────────
# LangChain callback handler — captures LLM call details
# ──────────────────────────────────────────────────────────────────────────────
class _LLMCallbackHandler(BaseCallbackHandler):
    """LangChain callback that feeds LLM events into a ``RunLogger``.

    Start hooks stash model/agent/start-time under the run id;
    ``on_llm_end`` matches the finished call back up and emits one
    "llm" event.
    """

    def __init__(self, run_logger: RunLogger) -> None:
        super().__init__()
        self._rl = run_logger
        self._lock = threading.Lock()
        # run-key -> {"model", "agent", "t0"} for calls started but not ended.
        self._inflight: dict[str, dict] = {}

    def _register(
        self,
        serialized: Dict[str, Any],
        payload: Any,
        run_id: Any,
        kwargs: Dict[str, Any],
    ) -> None:
        """Stash metadata for an in-flight LLM call under its run key."""
        name = kwargs.get("name") or serialized.get("name") or _extract_graph_node(kwargs)
        # Fall back to the payload's object id when LangChain gives no run_id.
        key = str(run_id) if run_id else str(id(payload))
        with self._lock:
            self._inflight[key] = {
                "model": _extract_model(serialized, kwargs),
                "agent": name or "",
                "t0": time.time(),
            }

    # -- chat model start (preferred path for ChatOpenAI / ChatAnthropic) ------
    def on_chat_model_start(
        self,
        serialized: Dict[str, Any],
        messages: List[List[Any]],
        *,
        run_id: Any = None,
        **kwargs: Any,
    ) -> None:
        self._register(serialized, messages, run_id, kwargs)

    # -- legacy LLM start (completion-style) -----------------------------------
    def on_llm_start(
        self,
        serialized: Dict[str, Any],
        prompts: List[str],
        *,
        run_id: Any = None,
        **kwargs: Any,
    ) -> None:
        self._register(serialized, prompts, run_id, kwargs)

    # -- LLM end ---------------------------------------------------------------
    def on_llm_end(self, response: LLMResult, *, run_id: Any = None, **kwargs: Any) -> None:
        meta = None
        if run_id:
            with self._lock:
                meta = self._inflight.pop(str(run_id), None)

        tokens_in = tokens_out = 0
        model_from_response = ""
        try:
            generation = response.generations[0][0]
            msg = getattr(generation, "message", None)
            if isinstance(msg, AIMessage) and getattr(msg, "usage_metadata", None):
                tokens_in = msg.usage_metadata.get("input_tokens", 0)
                tokens_out = msg.usage_metadata.get("output_tokens", 0)
            if msg is not None and hasattr(msg, "response_metadata"):
                resp_meta = msg.response_metadata
                model_from_response = resp_meta.get("model_name", "") or resp_meta.get("model", "")
        except (IndexError, TypeError, AttributeError):
            # Providers differ in response shape; missing usage data is fine.
            pass

        duration_ms = (time.time() - meta["t0"]) * 1000 if meta else 0
        self._rl._append(
            _Event(
                kind="llm",
                ts=time.time(),
                data={
                    "model": model_from_response or (meta["model"] if meta else "unknown"),
                    "agent": meta["agent"] if meta else "",
                    "tokens_in": tokens_in,
                    "tokens_out": tokens_out,
                    "duration_ms": round(duration_ms, 1),
                },
            )
        )
# ──────────────────────────────────────────────────────────────────────────────
# Helpers
# ──────────────────────────────────────────────────────────────────────────────
def _extract_model(serialized: dict, kwargs: dict) -> str:
"""Best-effort model name from LangChain callback metadata."""
# kwargs.invocation_params often has the model name
inv = kwargs.get("invocation_params") or {}
model = inv.get("model_name") or inv.get("model") or ""
if model:
return model
# serialized might have it nested
kw = serialized.get("kwargs", {})
return kw.get("model_name") or kw.get("model") or serialized.get("id", [""])[-1]
def _extract_graph_node(kwargs: dict) -> str:
"""Try to get the current graph node name from LangGraph metadata."""
tags = kwargs.get("tags") or []
for tag in tags:
if isinstance(tag, str) and tag.startswith("graph:node:"):
return tag.split(":", 2)[-1]
metadata = kwargs.get("metadata") or {}
return metadata.get("langgraph_node", "")
# ──────────────────────────────────────────────────────────────────────────────
# Thread-local context for passing RunLogger to vendor/tool layers
# ──────────────────────────────────────────────────────────────────────────────
# Thread-local slot holding the active RunLogger for the current run.
_current_run_logger: threading.local = threading.local()


def set_run_logger(rl: RunLogger | None) -> None:
    """Bind *rl* as the current thread's RunLogger (``None`` clears it)."""
    _current_run_logger.instance = rl


def get_run_logger() -> RunLogger | None:
    """Return the current thread's RunLogger, or ``None`` when none is set."""
    return getattr(_current_run_logger, "instance", None)

View File

@ -193,7 +193,11 @@ def run_ticker_analysis(
try: try:
from tradingagents.graph.trading_graph import TradingAgentsGraph from tradingagents.graph.trading_graph import TradingAgentsGraph
ta = TradingAgentsGraph(debug=False, config=config) from tradingagents.observability import get_run_logger
rl = get_run_logger()
cbs = [rl.callback] if rl else None
ta = TradingAgentsGraph(debug=False, config=config, callbacks=cbs)
final_state, decision = ta.propagate(candidate.ticker, analysis_date) final_state, decision = ta.propagate(candidate.ticker, analysis_date)
result.market_report = final_state.get("market_report", "") result.market_report = final_state.get("market_report", "")

View File

@ -0,0 +1,44 @@
"""Unified report-path helpers.
Every CLI command and internal save routine should use these helpers so that
all generated artifacts land under a single ``reports/`` tree::
reports/
daily/{YYYY-MM-DD}/
market/ # scan results
geopolitical_report.md
...
{TICKER}/ # per-ticker analysis / pipeline
1_analysts/
...
complete_report.md
eval/
full_states_log.json
summary.md # pipeline combined summary
"""
from __future__ import annotations
from pathlib import Path
# Root of the unified report tree.
# NOTE(review): a relative path means the output location depends on the
# process CWD — confirm all entry points run from the repository root.
REPORTS_ROOT = Path("reports")


def get_daily_dir(date: str) -> Path:
    """``reports/daily/{date}/``"""
    return REPORTS_ROOT.joinpath("daily", date)


def get_market_dir(date: str) -> Path:
    """``reports/daily/{date}/market/``"""
    return get_daily_dir(date).joinpath("market")


def get_ticker_dir(date: str, ticker: str) -> Path:
    """``reports/daily/{date}/{TICKER}/`` — ticker is upper-cased."""
    return get_daily_dir(date).joinpath(ticker.upper())


def get_eval_dir(date: str, ticker: str) -> Path:
    """``reports/daily/{date}/{TICKER}/eval/``"""
    return get_ticker_dir(date, ticker).joinpath("eval")