"""Filesystem document store for Portfolio Manager reports. Saves and loads all non-transactional portfolio artifacts (scans, per-ticker analysis, holding reviews, risk metrics, PM decisions) using the existing ``tradingagents/report_paths.py`` path convention. When a ``run_id`` is set on the store, all artifacts are written under a run-specific subdirectory so that same-day re-runs never overwrite earlier results:: reports/daily/{date}/runs/{run_id}/ ├── market/ │ └── macro_scan_summary.json ├── {TICKER}/ │ └── complete_report.json └── portfolio/ ├── {TICKER}_holding_review.json ├── {portfolio_id}_risk_metrics.json ├── {portfolio_id}_pm_decision.json └── {portfolio_id}_pm_decision.md A ``latest.json`` pointer at the date level is updated on every write so that load methods (when called *without* a ``run_id``) transparently resolve to the most recent run. Usage:: from tradingagents.portfolio.report_store import ReportStore store = ReportStore(run_id="a1b2c3d4") store.save_scan("2026-03-20", {"watchlist": [...]}) data = store.load_scan("2026-03-20") # reads from latest run """ from __future__ import annotations import json from pathlib import Path from typing import Any from tradingagents.portfolio.exceptions import ReportStoreError from tradingagents.report_paths import read_latest_pointer, write_latest_pointer class ReportStore: """Filesystem document store for all portfolio-related reports. Directories are created automatically on first write. All load methods return ``None`` when the file does not exist. When ``run_id`` is provided, write paths are scoped under ``{base_dir}/daily/{date}/runs/{run_id}/…`` and a ``latest.json`` pointer is updated automatically. Load methods resolve through the pointer when no ``run_id`` is set. """ def __init__( self, base_dir: str | Path = "reports", run_id: str | None = None, ) -> None: """Initialise the store with a base reports directory. Args: base_dir: Root directory for all reports. Defaults to ``"reports"`` (relative to CWD), matching ``report_paths.REPORTS_ROOT``. Override via the ``PORTFOLIO_DATA_DIR`` env var or ``get_portfolio_config()["data_dir"]``. run_id: Optional short identifier for the current run. When set, all writes are scoped under a ``runs/{run_id}/`` subdirectory so that same-day re-runs are preserved. """ self._base_dir = Path(base_dir) self._run_id = run_id @property def run_id(self) -> str | None: """The run identifier set on this store, if any.""" return self._run_id # ------------------------------------------------------------------ # Internal helpers # ------------------------------------------------------------------ def _date_root(self, date: str, *, for_write: bool = False) -> Path: """Return the base directory for a given date, scoped by run_id. When ``for_write=True``, the run_id *must* be used (if present) so that writes land in the run-specific directory. When ``for_write=False`` (reads), the method first tries the run_id directory, then falls back to latest.json pointer, and finally falls back to the legacy flat layout. 
""" daily = self._base_dir / "daily" / date if for_write and self._run_id: return daily / "runs" / self._run_id if self._run_id: return daily / "runs" / self._run_id # Read path: check latest.json pointer (using our base_dir) latest_id = read_latest_pointer(date, base_dir=self._base_dir) if latest_id: candidate = daily / "runs" / latest_id if candidate.exists(): return candidate # Fallback to legacy flat layout return daily def _update_latest(self, date: str) -> None: """Update the latest.json pointer if run_id is set.""" if self._run_id: write_latest_pointer(date, self._run_id, base_dir=self._base_dir) def _portfolio_dir(self, date: str, *, for_write: bool = False) -> Path: """Return the portfolio subdirectory for a given date. Path: ``{base}/daily/{date}[/runs/{run_id}]/portfolio/`` """ return self._date_root(date, for_write=for_write) / "portfolio" @staticmethod def _sanitize(obj: Any) -> Any: """Recursively convert non-JSON-serializable objects to safe types. Handles LangChain message objects (``HumanMessage``, ``AIMessage``, etc.) that appear in LangGraph state dicts, as well as any other arbitrary objects that are not natively JSON-serializable. """ if obj is None or isinstance(obj, (bool, int, float, str)): return obj if isinstance(obj, dict): return {k: ReportStore._sanitize(v) for k, v in obj.items()} if isinstance(obj, (list, tuple)): return [ReportStore._sanitize(item) for item in obj] # LangChain BaseMessage objects expose .type and .content if hasattr(obj, "type") and hasattr(obj, "content"): try: if hasattr(obj, "dict") and callable(obj.dict): return ReportStore._sanitize(obj.dict()) except Exception: pass return {"type": str(obj.type), "content": str(obj.content)} # Generic fallback: try a serialization probe first try: json.dumps(obj) return obj except (TypeError, ValueError): return str(obj) def _write_json(self, path: Path, data: dict[str, Any]) -> Path: """Write a dict to a JSON file, creating parent directories as needed. Args: path: Target file path. data: Data to serialise. Returns: The path written. Raises: ReportStoreError: On filesystem write failure. """ try: path.parent.mkdir(parents=True, exist_ok=True) sanitized = self._sanitize(data) path.write_text(json.dumps(sanitized, indent=2), encoding="utf-8") return path except OSError as exc: raise ReportStoreError(f"Failed to write {path}: {exc}") from exc def _read_json(self, path: Path) -> dict[str, Any] | None: """Read a JSON file, returning None if the file does not exist. Raises: ReportStoreError: On JSON parse error (file exists but is corrupt). """ if not path.exists(): return None try: return json.loads(path.read_text(encoding="utf-8")) except json.JSONDecodeError as exc: raise ReportStoreError(f"Corrupt JSON at {path}: {exc}") from exc # ------------------------------------------------------------------ # Macro Scan # ------------------------------------------------------------------ def save_scan(self, date: str, data: dict[str, Any]) -> Path: """Save macro scan summary JSON. Path: ``{base}/daily/{date}[/runs/{run_id}]/market/macro_scan_summary.json`` Args: date: ISO date string, e.g. ``"2026-03-20"``. data: Scan output dict (typically the macro_scan_summary). Returns: Path of the written file. """ root = self._date_root(date, for_write=True) path = root / "market" / "macro_scan_summary.json" result = self._write_json(path, data) self._update_latest(date) return result def load_scan(self, date: str) -> dict[str, Any] | None: """Load macro scan summary. 
        """
        root = self._date_root(date)
        path = root / "market" / "macro_scan_summary.json"
        return self._read_json(path)

    # ------------------------------------------------------------------
    # Per-Ticker Analysis
    # ------------------------------------------------------------------

    def save_analysis(self, date: str, ticker: str, data: dict[str, Any]) -> Path:
        """Save per-ticker analysis report as JSON.

        Path: ``{base}/daily/{date}[/runs/{run_id}]/{TICKER}/complete_report.json``

        Args:
            date: ISO date string.
            ticker: Ticker symbol (stored as uppercase).
            data: Analysis output dict.
        """
        root = self._date_root(date, for_write=True)
        path = root / ticker.upper() / "complete_report.json"
        result = self._write_json(path, data)
        self._update_latest(date)
        return result

    def load_analysis(self, date: str, ticker: str) -> dict[str, Any] | None:
        """Load per-ticker analysis JSON.

        Returns None if the file does not exist.
        """
        root = self._date_root(date)
        path = root / ticker.upper() / "complete_report.json"
        return self._read_json(path)

    # ------------------------------------------------------------------
    # Holding Reviews
    # ------------------------------------------------------------------

    def save_holding_review(
        self,
        date: str,
        ticker: str,
        data: dict[str, Any],
    ) -> Path:
        """Save holding reviewer output for one ticker.

        Path: ``{base}/daily/{date}[/runs/{run_id}]/portfolio/{TICKER}_holding_review.json``

        Args:
            date: ISO date string.
            ticker: Ticker symbol (stored as uppercase).
            data: HoldingReviewerAgent output dict.
        """
        path = self._portfolio_dir(date, for_write=True) / f"{ticker.upper()}_holding_review.json"
        result = self._write_json(path, data)
        self._update_latest(date)
        return result

    def load_holding_review(self, date: str, ticker: str) -> dict[str, Any] | None:
        """Load holding review output.

        Returns None if the file does not exist.
        """
        path = self._portfolio_dir(date) / f"{ticker.upper()}_holding_review.json"
        return self._read_json(path)

    # ------------------------------------------------------------------
    # Risk Metrics
    # ------------------------------------------------------------------

    def save_risk_metrics(
        self,
        date: str,
        portfolio_id: str,
        data: dict[str, Any],
    ) -> Path:
        """Save risk computation results.

        Path: ``{base}/daily/{date}[/runs/{run_id}]/portfolio/{portfolio_id}_risk_metrics.json``

        Args:
            date: ISO date string.
            portfolio_id: UUID of the target portfolio.
            data: Risk metrics dict (Sharpe, Sortino, VaR, etc.).
        """
        path = self._portfolio_dir(date, for_write=True) / f"{portfolio_id}_risk_metrics.json"
        result = self._write_json(path, data)
        self._update_latest(date)
        return result

    def load_risk_metrics(
        self,
        date: str,
        portfolio_id: str,
    ) -> dict[str, Any] | None:
        """Load risk metrics.

        Returns None if the file does not exist.
        """
        path = self._portfolio_dir(date) / f"{portfolio_id}_risk_metrics.json"
        return self._read_json(path)

    # ------------------------------------------------------------------
    # PM Decisions
    # ------------------------------------------------------------------

    def save_pm_decision(
        self,
        date: str,
        portfolio_id: str,
        data: dict[str, Any],
        markdown: str | None = None,
    ) -> Path:
        """Save PM agent decision.

        JSON path:
            ``{base}/daily/{date}[/runs/{run_id}]/portfolio/{portfolio_id}_pm_decision.json``
        MD path:
            ``…/{portfolio_id}_pm_decision.md`` (written only when
            ``markdown`` is not None)

        Args:
            date: ISO date string.
            portfolio_id: UUID of the target portfolio.
            data: PM decision dict (sells, buys, holds, rationale, …).
            markdown: Optional human-readable version; written when provided.

        Returns:
            Path of the written JSON file.
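
        Example (placeholder values; ``store`` as in the module-level usage)::

            store.save_pm_decision(
                date="2026-03-20",
                portfolio_id="<portfolio-uuid>",  # placeholder, not a real id
                data={"sells": [], "buys": [], "holds": [], "rationale": ""},
                markdown="# PM Decision",
            )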
        """
        pdir = self._portfolio_dir(date, for_write=True)
        json_path = pdir / f"{portfolio_id}_pm_decision.json"
        self._write_json(json_path, data)
        if markdown is not None:
            md_path = pdir / f"{portfolio_id}_pm_decision.md"
            try:
                md_path.write_text(markdown, encoding="utf-8")
            except OSError as exc:
                raise ReportStoreError(f"Failed to write {md_path}: {exc}") from exc
        self._update_latest(date)
        return json_path

    def load_pm_decision(
        self,
        date: str,
        portfolio_id: str,
    ) -> dict[str, Any] | None:
        """Load PM decision JSON.

        Returns None if the file does not exist.
        """
        path = self._portfolio_dir(date) / f"{portfolio_id}_pm_decision.json"
        return self._read_json(path)

    def save_execution_result(
        self,
        date: str,
        portfolio_id: str,
        data: dict[str, Any],
    ) -> Path:
        """Save trade execution results.

        Path: ``{base}/daily/{date}[/runs/{run_id}]/portfolio/{portfolio_id}_execution_result.json``

        Args:
            date: ISO date string.
            portfolio_id: UUID of the target portfolio.
            data: TradeExecutor output dict.
        """
        path = self._portfolio_dir(date, for_write=True) / f"{portfolio_id}_execution_result.json"
        result = self._write_json(path, data)
        self._update_latest(date)
        return result

    def load_execution_result(
        self,
        date: str,
        portfolio_id: str,
    ) -> dict[str, Any] | None:
        """Load execution result.

        Returns None if the file does not exist.
        """
        path = self._portfolio_dir(date) / f"{portfolio_id}_execution_result.json"
        return self._read_json(path)

    def clear_portfolio_stage(self, date: str, portfolio_id: str) -> list[str]:
        """Delete PM decision and execution result files for a given date/portfolio.

        Returns a list of deleted file names so the caller can log what was
        removed.
        """
        pdir = self._portfolio_dir(date, for_write=True)
        targets = [
            pdir / f"{portfolio_id}_pm_decision.json",
            pdir / f"{portfolio_id}_pm_decision.md",
            pdir / f"{portfolio_id}_execution_result.json",
        ]
        deleted: list[str] = []
        for path in targets:
            if path.exists():
                path.unlink()
                deleted.append(path.name)
        return deleted

    # ------------------------------------------------------------------
    # Run Meta / Events persistence
    # ------------------------------------------------------------------

    def save_run_meta(self, date: str, data: dict[str, Any]) -> Path:
        """Save run metadata JSON.

        Path: ``{base}/daily/{date}[/runs/{run_id}]/run_meta.json``
        """
        root = self._date_root(date, for_write=True)
        path = root / "run_meta.json"
        result = self._write_json(path, data)
        self._update_latest(date)
        return result

    def load_run_meta(self, date: str) -> dict[str, Any] | None:
        """Load run metadata.

        Returns None if the file does not exist.
        """
        root = self._date_root(date)
        path = root / "run_meta.json"
        return self._read_json(path)

    def save_run_events(self, date: str, events: list[dict[str, Any]]) -> Path:
        """Save run events as JSONL (one JSON object per line).

        Path: ``{base}/daily/{date}[/runs/{run_id}]/run_events.jsonl``
        """
        root = self._date_root(date, for_write=True)
        path = root / "run_events.jsonl"
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            lines = []
            for evt in events:
                sanitized = self._sanitize(evt)
                lines.append(json.dumps(sanitized, separators=(",", ":")))
            content = "\n".join(lines) + "\n" if lines else ""
            path.write_text(content, encoding="utf-8")
            return path
        except OSError as exc:
            raise ReportStoreError(f"Failed to write {path}: {exc}") from exc

    def load_run_events(self, date: str) -> list[dict[str, Any]]:
        """Load run events from a JSONL file.

        Returns an empty list if the file does not exist.
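
        Example round-trip (made-up event payloads; ``store`` as in the
        module-level usage)::

            store.save_run_events("2026-03-20", [
                {"stage": "scan", "status": "ok"},
                {"stage": "pm_decision", "status": "ok"},
            ])
            events = store.load_run_events("2026-03-20")  # -> two dicts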
        """
        root = self._date_root(date)
        path = root / "run_events.jsonl"
        if not path.exists():
            return []
        events: list[dict[str, Any]] = []
        try:
            for line in path.read_text(encoding="utf-8").splitlines():
                line = line.strip()
                if line:
                    events.append(json.loads(line))
        except json.JSONDecodeError as exc:
            raise ReportStoreError(f"Corrupt JSONL at {path}: {exc}") from exc
        return events

    @classmethod
    def list_run_metas(cls, base_dir: str | Path = "reports") -> list[dict[str, Any]]:
        """Scan for all run_meta.json files and return metadata dicts, newest first.

        Args:
            base_dir: Root reports directory.

        Returns:
            List of run_meta dicts sorted by ``created_at`` descending.
        """
        base = Path(base_dir)
        pattern = "daily/*/runs/*/run_meta.json"
        metas: list[dict[str, Any]] = []
        for path in base.glob(pattern):
            try:
                data = json.loads(path.read_text(encoding="utf-8"))
                metas.append(data)
            except (json.JSONDecodeError, OSError):
                continue
        metas.sort(key=lambda m: m.get("created_at", 0), reverse=True)
        return metas

    # ------------------------------------------------------------------
    # Analyst / Trader Checkpoints
    # ------------------------------------------------------------------

    def save_analysts_checkpoint(
        self, date: str, ticker: str, data: dict[str, Any]
    ) -> Path:
        """Save analysts checkpoint for a ticker.

        Path: ``{base}/daily/{date}[/runs/{run_id}]/{TICKER}/analysts_checkpoint.json``
        """
        root = self._date_root(date, for_write=True)
        path = root / ticker.upper() / "analysts_checkpoint.json"
        result = self._write_json(path, data)
        self._update_latest(date)
        return result

    def load_analysts_checkpoint(
        self, date: str, ticker: str
    ) -> dict[str, Any] | None:
        """Load analysts checkpoint.

        Returns None if the file does not exist.
        """
        root = self._date_root(date)
        path = root / ticker.upper() / "analysts_checkpoint.json"
        return self._read_json(path)

    def save_trader_checkpoint(
        self, date: str, ticker: str, data: dict[str, Any]
    ) -> Path:
        """Save trader checkpoint for a ticker.

        Path: ``{base}/daily/{date}[/runs/{run_id}]/{TICKER}/trader_checkpoint.json``
        """
        root = self._date_root(date, for_write=True)
        path = root / ticker.upper() / "trader_checkpoint.json"
        result = self._write_json(path, data)
        self._update_latest(date)
        return result

    def load_trader_checkpoint(
        self, date: str, ticker: str
    ) -> dict[str, Any] | None:
        """Load trader checkpoint.

        Returns None if the file does not exist.
        """
        root = self._date_root(date)
        path = root / ticker.upper() / "trader_checkpoint.json"
        return self._read_json(path)

    # ------------------------------------------------------------------
    # PM Decision Queries
    # ------------------------------------------------------------------

    def list_pm_decisions(self, portfolio_id: str) -> list[Path]:
        """Return all saved PM decision JSON paths for portfolio_id, newest first.

        Searches both run-scoped and legacy flat layouts.

        Args:
            portfolio_id: UUID of the target portfolio.

        Returns:
            Sorted list of Path objects, newest date first.
        """
        # Run-scoped layout: daily/*/runs/*/portfolio/{pid}_pm_decision.json
        run_pattern = f"daily/*/runs/*/portfolio/{portfolio_id}_pm_decision.json"
        # Legacy flat layout: daily/*/portfolio/{pid}_pm_decision.json
        flat_pattern = f"daily/*/portfolio/{portfolio_id}_pm_decision.json"
        paths = set(self._base_dir.glob(run_pattern)) | set(self._base_dir.glob(flat_pattern))
        return sorted(paths, reverse=True)
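

# ---------------------------------------------------------------------------
# Manual smoke test: an illustrative sketch, not part of the public API. It
# writes under ./reports in the CWD; the run id and payload are made-up
# values. Run with: python -m tradingagents.portfolio.report_store
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    store = ReportStore(run_id="demo0001")
    store.save_scan("2026-03-20", {"watchlist": ["AAPL", "MSFT"]})
    # A fresh store without a run_id resolves through latest.json.
    print(ReportStore().load_scan("2026-03-20"))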