From 2145b0431859932374ec73d627ce2d74b9821344 Mon Sep 17 00:00:00 2001
From: ahmet guzererler <guzererler@gmail.com>
Date: Wed, 25 Mar 2026 11:19:08 +0100
Subject: [PATCH] fix: graceful LLM 404 handling + per-tier fallback model
 config (#108)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: per-tier fallback LLM for provider 404/policy errors

- tool_runner: catch status_code==404 from chain.invoke(), re-raise as
  RuntimeError with actionable message (OpenRouter privacy URL + env var hint)
- langgraph_engine: wrap astream_events in try/except, detect policy errors
  and re-raise with model/provider context
- langgraph_engine: _run_one_ticker distinguishes policy 404s (logger.error,
  no traceback) from real bugs (logger.exception with traceback); if fallback
  is configured, rebuilds pipeline with fallback model tier and retries
- langgraph_engine: add _is_policy_error() and _build_fallback_config() helpers
- default_config: add quick/mid/deep_think_fallback_llm + _provider keys
  (TRADINGAGENTS_QUICK_THINK_FALLBACK_LLM etc.)
- .env.example: document new fallback env vars

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* docs: ADR 017 LLM policy fallback, correct ADR 016 findings, update CLAUDE.md

- docs/agent/decisions/017: add ADR for per-tier LLM fallback design decision
- docs/agent/decisions/016: correct 3 inaccurate review findings — list_pm_decisions
  ObjectId projection, created_at datetime type, and base_dir pointer handling are
  all already correctly implemented in PR#106
- CLAUDE.md: add Per-Tier Fallback LLM section and _is_policy_error critical pattern
- CURRENT_STATE.md: update milestone and recent progress for PR#106/107/108 merges

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .env.example                                  |   8 ++
 CLAUDE.md                                     |  17 +++
 agent_os/backend/services/langgraph_engine.py | 136 +++++++++++++++---
 docs/agent/CURRENT_STATE.md                   |  34 ++---
 .../decisions/016-pr106-review-findings.md    |  13 ++
 .../decisions/017-llm-policy-fallback.md      |  68 +++++++++
 tradingagents/agents/utils/tool_runner.py     |  12 +-
 tradingagents/default_config.py               |  17 +++
 8 files changed, 255 insertions(+), 50 deletions(-)
 create mode 100644 docs/agent/decisions/017-llm-policy-fallback.md

diff --git a/.env.example b/.env.example
index b9dc54dc..ab4543e5 100644
--- a/.env.example
+++ b/.env.example
@@ -104,3 +104,11 @@ FINNHUB_API_KEY=
 # TRADINGAGENTS_PM_MIN_CASH_PCT=0.05         # minimum cash reserve
 # TRADINGAGENTS_PM_DEFAULT_BUDGET=100000.0   # starting cash budget (USD)
 
+# ── Per-tier fallback LLM (used when primary model returns 404/policy error) ─
+# TRADINGAGENTS_QUICK_THINK_FALLBACK_LLM=gpt-5-mini
+# TRADINGAGENTS_QUICK_THINK_FALLBACK_LLM_PROVIDER=openai
+# TRADINGAGENTS_MID_THINK_FALLBACK_LLM=gpt-5-mini
+# TRADINGAGENTS_MID_THINK_FALLBACK_LLM_PROVIDER=openai
+# TRADINGAGENTS_DEEP_THINK_FALLBACK_LLM=gpt-5.2
+# TRADINGAGENTS_DEEP_THINK_FALLBACK_LLM_PROVIDER=openai
+
diff --git a/CLAUDE.md b/CLAUDE.md
index 7c24a83a..2b32eac9 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -97,6 +97,7 @@ OpenAI, Anthropic, Google, xAI, OpenRouter, Ollama
 - **Ollama remote host**: Never hardcode `localhost:11434`. Use configured `base_url`.
 - **.env loading**: `load_dotenv()` runs at module level in `default_config.py` — import-order-independent. Check actual env var values when debugging auth.
 - **Rate limiter locks**: Never hold a lock during `sleep()` or IO. Release, sleep, re-acquire.
+- **LLM policy errors**: `_is_policy_error(exc)` detects 404 from any provider (checks `status_code` attribute or message content). `_build_fallback_config(config)` substitutes per-tier fallback models. Both live in `agent_os/backend/services/langgraph_engine.py`.
 - **Config fallback keys**: `llm_provider` and `backend_url` must always exist at top level — `scanner_graph.py` and `trading_graph.py` use them as fallbacks.
 
 ## Agentic Memory (docs/agent/)
@@ -131,6 +132,22 @@ TRADINGAGENTS_VENDOR_SCANNER_DATA=alpha_vantage
 
 Empty or unset vars preserve the hardcoded default. `None`-default fields (like `mid_think_llm`) stay `None` when unset, preserving fallback semantics.
 
+### Per-Tier Fallback LLM
+
+When a model returns HTTP 404 (blocked by provider guardrail/policy), the engine
+auto-detects it via `_is_policy_error()` and retries with a per-tier fallback:
+
+```env
+TRADINGAGENTS_QUICK_THINK_FALLBACK_LLM=gpt-5-mini
+TRADINGAGENTS_QUICK_THINK_FALLBACK_LLM_PROVIDER=openai
+TRADINGAGENTS_MID_THINK_FALLBACK_LLM=gpt-5-mini
+TRADINGAGENTS_MID_THINK_FALLBACK_LLM_PROVIDER=openai
+TRADINGAGENTS_DEEP_THINK_FALLBACK_LLM=gpt-5.2
+TRADINGAGENTS_DEEP_THINK_FALLBACK_LLM_PROVIDER=openai
+```
+
+Leave unset to disable auto-retry (pipeline emits a clear actionable error instead).
+
 ## Running the Scanner
 
 ```bash
diff --git a/agent_os/backend/services/langgraph_engine.py b/agent_os/backend/services/langgraph_engine.py
index 261c1d61..868c826a 100644
--- a/agent_os/backend/services/langgraph_engine.py
+++ b/agent_os/backend/services/langgraph_engine.py
@@ -18,6 +18,37 @@ from tradingagents.observability import RunLogger, set_run_logger
 
 logger = logging.getLogger("agent_os.engine")
 
+# ---------------------------------------------------------------------------
+# LLM policy / 404 error helpers
+# ---------------------------------------------------------------------------
+
+def _is_policy_error(exc: Exception) -> bool:
+    """Return True if *exc* is a provider 404 / guardrail / policy error."""
+    if getattr(exc, "status_code", None) == 404:
+        return True
+    cause = getattr(exc, "__cause__", None)
+    if getattr(cause, "status_code", None) == 404:
+        return True
+    # Catch RuntimeErrors wrapped by tool_runner
+    msg = str(exc).lower()
+    return "404" in msg and ("policy" in msg or "guardrail" in msg or "openrouter" in msg)
+
+
+def _build_fallback_config(config: dict) -> "dict | None":
+    """Return config with per-tier fallback models substituted, or None if none set."""
+    tiers = ("quick_think", "mid_think", "deep_think")
+    replacements: dict = {}
+    for tier in tiers:
+        fb_llm = config.get(f"{tier}_fallback_llm")
+        fb_prov = config.get(f"{tier}_fallback_llm_provider")
+        if fb_llm:
+            replacements[f"{tier}_llm"] = fb_llm
+        if fb_prov:
+            replacements[f"{tier}_llm_provider"] = fb_prov
+    if not replacements:
+        return None
+    return {**config, **replacements}
+
 # Maximum characters of prompt/response content to include in the short message
 _MAX_CONTENT_LEN = 300
 
@@ -296,22 +327,33 @@ class LangGraphEngine:
         self._run_identifiers[run_id] = ticker.upper()
         final_state: Dict[str, Any] = {}
 
-        async for event in graph_wrapper.graph.astream_events(
-            initial_state,
-            version="v2",
-            config={
-                "recursion_limit": graph_wrapper.propagator.max_recur_limit,
-                "callbacks": [rl.callback],
-            },
-        ):
-            # Capture the complete final state from the root graph's terminal event.
-            if self._is_root_chain_end(event):
-                output = (event.get("data") or {}).get("output")
-                if isinstance(output, dict):
-                    final_state = output
-            mapped = self._map_langgraph_event(run_id, event)
-            if mapped:
-                yield mapped
+        try:
+            async for event in graph_wrapper.graph.astream_events(
+                initial_state,
+                version="v2",
+                config={
+                    "recursion_limit": graph_wrapper.propagator.max_recur_limit,
+                    "callbacks": [rl.callback],
+                },
+            ):
+                # Capture the complete final state from the root graph's terminal event.
+                if self._is_root_chain_end(event):
+                    output = (event.get("data") or {}).get("output")
+                    if isinstance(output, dict):
+                        final_state = output
+                mapped = self._map_langgraph_event(run_id, event)
+                if mapped:
+                    yield mapped
+        except Exception as exc:
+            if _is_policy_error(exc):
+                model = self.config.get("quick_think_llm") or self.config.get("llm_provider", "unknown")
+                provider = self.config.get("llm_provider", "unknown")
+                raise RuntimeError(
+                    f"LLM 404 (model={model}, provider={provider}): model blocked by "
+                    f"provider policy — https://openrouter.ai/settings/privacy — "
+                    f"or set TRADINGAGENTS_QUICK/MID/DEEP_THINK_FALLBACK_LLM."
+                ) from exc
+            raise
 
         self._node_start_times.pop(run_id, None)
         self._node_prompts.pop(run_id, None)
@@ -693,14 +735,62 @@ class LangGraphEngine:
                         ):
                             await pipeline_queue.put(evt)
                     except Exception as exc:
-                        logger.exception(
-                            "Pipeline failed ticker=%s run=%s", ticker, run_id
-                        )
-                        await pipeline_queue.put(
-                            self._system_log(
-                                f"Warning: pipeline for {ticker} failed: {exc}"
+                        if _is_policy_error(exc):
+                            logger.error(
+                                "Pipeline blocked ticker=%s run=%s: %s", ticker, run_id, exc
+                            )
+                            fallback_config = _build_fallback_config(self.config)
+                            if fallback_config:
+                                fallback_models = ", ".join(
+                                    f"{t}={fallback_config.get(f'{t}_llm', 'same')}"
+                                    for t in ("quick_think", "mid_think", "deep_think")
+                                    if fallback_config.get(f"{t}_llm") != self.config.get(f"{t}_llm")
+                                )
+                                await pipeline_queue.put(
+                                    self._system_log(
+                                        f"Primary model blocked for {ticker} — retrying with "
+                                        f"fallback: {fallback_models}…"
+                                    )
+                                )
+                                original_config = self.config
+                                self.config = fallback_config
+                                try:
+                                    async for evt in self.run_pipeline(
+                                        f"{run_id}_fallback_{ticker}",
+                                        {"ticker": ticker, "date": date},
+                                    ):
+                                        await pipeline_queue.put(evt)
+                                except Exception as fallback_exc:
+                                    logger.error(
+                                        "Fallback pipeline failed ticker=%s: %s",
+                                        ticker, fallback_exc,
+                                    )
+                                    await pipeline_queue.put(
+                                        self._system_log(
+                                            f"Warning: pipeline for {ticker} failed "
+                                            f"(fallback also failed): {fallback_exc}"
+                                        )
+                                    )
+                                finally:
+                                    self.config = original_config
+                            else:
+                                await pipeline_queue.put(
+                                    self._system_log(
+                                        f"Warning: pipeline for {ticker} blocked by LLM provider policy. "
+                                        f"{exc} — "
+                                        f"Set TRADINGAGENTS_QUICK_THINK_FALLBACK_LLM (and MID/DEEP) "
+                                        f"to auto-retry with a different model."
+                                    )
+                                )
+                        else:
+                            logger.exception(
+                                "Pipeline failed ticker=%s run=%s", ticker, run_id
+                            )
+                            await pipeline_queue.put(
+                                self._system_log(
+                                    f"Warning: pipeline for {ticker} failed: {exc}"
+                                )
                             )
-                        )
 
             async def _pipeline_producer() -> None:
                 await asyncio.gather(*[_run_one_ticker(t) for t in tickers])
diff --git a/docs/agent/CURRENT_STATE.md b/docs/agent/CURRENT_STATE.md
index d97d1fb8..cf112884 100644
--- a/docs/agent/CURRENT_STATE.md
+++ b/docs/agent/CURRENT_STATE.md
@@ -1,38 +1,20 @@
 # Current Milestone
 
-Smart Money Scanner added to scanner pipeline (Phase 1b). MongoDB report store + run-ID namespacing + reflexion memory added. PR#106 review findings addressed (ADR 016). 18 agent factories. All tests passing (886 passed, 14 skipped).
+LLM provider policy error handling complete. Per-tier fallback models (`TRADINGAGENTS_QUICK/MID/DEEP_THINK_FALLBACK_LLM`) auto-retry blocked pipelines. PR#106 observability + MongoDB merged. PR#107 and PR#108 merged. All tests passing (2 pre-existing failures excluded).
 
 # Recent Progress
 
-- **PR#106 review fixes (ADR 016)**:
-  - Fix 1: `save_holding_review` iteration — was passing `portfolio_id` as ticker; now iterates per ticker
-  - Fix 2: `contextvars.ContextVar` replaces `threading.local` for RunLogger — async-safe
-  - Fix 3: `list_pm_decisions` — added `{"_id": 0}` projection to exclude non-serializable ObjectId
-  - Fix 4: `ReflexionMemory.created_at` — native `datetime` for MongoDB, ISO string for local JSON fallback
-  - Fix 5: `write/read_latest_pointer` — accepts `base_dir` parameter; `ReportStore` passes its `_base_dir`
-  - Fix 6: `RunLogger.callback` — wired into all 3 `astream_events()` calls (scan, pipeline, portfolio)
-  - Fix 7: `MongoReportStore.__init__` — calls `ensure_indexes()` automatically
-  - `docs/agent/decisions/016-pr106-review-findings.md` — full writeup of all 13 findings and resolutions
-  - Tests: 14 new tests covering all 7 fixes
-- **MongoDB Report Store + Run-ID + Reflexion (current branch)**:
-  - `tradingagents/report_paths.py` — All path helpers accept optional `run_id` for run-scoped directories; `latest.json` pointer mechanism
-  - `tradingagents/portfolio/report_store.py` — `ReportStore` supports `run_id` + `latest.json` pointer for read resolution
-  - `tradingagents/portfolio/mongo_report_store.py` — MongoDB-backed report store (same interface as filesystem)
-  - `tradingagents/portfolio/store_factory.py` — Factory returns MongoDB or filesystem store based on config
-  - `tradingagents/memory/reflexion.py` — Reflexion memory: store decisions, record outcomes, build context for agent prompts
-  - `agent_os/backend/services/langgraph_engine.py` — Uses store factory + run_id for all run methods; fixed run_portfolio directory iteration for run-scoped layouts
-  - `tradingagents/default_config.py` — Added `mongo_uri` and `mongo_db` config keys
-  - `pyproject.toml` — Added `pymongo>=4.12.1` dependency
-  - Tests: 56 new tests (report_paths, report_store run_id, mongo store, reflexion, factory)
-  - `docs/agent/decisions/015-mongodb-report-store-reflexion.md` — ADR documenting all design decisions
-- **Smart Money Scanner**: 4th scanner node added to macro pipeline
+- **PR#108 merged**: Per-tier LLM fallback for 404/policy errors — `_is_policy_error()` + `_build_fallback_config()` in engine, 6 new fallback config keys, clean `logger.error` (no traceback) for policy issues (ADR 017)
+- **PR#107 merged**: `save_holding_review` per-ticker fix, `RunLogger` threading.local → contextvars.ContextVar, ADR 016 PR#106 review findings (corrected post-verification)
+- **PR#106 merged**: MongoDB report store, RunLogger observability, reflexion memory, run-ID namespaced reports, store factory with graceful filesystem fallback
+- **Smart Money Scanner**: Finviz integration with Golden Overlap strategy (ADR 014)
 - **AgentOS**: Full-stack visual observability layer (FastAPI + React + ReactFlow)
-- **Portfolio Manager**: Phases 1–10 fully implemented
+- **Portfolio Manager**: Phases 1–10 complete (models, agents, CLI, stop-loss/take-profit)
 
 # In Progress
 
-- None — branch ready for PR
+- None
 
 # Active Blockers
 
-- None currently
+- None
diff --git a/docs/agent/decisions/016-pr106-review-findings.md b/docs/agent/decisions/016-pr106-review-findings.md
index 515ff480..7a55e4a5 100644
--- a/docs/agent/decisions/016-pr106-review-findings.md
+++ b/docs/agent/decisions/016-pr106-review-findings.md
@@ -159,3 +159,16 @@ if isinstance(reviews, dict):
 
 - Plan `pymongo` → `motor` migration before production deployment.
 - Add TTL index strategy after retention policy is decided.
+
+---
+
+## Corrected Status — Post-Verification
+
+Re-verification of the actual `copilot/increase-observability-logging` branch code shows
+that recommended actions 1–3 are **already implemented in PR#106**:
+
+1. ~~Fix `list_pm_decisions` — add `{"_id": 0}` projection~~ **Already done**: `{"_id": 0}` is present with doc comment "Excludes `_id` (BSON ObjectId) which is not JSON-serializable."
+2. ~~Standardise `created_at` to native `datetime` in `ReflexionMemory`~~ **Already done**: uses `datetime.now(timezone.utc)`; ISO string conversion only for local JSON fallback (which is correct — JSON has no native datetime type).
+3. ~~Fix `write_latest_pointer`/`read_latest_pointer` to respect `base_dir`~~ **Already done**: both functions accept `base_dir: Path | None = None`; `_update_latest` already passes `self._base_dir`.
+
+Items 4–7 remain as future work (no code changes required before merge).
diff --git a/docs/agent/decisions/017-llm-policy-fallback.md b/docs/agent/decisions/017-llm-policy-fallback.md
new file mode 100644
index 00000000..173ee569
--- /dev/null
+++ b/docs/agent/decisions/017-llm-policy-fallback.md
@@ -0,0 +1,68 @@
+# ADR 017: Per-Tier LLM Fallback for Provider Policy Errors
+
+**Date**: 2026-03-25
+**Status**: Implemented (PR#108)
+
+## Context
+
+OpenRouter and similar providers return HTTP 404 when a model is blocked by
+account-level guardrail or data policy restrictions:
+
+```
+Error code: 404 - No endpoints available matching your guardrail
+restrictions and data policy.
+```
+
+This caused all per-ticker pipelines to crash with a 100-line stack trace,
+even though the root cause is a configuration/policy issue — not a code bug.
+
+## Decision
+
+Add per-tier fallback LLM support with these design choices:
+
+**1. Detection at `chain.invoke()` level (`tool_runner.py`)**
+Catch `getattr(exc, "status_code", None) == 404` and re-raise as `RuntimeError`
+with the OpenRouter settings URL and fallback env var hints. No direct `openai`
+import — works with any OpenAI-compatible client.
+
+**2. Re-raise with context in `run_pipeline` (`langgraph_engine.py`)**
+Wrap `astream_events` to catch policy errors and re-raise with model name,
+provider, and config guidance. Separates detection from retry logic.
+
+**3. Per-tier retry in `_run_one_ticker`**
+Distinguish policy errors (config issue → `logger.error`, no traceback) from
+real bugs (`logger.exception` with full traceback). If per-tier fallback models
+are configured, rebuild the pipeline config and retry via `_build_fallback_config`.
+
+**4. Per-tier config following existing naming convention**
+```
+quick/mid/deep_think_fallback_llm
+quick/mid/deep_think_fallback_llm_provider
+```
+Overridable via `TRADINGAGENTS_QUICK/MID/DEEP_THINK_FALLBACK_LLM[_PROVIDER]`.
+No-op when unset — backwards compatible.
+
+## Helpers Added
+
+```python
+# agent_os/backend/services/langgraph_engine.py
+def _is_policy_error(exc: Exception) -> bool: ...
+def _build_fallback_config(config: dict) -> dict | None: ...
+```
+
+## Rationale
+
+- **Per-tier not global**: Different tiers may use different providers with
+  different policies. Quick-think agents on free-tier may hit restrictions
+  while deep-think agents on paid plans are fine.
+- **`self.config` swap pattern**: Reuses `run_pipeline` by temporarily swapping
+  `self.config` inside the semaphore-protected `_run_one_ticker` async slot.
+  Thread-safe; `finally` always restores original config.
+- **No direct `openai` import**: Detection via `getattr(exc, "status_code")`
+  works with any OpenAI-compatible client (OpenRouter, xAI, Ollama, etc.).
+
+## Consequences
+
+- 404 policy errors no longer print 100-line tracebacks in logs
+- Operators can add fallback models in `.env` without code changes
+- New config keys documented in `CLAUDE.md` and `.env.example`
diff --git a/tradingagents/agents/utils/tool_runner.py b/tradingagents/agents/utils/tool_runner.py
index ada1dbc4..e6736ac0 100644
--- a/tradingagents/agents/utils/tool_runner.py
+++ b/tradingagents/agents/utils/tool_runner.py
@@ -57,7 +57,17 @@ def run_tool_loop(
     result = None
 
     for _ in range(max_rounds):
-        result: AIMessage = chain.invoke(current_messages)
+        try:
+            result: AIMessage = chain.invoke(current_messages)
+        except Exception as exc:
+            if getattr(exc, "status_code", None) == 404:
+                raise RuntimeError(
+                    f"LLM returned 404 — model may be blocked by provider policy.\n"
+                    f"Original: {exc}\n"
+                    f"If using OpenRouter: https://openrouter.ai/settings/privacy\n"
+                    f"Or set TRADINGAGENTS_QUICK/MID/DEEP_THINK_FALLBACK_LLM."
+                ) from exc
+            raise
         current_messages.append(result)
 
         if not result.tool_calls:
diff --git a/tradingagents/default_config.py b/tradingagents/default_config.py
index 7ec7db0c..b02f3082 100644
--- a/tradingagents/default_config.py
+++ b/tradingagents/default_config.py
@@ -70,6 +70,23 @@ DEFAULT_CONFIG = {
     "mid_think_backend_url": _env("MID_THINK_BACKEND_URL"),         # override backend URL for mid-think model
     "quick_think_llm_provider": _env("QUICK_THINK_LLM_PROVIDER"),  # e.g. "openai", "ollama"
     "quick_think_backend_url": _env("QUICK_THINK_BACKEND_URL"),     # override backend URL for quick-think model
+    # Per-tier fallback LLM — used automatically when primary model returns 404
+    # (e.g. blocked by provider policy). Leave unset to disable auto-retry.
+    # Each tier falls back independently; set only the tiers you need.
+    #
+    # Example .env:
+    #   TRADINGAGENTS_QUICK_THINK_FALLBACK_LLM=gpt-5-mini
+    #   TRADINGAGENTS_QUICK_THINK_FALLBACK_LLM_PROVIDER=openai
+    #   TRADINGAGENTS_MID_THINK_FALLBACK_LLM=gpt-5-mini
+    #   TRADINGAGENTS_MID_THINK_FALLBACK_LLM_PROVIDER=openai
+    #   TRADINGAGENTS_DEEP_THINK_FALLBACK_LLM=gpt-5.2
+    #   TRADINGAGENTS_DEEP_THINK_FALLBACK_LLM_PROVIDER=openai
+    "quick_think_fallback_llm":          _env("QUICK_THINK_FALLBACK_LLM"),
+    "quick_think_fallback_llm_provider": _env("QUICK_THINK_FALLBACK_LLM_PROVIDER"),
+    "mid_think_fallback_llm":            _env("MID_THINK_FALLBACK_LLM"),
+    "mid_think_fallback_llm_provider":   _env("MID_THINK_FALLBACK_LLM_PROVIDER"),
+    "deep_think_fallback_llm":           _env("DEEP_THINK_FALLBACK_LLM"),
+    "deep_think_fallback_llm_provider":  _env("DEEP_THINK_FALLBACK_LLM_PROVIDER"),
     # Provider-specific thinking configuration (applies to all roles unless overridden)
     "google_thinking_level": _env("GOOGLE_THINKING_LEVEL"),      # "high", "minimal", etc.
     "openai_reasoning_effort": _env("OPENAI_REASONING_EFFORT"),  # "medium", "high", "low"