From f862e9187071140f2e4ff4c861a85a0ccf0660cc Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Tue, 14 Apr 2026 07:25:30 +0000
Subject: [PATCH] =?UTF-8?q?learn(iterate):=202026-04-14=20=E2=80=94=20auto?=
 =?UTF-8?q?mated=20iteration=20run?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/iterations/LEARNINGS.md                  | 14 ++---
 docs/iterations/pipeline/scoring.md           |  8 +++
 docs/iterations/scanners/insider_buying.md    | 13 ++++-
 docs/iterations/scanners/short_squeeze.md     |  8 +++
 docs/iterations/scanners/social_dd.md         | 10 +++-
 tradingagents/dataflows/discovery/ranker.py   |  2 +-
 .../discovery/scanners/insider_buying.py      | 52 ++++++++++++++++++-
 7 files changed, 95 insertions(+), 12 deletions(-)

diff --git a/docs/iterations/LEARNINGS.md b/docs/iterations/LEARNINGS.md
index e2c0c76a..c7dc33ed 100644
--- a/docs/iterations/LEARNINGS.md
+++ b/docs/iterations/LEARNINGS.md
@@ -1,19 +1,19 @@
 # Learnings Index
 
-**Last analyzed run:** 2026-04-13
+**Last analyzed run:** 2026-04-14
 
 | Domain | File | Last Updated | One-line Summary |
 |--------|------|--------------|-----------------|
-| options_flow | scanners/options_flow.md | 2026-04-12 | Premium filter confirmed applied; CSCO cross-scanner confluence detected; 45.6% 7d win rate |
-| insider_buying | scanners/insider_buying.md | 2026-04-12 | Staleness pattern (HMH 4 consecutive days); 38.1% 1d, 46.4% 7d win rates — worst volume-to-quality ratio |
+| options_flow | scanners/options_flow.md | 2026-04-12 | Premium filter confirmed applied; CSCO cross-scanner confluence detected; 45.1% 7d win rate (94 recs) |
+| insider_buying | scanners/insider_buying.md | 2026-04-14 | Staleness suppression filter added (PAGS/ZBIO/HMH 3-4 day repeats confirmed); 45.9% 7d, negative avg returns |
 | minervini | scanners/minervini.md | 2026-04-12 | Best performer: 100% 1d win rate (n=3), +3.68% avg; 7 candidates in Apr 6-12 week |
-| analyst_upgrades | scanners/analyst_upgrades.md | 2026-04-12 | 50% 7d win rate (breakeven); cross-scanner confluence with options_flow is positive signal |
+| analyst_upgrades | scanners/analyst_upgrades.md | 2026-04-12 | 51.6% 7d win rate (marginal positive); cross-scanner confluence with options_flow is positive signal |
 | earnings_calendar | scanners/earnings_calendar.md | 2026-04-12 | Appears as earnings_play; 38.1% 1d, 37.7% 7d — poor; best setups require high short interest |
-| pipeline/scoring | pipeline/scoring.md | 2026-04-12 | stats summary now surfaces worst performers; news_catalyst 0% 7d, social_hype 14.3% 7d — worst strategies |
+| pipeline/scoring | pipeline/scoring.md | 2026-04-14 | news_catalyst 0% 7d now explicit in ranker criteria; insider staleness filter implemented; 41.9% overall 7d win rate |
 | early_accumulation | scanners/early_accumulation.md | 2026-04-12 | Sub-threshold (score=60); no catalyst → structurally score-capped by ranker |
-| social_dd | scanners/social_dd.md | 2026-04-12 | Sub-threshold (score=56); BUT 55% 30d win rate — diverges from social_hype; ranker may be suppressing it incorrectly |
+| social_dd | scanners/social_dd.md | 2026-04-14 | 57.1% 30d win rate (12/21 wins, +1.41% avg 30d; n=26 recs) — only scanner positive at 30d; eval horizon mismatch persists |
 | volume_accumulation | scanners/volume_accumulation.md | — | No data yet |
-| short_squeeze | scanners/short_squeeze.md | 2026-04-13 | First real data: 60% 7d win rate, +2.15% avg 7d (n=10) — best 7d performer; DTC now surfaced in context |
+| short_squeeze | scanners/short_squeeze.md | 2026-04-14 | 60% 7d win rate (6/10 wins, n=11 recs), best 7d performer; BUT 30% 30d win rate — short-term signal only, degrades at 30d |
 
 ## Research
 
diff --git a/docs/iterations/pipeline/scoring.md b/docs/iterations/pipeline/scoring.md
index 2a1b66ff..fa15e748 100644
--- a/docs/iterations/pipeline/scoring.md
+++ b/docs/iterations/pipeline/scoring.md
@@ -26,3 +26,11 @@ No evidence yet on whether confidence or score is a better predictor of outcomes
 - minervini: best short-term signal but small sample (n=3 for 1d tracking).
 - **Critical gap confirmed**: `format_stats_summary()` shows only top 3 best strategies. LLM never sees news_catalyst (0% 7d) or social_hype (14.3% 7d) as poor performers.
 - Confidence: high
+
+### 2026-04-14 — P&L update (mature recs, 3rd iteration: Apr 3-9)
+- news_catalyst: still 0% 7d win rate, -8.37% avg 7d (8 samples, +1 since last analysis). WTI appeared Apr 3 (score=72) and Apr 6 (score=78) despite 0% track record. Ranker prompt updated: news_catalyst now explicitly flagged as "AVOID by default" with 0% win rate stated in criteria section.
+- social_hype: 18.2% 7d win rate (updated from 14.3%), -4.58% avg 7d (22 samples). LLY scored 82 and AI scored 80 from social_hype in Apr 3-9 — overconfident. Ranker prompt already warns "SPECULATIVE" for social_hype.
+- short_squeeze: 7d 60% win rate confirmed; **30d 30%** — signal degrades sharply. Noted in short_squeeze.md.
+- insider_buying staleness: 50% of insider_buying picks in Apr 3-9 were stale repeats (PAGS×4, ZBIO×4, HMH×3). Staleness suppression filter implemented in `insider_buying.py`.
+- Overall pipeline: 626 tracked recs, 41.9% 7d win rate, 34.7% 30d win rate, -2.79% avg 30d return.
+- Confidence: high
diff --git a/docs/iterations/scanners/insider_buying.md b/docs/iterations/scanners/insider_buying.md
index 4443081b..313aa01f 100644
--- a/docs/iterations/scanners/insider_buying.md
+++ b/docs/iterations/scanners/insider_buying.md
@@ -28,7 +28,16 @@ and included in candidate context — dropping them loses signal clarity.
 - High confidence (avg 7.1) combined with poor actual win rates = miscalibration — scanner assigns scores optimistically but real outcomes are below 50%.
 - Confidence: high
 
+### 2026-04-14 — P&L review (Apr 3-9 mature recs) + staleness filter implementation
+- Staleness pattern confirmed at scale: PAGS appeared 4 consecutive days (Apr 3-6, identical $10.34 entry, same Director Frias $4.96M purchase). ZBIO appeared 4 consecutive days (Apr 3-6, same $5.59M cluster buy). HMH appeared 3 consecutive days (Apr 7-9, same CFO $1M purchase).
+- 11 of 22 insider_buying picks in Apr 3-9 (50%) were stale repeats — same Form 4 filing surfaced daily within the 7-day lookback window.
+- Root cause: `lookback_days=7` causes any filing made on day D to appear every day from D through D+6. The deduplication is within a single fetch, not across runs.
+- Code fix: Added `_load_recent_insider_tickers(suppress_days=2)` in `insider_buying.py`. Loads the past 2 days of recommendation files and filters out tickers already recommended as `insider_buying`. This directly suppresses the PAGS/ZBIO/HMH pattern.
+- Updated statistics: 184 recs total (+48 since last analysis). 7d win rate 45.9% (was 46.4%), 30d win rate 32.8%. Avg returns negative at all horizons: -0.01% 1d, -0.44% 7d, -1.62% 30d.
+- Confidence: high (staleness pattern now confirmed across 3 distinct tickers in a single week)
+
 ## Pending Hypotheses
 - [x] Does cluster detection (2+ insiders in 14 days) outperform single-insider signals? → **Already implemented**: cluster detection assigns CRITICAL priority. Code verified at `insider_buying.py:73-74`. Cannot assess outcome vs single-insider yet (all statuses 'open').
-- [ ] Is there a minimum transaction size below which signal quality degrades sharply? (current min: $25K — candidates with $25K-$50K transactions show up at lower scores but still make final ranking)
-- [ ] Does filtering out repeat appearances of the same ticker from the same scanner within 3 days improve precision?
+- [x] Does filtering out repeat appearances of the same ticker from the same scanner within 3 days improve precision? → **Implemented 2026-04-14**: staleness suppression added, uses 2-day lookback against recommendation history.
+- [ ] Is there a minimum transaction size below which signal quality degrades sharply? (current min: $100K, raised from $25K on 2026-04-07)
+- [ ] Does the staleness suppression (2-day lookback) measurably improve 7d win rate? Track over next 2 weeks.
diff --git a/docs/iterations/scanners/short_squeeze.md b/docs/iterations/scanners/short_squeeze.md
index 4a1d9f78..24726223 100644
--- a/docs/iterations/scanners/short_squeeze.md
+++ b/docs/iterations/scanners/short_squeeze.md
@@ -29,9 +29,17 @@ alone and is the primary confluence hypothesis under test.
 - Updated `short_squeeze.py` context string to include DTC value so ranker can use "days to cover" criterion.
 - Confidence: high (this is a clear context gap between ranker criteria and available data)
 
+### 2026-04-14 — P&L review (updated statistics, n=11)
+- 7d win rate: 60% (6/10 wins), avg 7d return: +2.15% — still best 7d performer. No change from prior analysis.
+- **NEW: 30d win rate: 30% (3/10), avg 30d return: -1.1%** — signal degrades sharply at 30d. The squeeze resolves (or fails) within 7 days; holding longer is harmful.
+- This confirms short_squeeze is a **short-term-only signal**. The 7d alpha is real; the 30d outcome is poor.
+- Pattern: WTI and TSLA appeared in Apr 3-9 mature recs as short_squeeze plays — high SI but no clear catalyst timing to trigger covering.
+- Confidence: medium (n=11 still small; 30d degradation pattern is consistent with academic squeeze literature)
+
 ## Pending Hypotheses
 - [ ] Does short_squeeze + options_flow confluence produce better 7d win rate than either scanner alone?
 - [ ] Does short_squeeze + earnings_calendar (SI>20%) produce better outcomes than earnings alone? (See earnings_calendar.md pending hypothesis)
 - [ ] Is there a volume threshold (e.g., market cap <$2B small-cap) that sharpens the signal?
 - [ ] Does DTC >5 (now surfaced in context) predict better outcomes than DTC 2-5 within the scanner?
 - [ ] Does standalone short_squeeze (no cross-scanner confluence) continue to outperform at 7d as sample grows?
+- [ ] Should max holding period for short_squeeze be capped at 7 days in ranker guidance? 30d win rate 30% supports this.
diff --git a/docs/iterations/scanners/social_dd.md b/docs/iterations/scanners/social_dd.md
index 8bd80a0c..7fb7f80d 100644
--- a/docs/iterations/scanners/social_dd.md
+++ b/docs/iterations/scanners/social_dd.md
@@ -28,8 +28,16 @@ incorrect. Setups currently score below 65 and are filtered by the score thresho
 - **Key insight**: the evaluation horizon mismatch is the real issue. Downstream recommendation scoring and ranker calibration use 7d outcomes, which penalize social_dd unfairly. The scanner works — but only at 30d.
 - Confidence: high (n=25, consistent with prior 55% 30d finding)
 
+### 2026-04-14 — P&L review (updated statistics, n=26)
+- 30d win rate: 57.1% (12/21 wins), avg 30d return: +1.41% — confirmed improvement from prior 55%/+0.94% reading.
+- 7d win rate: 44.0%, avg 7d return: -1.47% — poor at shorter horizon as expected.
+- 1d win rate: 46.2%, avg 1d return: +0.66% — slight positive 1d signal (new observation).
+- social_dd remains the **only scanner positive at 30d** across all strategies.
+- Apr 3-9 mature recs: GME (Apr 8, score=56, conf=5) was the only social_dd pick. Sub-threshold, no recommendation generated. Score reflects weak fundamentals (-13.9% rev growth, insider selling) — appropriate.
+- Confidence: high (n=26, consistent 30d outperformance confirmed across two analysis cycles)
+
 ## Pending Hypotheses
 - [x] Does the ranker's "social_dd / social_hype → SPECULATIVE" grouping suppress social_dd scores? → **Partially false**: avg score is 71.5, suppression affects only 3/25 picks. Not the primary issue.
-- [ ] Should social_dd get a separate ranker treatment from social_hype, given divergent 30d outcomes? → Still open. social_hype 7d win rate 14.3% vs social_dd 30d 60% — they are fundamentally different signals.
+- [ ] Should social_dd get a separate ranker treatment from social_hype, given divergent 30d outcomes? → Still open. social_hype 7d win rate 18.2% vs social_dd 30d 57.1% — they are fundamentally different signals.
 - [ ] Fix evaluation horizon: ranker and recommendation system should assess social_dd at 30d, not 7d. This may require a scanner-level `eval_horizon` config field.
 - [ ] At what social score threshold (>75? >85?) does the setup reliably score ≥65 to generate recommendations? → Lower priority now that suppression is not the main issue.
diff --git a/tradingagents/dataflows/discovery/ranker.py b/tradingagents/dataflows/discovery/ranker.py
index 26321b97..70491d2c 100644
--- a/tradingagents/dataflows/discovery/ranker.py
+++ b/tradingagents/dataflows/discovery/ranker.py
@@ -306,7 +306,7 @@ Each candidate was discovered by a specific scanner. Evaluate them using the cri
 - **social_hype**: Treat as SPECULATIVE (14.3% 7d win rate, -4.84% avg 7d return). Require strong corroborating evidence. Pure social sentiment without data backing should score below 50.
 - **short_squeeze**: Focus on short interest %, days to cover, cost to borrow, and whether a catalyst exists to trigger covering. High SI alone is not enough.
 - **contrarian_value**: Focus on oversold technicals (RSI <30), fundamental support (earnings stability), and a clear reason why the selloff is overdone.
-- **news_catalyst**: Focus on the materiality of the news, whether it's already priced in (check intraday move), and the timeline of impact.
+- **news_catalyst**: **AVOID by default** — 0% historical 7d win rate (-8.37% avg 7d return, n=8). Only score ≥55 if the catalyst is (1) not yet reflected in the intraday move, (2) mechanistic and specific (FDA decision, contract win, regulatory approval), NOT macroeconomic framing ('geopolitical tension', 'oil price', 'rate expectations'). Macro news_catalyst setups should score <50.
 - **sector_rotation**: Focus on relative strength vs sector ETF, whether the stock is a laggard in an accelerating sector.
 - **minervini**: Focus on the RS Rating (top 30% = RS>=70, top 10% = RS>=90) as the primary signal. Verify all 6 trend template conditions are met (price structure above rising SMAs). Strongest setups combine RS>=85 with price consolidating near highs (within 10-15% of 52w high) — these have minimal overhead supply. Penalize if RS Rating is borderline (70-75) without other confirming signals.
 - **ml_signal**: Use the ML Win Probability as a strong quantitative signal. Scores above 65% deserve significant weight.
diff --git a/tradingagents/dataflows/discovery/scanners/insider_buying.py b/tradingagents/dataflows/discovery/scanners/insider_buying.py
index 6cef7e3d..f57825aa 100644
--- a/tradingagents/dataflows/discovery/scanners/insider_buying.py
+++ b/tradingagents/dataflows/discovery/scanners/insider_buying.py
@@ -1,6 +1,9 @@
 """SEC Form 4 insider buying scanner."""
 
-from typing import Any, Dict, List
+import json
+from datetime import date, timedelta
+from pathlib import Path
+from typing import Any, Dict, List, Set
 
 from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner
 from tradingagents.dataflows.discovery.utils import Priority
@@ -115,6 +118,19 @@ class InsiderBuyingScanner(BaseScanner):
             candidates.sort(key=lambda c: c.get("insider_score", 0), reverse=True)
             candidates = candidates[: self.limit]
 
+            # Staleness suppression: filter tickers already recommended as insider_buying
+            # in the past 2 days (same Form 4 filing appears daily within lookback_days window)
+            recently_seen = self._load_recent_insider_tickers(suppress_days=2)
+            if recently_seen:
+                before_tickers = {c["ticker"] for c in candidates}
+                candidates = [c for c in candidates if c["ticker"] not in recently_seen]
+                suppressed = before_tickers - {c["ticker"] for c in candidates}
+                if suppressed:
+                    logger.info(
+                        f"Staleness filter: suppressed {len(suppressed)} ticker(s) already "
+                        f"recommended as insider_buying in the past 2 days: {suppressed}"
+                    )
+
             logger.info(f"Insider buying: {len(candidates)} candidates")
             return candidates
 
@@ -123,4 +139,38 @@ class InsiderBuyingScanner(BaseScanner):
             return []
 
 
+    def _load_recent_insider_tickers(self, suppress_days: int = 2) -> Set[str]:
+        """Return upper-cased tickers recommended as insider_buying recently.
+
+        Reads ``<data_dir>/recommendations/<YYYY-MM-DD>.json`` for each of the
+        ``suppress_days`` days before today (today excluded) so stale Form 4
+        filings that re-surface daily can be filtered out.  Best-effort: an
+        unreadable or malformed file is logged and skipped, never raised.
+        """
+        seen: Set[str] = set()
+        recs_dir = Path(self.config.get("data_dir", "data")) / "recommendations"
+        if not recs_dir.exists():
+            return seen
+
+        today = date.today()
+        for offset in range(1, suppress_days + 1):
+            rec_file = recs_dir / f"{(today - timedelta(days=offset)).isoformat()}.json"
+            if not rec_file.exists():
+                continue
+            try:
+                with open(rec_file, encoding="utf-8") as f:
+                    data = json.load(f)
+            except (OSError, ValueError) as exc:
+                logger.warning(f"Staleness filter: could not read {rec_file}: {exc}")
+                continue
+            recs = data.get("recommendations") if isinstance(data, dict) else None
+            for rec in recs if isinstance(recs, list) else []:
+                if isinstance(rec, dict) and rec.get("strategy_match") == "insider_buying":
+                    ticker = str(rec.get("ticker") or "").strip().upper()
+                    if ticker:
+                        seen.add(ticker)
+
+        return seen
+
+
 SCANNER_REGISTRY.register(InsiderBuyingScanner)