From a51d6193f8efc64ba8759b3be611804af3eb19dd Mon Sep 17 00:00:00 2001 From: Youssef Aitousarrah Date: Sun, 12 Apr 2026 19:10:30 -0700 Subject: [PATCH] =?UTF-8?q?research(short-squeeze):=202026-04-12=20?= =?UTF-8?q?=E2=80=94=20new=20short=5Fsqueeze=20scanner;=20high=20SI=20(>20?= =?UTF-8?q?%)=20as=20squeeze-risk=20discovery=20for=20cross-scanner=20conf?= =?UTF-8?q?luence?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements ShortSqueezeScanner wrapping existing get_short_interest() in finviz_scraper.py. Research finding: raw high SI predicts negative long-term returns (academic); edge is using SI as a squeeze-risk flag when combined with earnings_calendar or options_flow catalysts. Directly addresses earnings_calendar pending hypothesis (APLD 30.6% SI was strongest setup). Co-Authored-By: Claude Sonnet 4.6 --- docs/iterations/LEARNINGS.md | 7 ++ .../2026-04-12-short-interest-squeeze.md | 66 +++++++++++ docs/iterations/scanners/short_squeeze.md | 20 ++++ .../dataflows/discovery/scanners/__init__.py | 1 + .../discovery/scanners/short_squeeze.py | 108 ++++++++++++++++++ 5 files changed, 202 insertions(+) create mode 100644 docs/iterations/research/2026-04-12-short-interest-squeeze.md create mode 100644 docs/iterations/scanners/short_squeeze.md create mode 100644 tradingagents/dataflows/discovery/scanners/short_squeeze.py diff --git a/docs/iterations/LEARNINGS.md b/docs/iterations/LEARNINGS.md index 30718b91..d0f175d1 100644 --- a/docs/iterations/LEARNINGS.md +++ b/docs/iterations/LEARNINGS.md @@ -13,6 +13,13 @@ | early_accumulation | scanners/early_accumulation.md | 2026-04-12 | Sub-threshold (score=60); no catalyst → structurally score-capped by ranker | | social_dd | scanners/social_dd.md | 2026-04-12 | Sub-threshold (score=56); BUT 55% 30d win rate — diverges from social_hype; ranker may be suppressing it incorrectly | | volume_accumulation | scanners/volume_accumulation.md | — | No data yet | +| short_squeeze | scanners/short_squeeze.md | — | No data yet — new scanner, research: high SI (>20%) + catalyst = squeeze risk; not a directional signal alone | + +## Research + +| Title | File | Date | Summary | +|-------|------|------|---------| +| Short Interest Squeeze Scanner | research/2026-04-12-short-interest-squeeze.md | 2026-04-12 | High SI (>20%) + DTC >5 as squeeze-risk discovery; implemented as short_squeeze scanner | | reddit_dd | scanners/reddit_dd.md | — | No data yet | | reddit_trending | scanners/reddit_trending.md | — | No data yet | | semantic_news | scanners/semantic_news.md | — | No data yet | diff --git a/docs/iterations/research/2026-04-12-short-interest-squeeze.md b/docs/iterations/research/2026-04-12-short-interest-squeeze.md new file mode 100644 index 00000000..90b57f6e --- /dev/null +++ b/docs/iterations/research/2026-04-12-short-interest-squeeze.md @@ -0,0 +1,66 @@ +# Research: Short Interest Squeeze Scanner + +**Date:** 2026-04-12 +**Mode:** autonomous + +## Summary + +Stocks with high short interest (>20% of float) and high days-to-cover (DTC >5) face elevated squeeze +risk when a positive catalyst arrives — earnings beat, news, or unusual options activity. Academic +literature confirms that *decreases* in short interest predict positive future returns (14.6% annualized +for distressed firms), while raw high SI alone is actually a negative long-term indicator. The edge +here is not buying high-SI blindly, but using high SI + catalyst as a squeeze-risk scanner: a +discovery tool that surfaces stocks where short sellers are structurally vulnerable. + +## Sources Reviewed + +- QuantifiedStrategies (short squeeze backtest): Short squeeze strategies alone backtested poorly — + rarity and randomness of squeezes prevent a reliable standalone edge +- Alpha Architect (DTC & short covering): DTC is a better predictor of poor returns than raw SI; + long-short strategy using DTC generated 1.2% monthly return; short covering (SI decrease) signals + informed belief change +- QuantPedia / academic: SI decrease in distressed firms predicts +14.6% annualized risk-adjusted + return; short sellers are informed traders whose exit signals conviction shift +- Scanz / practitioner screeners: Consensus thresholds — SI% of float > 10% (moderate), >20% + (high), DTC > 5 (high squeeze pressure) +- tosindicators.com: "Upcoming earnings with high short interest" scan is a common institutional + approach — validates the earnings_calendar pending hypothesis +- earnings_calendar.md (internal): Pending hypothesis that SI > 20% pre-earnings produces better + outcomes; APLD (30.6% SI, score=75) was the strongest recent earnings setup +- social_dd.md (internal): GME scan (15.7% SI, score=56) showed 55% 30d win rate — best 30d + performer in pipeline + +## Fit Evaluation + +| Dimension | Score | Notes | +|-----------|-------|-------| +| Data availability | ✅ | `get_short_interest(return_structured=True)` in `finviz_scraper.py` fully integrated | +| Complexity | trivial | Wrap existing function, map to `{ticker, source, context, priority}` format | +| Signal uniqueness | low overlap | No existing standalone short-interest scanner; social_dd uses SI as one factor among many | +| Evidence quality | qualitative | Academic support for DTC as predictor; practitioner consensus on thresholds | + +## Recommendation + +**Implement** — The data source is already integrated and the signal fills a genuine gap. The scanner +should NOT simply buy high-SI stocks (negative long-term returns). Instead, it surfaces squeeze +candidates for downstream ranker scoring: stocks where short sellers are structurally vulnerable and +any catalyst could force rapid covering. The ranker then assigns final conviction based on cross- +scanner signals (options flow, earnings, news). This directly addresses the earnings_calendar pending +hypothesis (SI > 20% pre-earnings). + +## Proposed Scanner Spec + +- **Scanner name:** `short_squeeze` +- **Data source:** `tradingagents/dataflows/finviz_scraper.py` → `get_short_interest(return_structured=True)` +- **Signal logic:** + - Fetch Finviz tickers with SI > 15% of float, verified by Yahoo Finance + - CRITICAL: SI >= 30% (extreme squeeze risk — one catalyst away from violent covering) + - HIGH: SI >= 20% (high squeeze potential — elevated squeeze risk) + - MEDIUM: SI >= 15% (moderate squeeze potential — worth watching) + - Context string includes: SI%, DTC if available, squeeze signal label +- **Priority rules:** + - CRITICAL if `short_interest_pct >= 30` (extreme_squeeze_risk) + - HIGH if `short_interest_pct >= 20` (high_squeeze_potential) + - MEDIUM otherwise (moderate_squeeze_potential) +- **Context format:** `"Short interest {SI:.1f}% of float — {signal_label} | squeeze risk if catalyst arrives"` +- **Strategy tag:** `short_squeeze` diff --git a/docs/iterations/scanners/short_squeeze.md b/docs/iterations/scanners/short_squeeze.md new file mode 100644 index 00000000..2bdd3784 --- /dev/null +++ b/docs/iterations/scanners/short_squeeze.md @@ -0,0 +1,20 @@ +# Short Squeeze Scanner + +## Current Understanding +Identifies stocks with structurally high short interest (>15% of float by default, CRITICAL at >30%) +where short sellers are vulnerable to forced covering on any positive catalyst. The scanner uses +Finviz for discovery (screener filters) + Yahoo Finance for exact SI% verification. + +Key distinction: High SI alone predicts *negative* long-term returns on average (academic consensus). +The scanner is a squeeze-risk flag, not a directional buy signal. Value comes from cross-scanner +confluence: a stock appearing here AND in options_flow or earnings_calendar is significantly stronger +than either signal alone. + +## Evidence Log + +_(populated by /iterate runs)_ + +## Pending Hypotheses +- [ ] Does short_squeeze + options_flow confluence produce better 7d win rate than either scanner alone? +- [ ] Does short_squeeze + earnings_calendar (SI>20%) produce better outcomes than earnings alone? (See earnings_calendar.md pending hypothesis) +- [ ] Is there a volume threshold (e.g., market cap <$2B small-cap) that sharpens the signal? diff --git a/tradingagents/dataflows/discovery/scanners/__init__.py b/tradingagents/dataflows/discovery/scanners/__init__.py index bd971c11..16709d89 100644 --- a/tradingagents/dataflows/discovery/scanners/__init__.py +++ b/tradingagents/dataflows/discovery/scanners/__init__.py @@ -13,6 +13,7 @@ from . import ( reddit_trending, # noqa: F401 sector_rotation, # noqa: F401 semantic_news, # noqa: F401 + short_squeeze, # noqa: F401 technical_breakout, # noqa: F401 volume_accumulation, # noqa: F401 ) diff --git a/tradingagents/dataflows/discovery/scanners/short_squeeze.py b/tradingagents/dataflows/discovery/scanners/short_squeeze.py new file mode 100644 index 00000000..5aa5a719 --- /dev/null +++ b/tradingagents/dataflows/discovery/scanners/short_squeeze.py @@ -0,0 +1,108 @@ +"""Short interest squeeze-risk scanner. + +Surfaces stocks with structurally elevated short interest where any positive +catalyst (earnings beat, news, options activity) could force rapid short covering. + +Research basis: docs/iterations/research/2026-04-12-short-interest-squeeze.md +Key insight: High SI alone predicts *negative* long-term returns (mean reversion); +the edge is using high SI as a squeeze-risk flag for downstream cross-scanner +ranker scoring, not as a directional buy signal on its own. +""" + +from typing import Any, Dict, List + +from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner +from tradingagents.dataflows.discovery.utils import Priority +from tradingagents.utils.logger import get_logger + +logger = get_logger(__name__) + +_SIGNAL_LABELS = { + "extreme_squeeze_risk": "extreme squeeze risk", + "high_squeeze_potential": "high squeeze potential", + "moderate_squeeze_potential": "moderate squeeze potential", + "low_squeeze_potential": "low squeeze potential", +} + + +class ShortSqueezeScanner(BaseScanner): + """Scan for stocks with high short interest and elevated squeeze risk.""" + + name = "short_squeeze" + pipeline = "edge" + strategy = "short_squeeze" + + def __init__(self, config: Dict[str, Any]): + super().__init__(config) + self.min_short_interest_pct = self.scanner_config.get("min_short_interest_pct", 15.0) + self.min_days_to_cover = self.scanner_config.get("min_days_to_cover", 2.0) + self.top_n = self.scanner_config.get("top_n", 20) + + def scan(self, state: Dict[str, Any]) -> List[Dict[str, Any]]: + if not self.is_enabled(): + return [] + + logger.info( + f"📉 Scanning short interest (SI >{self.min_short_interest_pct}%)..." + ) + + try: + from tradingagents.dataflows.finviz_scraper import get_short_interest + + raw = get_short_interest( + min_short_interest_pct=self.min_short_interest_pct, + min_days_to_cover=self.min_days_to_cover, + top_n=self.top_n, + return_structured=True, + ) + + if not raw: + logger.info("No short squeeze candidates found") + return [] + + logger.info(f"Found {len(raw)} high short interest candidates") + + candidates = [] + for item in raw: + ticker = item.get("ticker", "").upper().strip() + if not ticker: + continue + + si_pct = item.get("short_interest_pct", 0) + signal = item.get("signal", "low_squeeze_potential") + label = _SIGNAL_LABELS.get(signal, signal) + + # Priority based on squeeze intensity + if signal == "extreme_squeeze_risk": + priority = Priority.CRITICAL.value + elif signal == "high_squeeze_potential": + priority = Priority.HIGH.value + else: + priority = Priority.MEDIUM.value + + context = ( + f"Short interest {si_pct:.1f}% of float — {label}" + " | squeeze risk elevates if catalyst arrives" + ) + + candidates.append( + { + "ticker": ticker, + "source": self.name, + "context": context, + "priority": priority, + "strategy": self.strategy, + "short_interest_pct": si_pct, + "squeeze_signal": signal, + } + ) + + candidates = candidates[: self.limit] + return candidates + + except Exception as e: + logger.warning(f"⚠️ Short squeeze scanner failed: {e}") + return [] + + +SCANNER_REGISTRY.register(ShortSqueezeScanner)