"""52-week high breakout scanner — volume-confirmed new 52-week high crossings.

Based on George & Hwang (2004): proximity to the 52-week high dominates
past-return momentum for forecasting future returns. The key insight is that
the 52-week high acts as a psychological anchor — investors are reluctant to
bid above it, so when price clears it on high volume, institutional conviction
is confirmed.

Volume confirmation threshold: 1.5x (eliminates 63% of false signals;
breakouts with >1.5x volume succeed 72% of the time, avg +11.4% over 31 days).
"""
|
|
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
import pandas as pd
|
|
|
|
from tradingagents.dataflows.data_cache.ohlcv_cache import download_ohlcv_cached
|
|
from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner
|
|
from tradingagents.dataflows.discovery.utils import Priority
|
|
from tradingagents.dataflows.universe import load_universe
|
|
from tradingagents.utils.logger import get_logger
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = get_logger(__name__)
|
|
|
|
|
|
class High52wBreakoutScanner(BaseScanner):
    """Scan for stocks making volume-confirmed new 52-week high crossings.

    Distinct from TechnicalBreakoutScanner (20-day lookback resistance):
    this scanner specifically targets the event of crossing the 52-week high,
    which has strong academic backing as a standalone predictor of future returns.

    Data: OHLCV loaded with a 1y lookback; the prior high is taken over up to
    252 sessions before the latest one.
    Cost: one batched, cached OHLCV load per scan; this class issues no
    per-ticker requests itself.

    Settings read from ``self.scanner_config`` (defaults in parentheses):
        max_tickers (150): how many universe tickers are scanned.
        min_volume_multiple (1.5): latest volume / average volume needed.
        vol_avg_days (20): sessions in the average-volume window.
        freshness_days (5): how far back the freshness reference close is.
        freshness_threshold (0.97): fraction of the prior high the reference
            close must be below for the crossing to count as fresh.
        min_price (5.0): minimum latest close.
        min_avg_volume (100_000): minimum average volume.
    """

    name = "high_52w_breakout"
    pipeline = "momentum"
    strategy = "high_52w_breakout"

    def __init__(self, config: Dict[str, Any]):
        """Initialise the base scanner, then cache this scanner's thresholds.

        Args:
            config: Full configuration mapping, forwarded to ``BaseScanner``.
        """
        super().__init__(config)
        self.max_tickers = self.scanner_config.get("max_tickers", 150)
        # Academic threshold: 1.5x eliminates 63% of false signals
        self.min_volume_multiple = self.scanner_config.get("min_volume_multiple", 1.5)
        self.vol_avg_days = self.scanner_config.get("vol_avg_days", 20)
        # Freshness: was the stock still below the 52w high N sessions ago?
        self.freshness_days = self.scanner_config.get("freshness_days", 5)
        self.freshness_threshold = self.scanner_config.get("freshness_threshold", 0.97)
        # Liquidity gates
        self.min_price = self.scanner_config.get("min_price", 5.0)
        self.min_avg_volume = self.scanner_config.get("min_avg_volume", 100_000)

    def scan(self, state: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Return ranked 52-week-high breakout candidates.

        Args:
            state: Pipeline state; not read by this scanner.

        Returns:
            At most ``self.limit`` candidate dicts (each with a ``"ticker"``
            key added), fresh crossings first and then by descending volume
            multiple. Empty when the scanner is disabled, the universe is
            empty, or no OHLCV data is available.
        """
        if not self.is_enabled():
            return []

        logger.info("🏔️ Scanning for 52-week high breakouts...")

        tickers = load_universe(self.config)
        if not tickers:
            logger.warning("No tickers loaded for 52w-high breakout scan")
            return []

        tickers = tickers[: self.max_tickers]

        cache_dir = self.config.get("discovery", {}).get("ohlcv_cache_dir", "data/ohlcv_cache")
        logger.info(f"Loading OHLCV for {len(tickers)} tickers from cache...")
        data = download_ohlcv_cached(tickers, period="1y", cache_dir=cache_dir)

        if not data:
            return []

        candidates = []
        for ticker, df in data.items():
            result = self._check_52w_breakout_df(df)
            if result:
                result["ticker"] = ticker
                candidates.append(result)

        # Strongest signal first: fresh crossings, then larger volume multiple.
        candidates.sort(
            key=lambda c: (c.get("is_fresh", False), c.get("volume_multiple", 0)),
            reverse=True,
        )
        candidates = candidates[: self.limit]
        logger.info(f"52-week high breakouts: {len(candidates)} candidates")
        return candidates

    def _check_52w_breakout_df(self, df: pd.DataFrame) -> Optional[Dict[str, Any]]:
        """Check one ticker's OHLCV frame for a volume-confirmed new 52-week high.

        Args:
            df: Frame with "Close", "High" and "Volume" columns, oldest row
                first (the last row is treated as the current session).

        Returns:
            A dict with keys ``source``, ``context``, ``priority``,
            ``strategy``, ``volume_multiple``, ``breakout_pct``,
            ``prior_52w_high`` and ``is_fresh`` when the latest close is at or
            above the prior high on sufficient volume; otherwise ``None``.
            Any exception raised while checking is logged at debug level and
            also yields ``None`` (best-effort per ticker).
        """
        try:
            df = df.dropna()

            # Only enough rows for the volume average and the freshness
            # reference are required. With a short history the "52-week" high
            # below is simply the high over the rows that are available.
            # NOTE(review): recently listed tickers can therefore qualify.
            min_rows = self.vol_avg_days + self.freshness_days + 5
            if len(df) < min_rows:
                return None

            close = df["Close"]
            high = df["High"]
            volume = df["Volume"]

            current_close = float(close.iloc[-1])
            current_vol = float(volume.iloc[-1])

            # --- Liquidity gates ---
            # Average over the sessions before the current one.
            avg_volume = float(volume.iloc[-(self.vol_avg_days + 1) : -1].mean())
            # `not x > 0` also rejects NaN (e.g. an empty averaging window),
            # which would otherwise pass every `<` comparison below and leak
            # out as a NaN volume multiple.
            if not avg_volume > 0:
                return None
            if avg_volume < self.min_avg_volume:
                return None
            if current_close < self.min_price:
                return None

            # --- 52-week high (exclude today's session) ---
            # Use up to 252 prior trading days for the window.
            lookback_start = max(0, len(df) - 253)
            prior_52w_high = float(high.iloc[lookback_start:-1].max())
            # A non-positive or NaN high cannot anchor a percentage breakout.
            if not prior_52w_high > 0:
                return None

            # Main signal: current close is at or above the prior 52-week high.
            if current_close < prior_52w_high:
                return None

            # --- Volume confirmation ---
            vol_multiple = current_vol / avg_volume
            if vol_multiple < self.min_volume_multiple:
                return None

            # --- Freshness ---
            # Fresh when the close `freshness_days` sessions ago was still
            # below the threshold fraction of the prior high.
            has_reference = len(close) > self.freshness_days + 1
            is_fresh = has_reference and (
                float(close.iloc[-(self.freshness_days + 1)])
                < prior_52w_high * self.freshness_threshold
            )

            # --- Priority ---
            if vol_multiple >= 3.0 and is_fresh:
                priority = Priority.CRITICAL.value
            elif vol_multiple >= 2.0 or (vol_multiple >= 1.5 and is_fresh):
                priority = Priority.HIGH.value
            else:
                priority = Priority.MEDIUM.value

            breakout_pct = ((current_close - prior_52w_high) / prior_52w_high) * 100

            context = (
                f"New 52-week high: closed at ${current_close:.2f} "
                f"(+{breakout_pct:.1f}% above prior 52w high of ${prior_52w_high:.2f}) "
                f"on {vol_multiple:.1f}x avg volume"
            )
            if is_fresh:
                context += " | Fresh crossing — first time at new high this week"

            return {
                "source": self.name,
                "context": context,
                "priority": priority,
                "strategy": self.strategy,
                "volume_multiple": round(vol_multiple, 2),
                "breakout_pct": round(breakout_pct, 2),
                "prior_52w_high": round(prior_52w_high, 2),
                "is_fresh": is_fresh,
            }

        except Exception as e:
            # Deliberately broad: one malformed frame must not abort the scan.
            logger.debug(f"52w-high check failed: {e}")
            return None
|
|
|
|
|
|
# Register the scanner class with the shared registry when this module is imported.
SCANNER_REGISTRY.register(High52wBreakoutScanner)
|