TradingAgents/tradingagents/dataflows/discovery/scanners/insider_buying.py

176 lines
7.0 KiB
Python

"""SEC Form 4 insider buying scanner."""
import json
from datetime import date, timedelta
from pathlib import Path
from typing import Any, Dict, List, Set
from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner
from tradingagents.dataflows.discovery.utils import Priority
from tradingagents.utils.logger import get_logger
logger = get_logger(__name__)
class InsiderBuyingScanner(BaseScanner):
"""Scan SEC Form 4 for insider purchases."""
name = "insider_buying"
pipeline = "edge"
strategy = "insider_buying"
def __init__(self, config: Dict[str, Any]):
super().__init__(config)
self.lookback_days = self.scanner_config.get("lookback_days", 7)
# Raised from $25K to $100K: P&L data (178 recs, -2.05% 30d avg) suggests
# sub-$100K transactions add noise. Tests the insider_buying-min-txn-100k
# hypothesis registered 2026-04-07.
self.min_transaction_value = self.scanner_config.get("min_transaction_value", 100_000)
def scan(self, state: Dict[str, Any]) -> List[Dict[str, Any]]:
if not self.is_enabled():
return []
logger.info("Scanning insider buying (OpenInsider)...")
try:
from tradingagents.dataflows.finviz_scraper import get_finviz_insider_buying
transactions = get_finviz_insider_buying(
lookback_days=self.lookback_days,
min_value=self.min_transaction_value,
return_structured=True,
deduplicate=False,
)
if not transactions:
logger.info("No insider buying transactions found")
return []
logger.info(f"Found {len(transactions)} insider transactions")
# Group by ticker for cluster detection
by_ticker: Dict[str, list] = {}
for txn in transactions:
ticker = txn.get("ticker", "").upper().strip()
if not ticker:
continue
by_ticker.setdefault(ticker, []).append(txn)
candidates = []
for ticker, txns in by_ticker.items():
# Use the largest transaction as primary
txns.sort(key=lambda t: t.get("value_num", 0), reverse=True)
primary = txns[0]
insider_name = primary.get("insider", "Unknown")
title = primary.get("title", "")
value = primary.get("value_num", 0)
value_str = primary.get("value_str", f"${value:,.0f}")
num_insiders = len(set(t.get("insider", "") for t in txns))
# Priority by significance
title_lower = title.lower()
is_c_suite = any(
t in title_lower for t in ["ceo", "cfo", "coo", "cto", "president", "chairman"]
)
is_director = "director" in title_lower
if num_insiders >= 2:
priority = Priority.CRITICAL.value
elif is_c_suite and value >= 100_000:
priority = Priority.CRITICAL.value
elif is_c_suite or (is_director and value >= 50_000):
priority = Priority.HIGH.value
elif value >= 50_000:
priority = Priority.HIGH.value
else:
priority = Priority.MEDIUM.value
# Build context
if num_insiders > 1:
context = (
f"Cluster: {num_insiders} insiders buying {ticker}. "
f"Largest: {title} {insider_name} purchased {value_str}"
)
else:
context = f"{title} {insider_name} purchased {value_str} of {ticker}"
# Scoring: cluster buys > C-suite > dollar value
insider_score = value + (num_insiders * 500_000) + (1_000_000 if is_c_suite else 0)
candidates.append(
{
"ticker": ticker,
"source": self.name,
"context": context,
"priority": priority,
"strategy": self.strategy,
"insider_name": insider_name,
"insider_title": title,
"transaction_value": value,
"num_insiders_buying": num_insiders,
"insider_score": insider_score,
}
)
# Sort by signal quality, then limit
candidates.sort(key=lambda c: c.get("insider_score", 0), reverse=True)
candidates = candidates[: self.limit]
# Staleness suppression: filter tickers already recommended as insider_buying
# in the past 2 days (same Form 4 filing appears daily within lookback_days window)
recently_seen = self._load_recent_insider_tickers(suppress_days=2)
if recently_seen:
before_tickers = {c["ticker"] for c in candidates}
candidates = [c for c in candidates if c["ticker"] not in recently_seen]
suppressed = before_tickers - {c["ticker"] for c in candidates}
if suppressed:
logger.info(
f"Staleness filter: suppressed {len(suppressed)} ticker(s) already "
f"recommended as insider_buying in the past 2 days: {suppressed}"
)
logger.info(f"Insider buying: {len(candidates)} candidates")
return candidates
except Exception as e:
logger.error(f"Insider buying scan failed: {e}", exc_info=True)
return []
def _load_recent_insider_tickers(self, suppress_days: int = 2) -> Set[str]:
"""Return tickers recommended as insider_buying in the past N days.
Used to suppress stale Form 4 filings that re-appear daily within the
lookback_days window. P&L review (Apr 3-9 2026) confirmed 3 tickers
(PAGS, ZBIO, HMH) each repeated 3-4 consecutive days from the same filing.
"""
seen: Set[str] = set()
data_dir = Path(self.config.get("data_dir", "data"))
recs_dir = data_dir / "recommendations"
if not recs_dir.exists():
return seen
today = date.today()
for i in range(1, suppress_days + 1):
check_date = today - timedelta(days=i)
rec_file = recs_dir / f"{check_date.isoformat()}.json"
if not rec_file.exists():
continue
try:
with open(rec_file) as f:
data = json.load(f)
for rec in data.get("recommendations", []):
if rec.get("strategy_match") == "insider_buying":
ticker = rec.get("ticker", "").upper()
if ticker:
seen.add(ticker)
except Exception:
pass
return seen
SCANNER_REGISTRY.register(InsiderBuyingScanner)