feat(cache): unified ticker universe + nightly OHLCV prefetch

- tradingagents/dataflows/universe.py: single source of truth for ticker
  universe; all scanners now call load_universe(config) instead of
  duplicating the 3-level fallback chain with hardcoded "data/tickers.txt"

- scripts/prefetch_ohlcv.py: nightly script using existing ohlcv_cache.py
  incremental logic; first run downloads 1y history, subsequent runs append
  only new trading days

- .github/workflows/prefetch.yml: runs at 01:00 UTC daily, before all other
  workflows; commits updated parquet to repo

- Updated 6 scanners: minervini, high_52w_breakout, ml_signal, options_flow,
  sector_rotation, technical_breakout — removed duplicate DEFAULT_TICKER_FILE
  constants and _load_tickers_from_file() functions

- minervini, high_52w_breakout, technical_breakout: replace yf.download()
  with download_ohlcv_cached() — reads from prefetched cache instead of
  hitting yfinance at discovery time

- default_config.py: added discovery.ohlcv_cache_dir config key

- data/ohlcv_cache/: initial 1y backfill (588 tickers, 5.4MB parquet)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Youssef Aitousarrah 2026-04-14 16:18:52 -07:00
parent 17e77f036f
commit e15e2df7a5
13 changed files with 262 additions and 254 deletions

2
.gitattributes vendored Normal file
View File

@ -0,0 +1,2 @@
# Treat parquet cache files as binary — skip text diffs
data/ohlcv_cache/*.parquet binary

51
.github/workflows/prefetch.yml vendored Normal file
View File

@ -0,0 +1,51 @@
name: Nightly OHLCV Prefetch

on:
  schedule:
    # 1:00 AM UTC — runs before iterate (6:00), research (7:00), hypothesis (8:00), discovery (12:30)
    - cron: "0 1 * * *"
  workflow_dispatch:
    inputs:
      period:
        description: "History window for initial download (e.g. 1y, 6mo)"
        required: false
        default: "1y"

# The commit step pushes to main. In repos where the default GITHUB_TOKEN is
# read-only, the push fails without an explicit write grant.
permissions:
  contents: write

# Never let two prefetch runs race each other on the push to main.
concurrency:
  group: ohlcv-prefetch
  cancel-in-progress: false

jobs:
  prefetch:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up git identity
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
          cache: pip

      - name: Install dependencies
        run: pip install -r requirements.txt

      - name: Run OHLCV prefetch
        env:
          # Manual dispatch may override the history window; scheduled runs use 1y.
          PERIOD: ${{ github.event.inputs.period || '1y' }}
        run: |
          python scripts/prefetch_ohlcv.py --period "$PERIOD"

      - name: Commit cache updates
        run: |
          git add data/ohlcv_cache/
          if git diff --cached --quiet; then
            echo "No cache changes to commit"
          else
            git commit -m "chore(cache): nightly OHLCV prefetch $(date -u +%Y-%m-%d)"
            # Other workflows also push to main; rebase onto anything that
            # landed since checkout so the push is a fast-forward.
            git pull --rebase origin main
            git push origin main
          fi

View File

@ -0,0 +1 @@
{"last_updated": "2026-04-14", "tickers": ["AA", "AAL", "AAP", "AAPL", "ABBV", "ABT", "ACGL", "ACN", "ADBE", "ADM", "ADP", "ADSK", "AEE", "AEP", "AES", "AFL", "AIV", "AKAM", "ALB", "ALGN", "ALK", "ALL", "AMAT", "AMD", "AME", "AMGN", "AMT", "AMZN", "ANF", "AON", "AOS", "APA", "APD", "APH", "ARE", "ATKR", "ATO", "AVB", "AVGO", "AVY", "AWK", "AXON", "AXP", "AZO", "BA", "BAC", "BAX", "BBWI", "BBY", "BEN", "BF-B", "BIIB", "BIO", "BK", "BKNG", "BKR", "BLK", "BLMN", "BMY", "BNTX", "BR", "BRK-B", "BRO", "BRT", "BRX", "BSX", "BWA", "BXP", "C", "CAG", "CAH", "CARR", "CAT", "CAVA", "CB", "CBOE", "CBRE", "CCL", "CDNS", "CE", "CEG", "CF", "CFG", "CHTR", "CI", "CINF", "CL", "CLB", "CLF", "CLH", "CLX", "CMA", "CMC", "CMCSA", "CME", "CMG", "CMI", "CMS", "CNC", "CNP", "COF", "COIN", "COMP", "COO", "COP", "COST", "CPB", "CPRT", "CPT", "CRL", "CRM", "CRWD", "CSCO", "CSGP", "CSX", "CTAS", "CTRA", "CTSH", "CTVA", "CUBE", "CURV", "CVNA", "CVS", "CVX", "CWH", "CWK", "CZR", "D", "DAL", "DD", "DDOG", "DE", "DG", "DGX", "DHI", "DHR", "DIN", "DINO", "DIS", "DKS", "DLR", "DLTR", "DOV", "DPZ", "DQ", "DRI", "DT", "DTE", "DUK", "DVA", "DVN", "DXCM", "EA", "EBAY", "ECL", "ED", "EFX", "EG", "EIX", "EL", "ELV", "EMN", "EMR", "ENPH", "ENTG", "EOG", "EPAM", "EQH", "EQIX", "EQR", "EQT", "ES", "ESS", "ESTC", "ETN", "ETR", "ETSY", "EVH", "EVRG", "EWBC", "EXAS", "EXC", "EXPD", "EXPE", "EXPI", "F", "FANG", "FAST", "FBNC", "FCNCA", "FCX", "FDS", "FDX", "FE", "FFIV", "FHI", "FIS", "FISV", "FITB", "FIVE", "FIVN", "FMC", "FNB", "FNF", "FOX", "FOXA", "FRT", "FSLR", "FTI", "FTNT", "FTV", "FWRD", "G", "GATX", "GD", "GE", "GEHC", "GEN", "GILD", "GIS", "GL", "GM", "GNRC", "GOOG", "GOOGL", "GPC", "GPN", "GRMN", "GS", "GSHD", "GTLS", "HAL", "HAS", "HBAN", "HBI", "HCA", "HD", "HIG", "HII", "HLT", "HOG", "HOLX", "HOMB", "HON", "HOOD", "HPE", "HRL", "HSIC", "HST", "HSY", "HUM", "HWM", "HXL", "IBM", "ICE", "IDXX", "IEX", "IFF", "ILMN", "INCY", "INTC", "INVH", "IP", "IPG", "IQV", "IR", "IRM", "ISRG", "IT", "IVZ", "JACK", 
"JBHT", "JBL", "JCI", "JKHY", "JLL", "JNJ", "JPM", "K", "KDP", "KEY", "KHC", "KIM", "KLAC", "KMB", "KMI", "KMX", "KNX", "KO", "KR", "KRC", "L", "LAD", "LAMR", "LBRDA", "LBRDK", "LCID", "LDOS", "LEN", "LFUS", "LHX", "LIN", "LLY", "LMT", "LNC", "LNT", "LPLA", "LRCX", "LUMN", "LUV", "LVS", "LYB", "LYV", "MA", "MAA", "MAR", "MAS", "MAT", "MCHP", "MCK", "MCO", "MDB", "MDLZ", "MDT", "MELI", "MET", "META", "MGM", "MHK", "MKC", "MKTX", "MLI", "MMI", "MMM", "MNST", "MO", "MOH", "MOS", "MPC", "MPWR", "MRK", "MRNA", "MRVL", "MS", "MSCI", "MSFT", "MSI", "MT", "MTB", "MTCH", "MTD", "MTRX", "MUR", "NCLH", "NDAQ", "NEE", "NEM", "NET", "NFLX", "NI", "NKE", "NOC", "NOV", "NRG", "NSC", "NTAP", "NTRS", "NUE", "NVAX", "NVDA", "NVR", "NVST", "NXPI", "O", "ODFL", "OGN", "OI", "OKTA", "OMC", "OMCL", "ON", "ONB", "ONON", "OPEN", "ORCL", "ORLY", "OTIS", "OVV", "OXY", "PAG", "PAYC", "PAYX", "PCAR", "PCG", "PEG", "PENN", "PEP", "PFE", "PG", "PGR", "PH", "PHM", "PII", "PKG", "PLD", "PLTR", "PM", "PNC", "PNR", "PODD", "POOL", "PPG", "PPL", "PRGO", "PSA", "PSX", "PTC", "PVH", "PWR", "PYPL", "PZZA", "QCOM", "QLYS", "QRVO", "RBLX", "RCL", "REG", "REGN", "REIT", "RELX", "RGA", "RHI", "RIO", "RIVN", "RJF", "RKT", "RL", "RMD", "RNR", "ROL", "ROP", "ROST", "RRC", "RS", "RSG", "RTX", "RVLV", "RXO", "RYAN", "SAIC", "SBAC", "SBUX", "SCI", "SEE", "SHAK", "SJM", "SLB", "SLGN", "SMCI", "SNA", "SNPS", "SO", "SPG", "SPGI", "SRE", "STE", "STLD", "STT", "STX", "STZ", "SWK", "SWKS", "SYF", "SYK", "SYY", "T", "TAP", "TCBI", "TCOM", "TDG", "TDOC", "TDY", "TEAM", "TECH", "TEL", "TENB", "TER", "TFC", "TFX", "TGT", "TJX", "TKO", "TMO", "TNDM", "TOL", "TOST", "TPG", "TRGP", "TRV", "TSCO", "TSLA", "TSN", "TT", "TTD", "TTWO", "TXN", "TXT", "TYL", "U", "UAL", "UDR", "UHS", "ULTA", "UNH", "UNP", "UPS", "URI", "USB", "USFD", "UTHR", "UWMC", "V", "VALE", "VEEV", "VFC", "VICI", "VLO", "VMC", "VMI", "VNO", "VNT", "VOD", "VRM", "VRNS", "VRSK", "VRSN", "VRTX", "VSAT", "VST", "VTR", "VTRS", "VTYX", "VZ", "W", "WAB", "WAL", 
"WAT", "WBD", "WBS", "WCC", "WDAY", "WDC", "WEC", "WELL", "WEN", "WEX", "WFC", "WHR", "WING", "WLK", "WM", "WMB", "WMT", "WOLF", "WOOF", "WOR", "WPC", "WRB", "WSM", "WSO", "WTFC", "WTM", "WTRG", "WTS", "WWD", "WY", "WYNN", "XEL", "XOM", "XPO", "XYL", "YELP", "YETI", "YUM", "Z", "ZBH", "ZBRA", "ZION", "ZM", "ZS", "ZTS", "ZWS"], "period": "1y"}

Binary file not shown.

77
scripts/prefetch_ohlcv.py Normal file
View File

@ -0,0 +1,77 @@
#!/usr/bin/env python3
"""Nightly OHLCV prefetch — populates the shared cache for all scanners.
Run nightly at 01:00 UTC (before discovery at 12:30 UTC) so scanners read
from disk instead of hitting yfinance at run time.
First run: downloads 1y of history for the full ticker universe (~592 tickers).
Subsequent runs: appends only the new trading day's bars (incremental update).
Usage:
python scripts/prefetch_ohlcv.py
python scripts/prefetch_ohlcv.py --period 6mo # shorter initial window
"""
import argparse
import os
import sys
import time
from pathlib import Path
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))
from tradingagents.dataflows.data_cache.ohlcv_cache import download_ohlcv_cached
from tradingagents.dataflows.universe import load_universe
from tradingagents.default_config import DEFAULT_CONFIG
def main():
    """Prefetch OHLCV bars for the full ticker universe into the parquet cache.

    Exits non-zero when the universe is empty or when no ticker at all could
    be cached, so the nightly workflow fails loudly instead of reporting
    success over an empty/stale cache (e.g. during a yfinance outage).
    """
    parser = argparse.ArgumentParser(description="Prefetch OHLCV data for the ticker universe")
    parser.add_argument(
        "--period",
        default="1y",
        help="History window for initial download (default: 1y). Incremental runs ignore this.",
    )
    parser.add_argument(
        "--cache-dir",
        default=str(ROOT / "data" / "ohlcv_cache"),
        help="Directory to store parquet cache files",
    )
    args = parser.parse_args()

    tickers = load_universe(DEFAULT_CONFIG)
    if not tickers:
        print("ERROR: No tickers loaded — check data/tickers.txt", flush=True)
        sys.exit(1)

    print(f"Prefetching OHLCV for {len(tickers)} tickers (period={args.period})...", flush=True)
    print(f"Cache dir: {args.cache_dir}", flush=True)

    start = time.time()
    data = download_ohlcv_cached(
        tickers=tickers,
        period=args.period,
        cache_dir=args.cache_dir,
    )
    elapsed = time.time() - start

    # Summary
    n_tickers = len(data)
    total_rows = sum(len(df) for df in data.values())
    cache_path = Path(args.cache_dir)
    # Guard against a missing cache dir so the summary never raises after a
    # failed download run.
    cache_size_mb = (
        sum(p.stat().st_size for p in cache_path.glob("*.parquet")) / 1024 / 1024
        if cache_path.is_dir()
        else 0.0
    )

    print(f"\nDone in {elapsed:.1f}s", flush=True)
    print(f"  Tickers cached : {n_tickers}/{len(tickers)}", flush=True)
    print(f"  Total rows     : {total_rows:,}", flush=True)
    print(f"  Cache size     : {cache_size_mb:.1f} MB", flush=True)

    missing = set(tickers) - set(data.keys())
    if missing:
        print(f"  Missing tickers: {len(missing)} (delisted or no data)", flush=True)

    # Nothing cached at all is a hard failure — do not let the workflow
    # commit (or silently skip) an empty cache.
    if not data:
        print("ERROR: prefetch produced no data for any ticker", flush=True)
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@ -14,33 +14,14 @@ from typing import Any, Dict, List, Optional
import pandas as pd
from tradingagents.dataflows.data_cache.ohlcv_cache import download_ohlcv_cached
from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner
from tradingagents.dataflows.discovery.utils import Priority
from tradingagents.dataflows.universe import load_universe
from tradingagents.utils.logger import get_logger
logger = get_logger(__name__)
DEFAULT_TICKER_FILE = "data/tickers.txt"
def _load_tickers_from_file(path: str) -> List[str]:
"""Load ticker symbols from a text file."""
try:
with open(path) as f:
tickers = [
line.strip().upper()
for line in f
if line.strip() and not line.strip().startswith("#")
]
if tickers:
logger.info(f"52w-high scanner: loaded {len(tickers)} tickers from {path}")
return tickers
except FileNotFoundError:
logger.warning(f"Ticker file not found: {path}")
except Exception as e:
logger.warning(f"Failed to load ticker file {path}: {e}")
return []
class High52wBreakoutScanner(BaseScanner):
"""Scan for stocks making volume-confirmed new 52-week high crossings.
@ -59,10 +40,6 @@ class High52wBreakoutScanner(BaseScanner):
def __init__(self, config: Dict[str, Any]):
super().__init__(config)
self.ticker_file = self.scanner_config.get(
"ticker_file",
config.get("tickers_file", DEFAULT_TICKER_FILE),
)
self.max_tickers = self.scanner_config.get("max_tickers", 150)
# Academic threshold: 1.5x eliminates 63% of false signals
self.min_volume_multiple = self.scanner_config.get("min_volume_multiple", 1.5)
@ -80,34 +57,25 @@ class High52wBreakoutScanner(BaseScanner):
logger.info("🏔️ Scanning for 52-week high breakouts...")
tickers = _load_tickers_from_file(self.ticker_file)
tickers = load_universe(self.config)
if not tickers:
logger.warning("No tickers loaded for 52w-high breakout scan")
return []
tickers = tickers[: self.max_tickers]
from tradingagents.dataflows.y_finance import download_history
cache_dir = self.config.get("discovery", {}).get("ohlcv_cache_dir", "data/ohlcv_cache")
logger.info(f"Loading OHLCV for {len(tickers)} tickers from cache...")
data = download_ohlcv_cached(tickers, period="1y", cache_dir=cache_dir)
try:
data = download_history(
tickers,
period="1y",
interval="1d",
auto_adjust=True,
progress=False,
)
except Exception as e:
logger.error(f"Batch download failed: {e}")
return []
if data is None or data.empty:
if not data:
return []
candidates = []
for ticker in tickers:
result = self._check_52w_breakout(ticker, data)
for ticker, df in data.items():
result = self._check_52w_breakout_df(df)
if result:
result["ticker"] = ticker
candidates.append(result)
# Sort by strongest signal: fresh critical first, then by volume multiple
@ -119,18 +87,10 @@ class High52wBreakoutScanner(BaseScanner):
logger.info(f"52-week high breakouts: {len(candidates)} candidates")
return candidates
def _check_52w_breakout(
self, ticker: str, data: pd.DataFrame
) -> Optional[Dict[str, Any]]:
"""Check if ticker is making a new 52-week high with volume confirmation."""
def _check_52w_breakout_df(self, df: pd.DataFrame) -> Optional[Dict[str, Any]]:
"""Check if a pre-extracted ticker DataFrame is making a new 52-week high with volume confirmation."""
try:
# Extract single-ticker series from multi-ticker download
if isinstance(data.columns, pd.MultiIndex):
if ticker not in data.columns.get_level_values(1):
return None
df = data.xs(ticker, axis=1, level=1).dropna()
else:
df = data.dropna()
df = df.dropna()
# Need at least 260 days for a proper 52-week window
min_rows = self.vol_avg_days + self.freshness_days + 5
@ -195,7 +155,6 @@ class High52wBreakoutScanner(BaseScanner):
context += " | Fresh crossing — first time at new high this week"
return {
"ticker": ticker,
"source": self.name,
"context": context,
"priority": priority,
@ -207,7 +166,7 @@ class High52wBreakoutScanner(BaseScanner):
}
except Exception as e:
logger.debug(f"52w-high check failed for {ticker}: {e}")
logger.debug(f"52w-high check failed: {e}")
return None

View File

@ -11,33 +11,14 @@ from typing import Any, Dict, List, Optional, Tuple
import pandas as pd
from tradingagents.dataflows.data_cache.ohlcv_cache import download_ohlcv_cached
from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner
from tradingagents.dataflows.discovery.utils import Priority
from tradingagents.dataflows.universe import load_universe
from tradingagents.utils.logger import get_logger
logger = get_logger(__name__)
DEFAULT_TICKER_FILE = "data/tickers.txt"
def _load_tickers_from_file(path: str) -> List[str]:
"""Load ticker symbols from a text file."""
try:
with open(path) as f:
tickers = [
line.strip().upper()
for line in f
if line.strip() and not line.strip().startswith("#")
]
if tickers:
logger.info(f"Minervini scanner: loaded {len(tickers)} tickers from {path}")
return tickers
except FileNotFoundError:
logger.warning(f"Ticker file not found: {path}")
except Exception as e:
logger.warning(f"Failed to load ticker file {path}: {e}")
return []
class MinerviniScanner(BaseScanner):
"""Scan for stocks in a confirmed Minervini Stage 2 uptrend.
@ -56,10 +37,6 @@ class MinerviniScanner(BaseScanner):
def __init__(self, config: Dict[str, Any]):
super().__init__(config)
self.ticker_file = self.scanner_config.get(
"ticker_file",
config.get("tickers_file", DEFAULT_TICKER_FILE),
)
self.min_rs_rating = self.scanner_config.get("min_rs_rating", 70)
self.lookback_period = self.scanner_config.get("lookback_period", "1y")
self.sma_200_slope_days = self.scanner_config.get("sma_200_slope_days", 20)
@ -73,7 +50,7 @@ class MinerviniScanner(BaseScanner):
logger.info("📊 Scanning for Minervini Stage 2 uptrends...")
tickers = _load_tickers_from_file(self.ticker_file)
tickers = load_universe(self.config)
if not tickers:
logger.warning("No tickers loaded for Minervini scan")
return []
@ -82,24 +59,12 @@ class MinerviniScanner(BaseScanner):
logger.info(f"Limiting Minervini scan to {self.max_tickers}/{len(tickers)} tickers")
tickers = tickers[: self.max_tickers]
# Batch download OHLCV — 1y needed for SMA200
import yfinance as yf
cache_dir = self.config.get("discovery", {}).get("ohlcv_cache_dir", "data/ohlcv_cache")
logger.info(f"Loading OHLCV for {len(tickers)} tickers from cache ({self.lookback_period})...")
data = download_ohlcv_cached(tickers, period=self.lookback_period, cache_dir=cache_dir)
try:
logger.info(f"Batch-downloading {len(tickers)} tickers ({self.lookback_period})...")
raw = yf.download(
tickers,
period=self.lookback_period,
interval="1d",
auto_adjust=True,
progress=False,
)
except Exception as e:
logger.error(f"Batch download failed: {e}")
return []
if raw is None or raw.empty:
logger.warning("Minervini scanner: batch download returned empty data")
if not data:
logger.warning("Minervini scanner: no OHLCV data available")
return []
# Compute 12-month returns for RS Rating (need all tickers' data)
@ -107,10 +72,12 @@ class MinerviniScanner(BaseScanner):
passing_tickers: List[Tuple[str, Dict[str, Any]]] = []
for ticker in tickers:
result = self._check_minervini(ticker, raw)
df = data.get(ticker)
if df is None or df.empty:
continue
result = self._check_minervini_df(df)
if result is not None:
ticker_df, metrics = result
# Compute 12-month cumulative return for RS rating
ret = self._compute_return(ticker_df)
if ret is not None:
universe_returns[ticker] = ret
@ -119,13 +86,10 @@ class MinerviniScanner(BaseScanner):
# Also compute returns for tickers that DIDN'T pass (for RS percentile ranking)
for ticker in tickers:
if ticker not in universe_returns:
df = data.get(ticker)
if df is None or df.empty:
continue
try:
if isinstance(raw.columns, pd.MultiIndex):
if ticker not in raw.columns.get_level_values(1):
continue
df = raw.xs(ticker, axis=1, level=1).dropna()
else:
df = raw.dropna()
ret = self._compute_return(df)
if ret is not None:
universe_returns[ticker] = ret
@ -197,21 +161,15 @@ class MinerviniScanner(BaseScanner):
)
return candidates
def _check_minervini(
self, ticker: str, raw: pd.DataFrame
def _check_minervini_df(
self, df: pd.DataFrame
) -> Optional[Tuple[pd.DataFrame, Dict[str, Any]]]:
"""Apply the 6-condition Minervini trend template to one ticker.
"""Apply the 6-condition Minervini trend template to a pre-extracted ticker DataFrame.
Returns (df, metrics) if all conditions pass, None otherwise.
"""
try:
# Extract single-ticker slice
if isinstance(raw.columns, pd.MultiIndex):
if ticker not in raw.columns.get_level_values(1):
return None
df = raw.xs(ticker, axis=1, level=1).dropna()
else:
df = raw.dropna()
df = df.dropna()
# Need at least 200 rows for SMA200
if len(df) < 200:

View File

@ -11,32 +11,11 @@ import pandas as pd
from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner
from tradingagents.dataflows.discovery.utils import Priority
from tradingagents.dataflows.universe import load_universe
from tradingagents.utils.logger import get_logger
logger = get_logger(__name__)
# Default ticker file path (relative to project root)
DEFAULT_TICKER_FILE = "data/tickers.txt"
def _load_tickers_from_file(path: str) -> List[str]:
"""Load ticker symbols from a text file (one per line, # comments allowed)."""
try:
with open(path) as f:
tickers = [
line.strip().upper()
for line in f
if line.strip() and not line.strip().startswith("#")
]
if tickers:
logger.info(f"ML scanner: loaded {len(tickers)} tickers from {path}")
return tickers
except FileNotFoundError:
logger.warning(f"Ticker file not found: {path}")
except Exception as e:
logger.warning(f"Failed to load ticker file {path}: {e}")
return []
class MLSignalScanner(BaseScanner):
"""Scan a ticker universe for high ML win-probability setups.
@ -64,17 +43,13 @@ class MLSignalScanner(BaseScanner):
self.max_workers = self.scanner_config.get("max_workers", 8)
self.fetch_market_cap = self.scanner_config.get("fetch_market_cap", False)
# Load universe: config list > config file > default tickers file
# Load universe: explicit config list overrides the shared universe file
if "ticker_universe" in self.scanner_config:
self.universe = self.scanner_config["ticker_universe"]
else:
ticker_file = self.scanner_config.get(
"ticker_file",
config.get("tickers_file", DEFAULT_TICKER_FILE),
)
self.universe = _load_tickers_from_file(ticker_file)
self.universe = load_universe(config)
if not self.universe:
logger.warning(f"No tickers loaded from {ticker_file} scanner will be empty")
logger.warning("No tickers loaded — ML scanner will be empty")
def scan(self, state: Dict[str, Any]) -> List[Dict[str, Any]]:
if not self.is_enabled():

View File

@ -10,32 +10,12 @@ from typing import Any, Dict, List, Optional
from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner
from tradingagents.dataflows.discovery.utils import Priority
from tradingagents.dataflows.universe import load_universe
from tradingagents.dataflows.y_finance import get_option_chain, get_ticker_options
from tradingagents.utils.logger import get_logger
logger = get_logger(__name__)
DEFAULT_TICKER_FILE = "data/tickers.txt"
def _load_tickers_from_file(path: str) -> List[str]:
"""Load ticker symbols from a text file (one per line, # comments allowed)."""
try:
with open(path) as f:
tickers = [
line.strip().upper()
for line in f
if line.strip() and not line.strip().startswith("#")
]
if tickers:
logger.info(f"Options scanner: loaded {len(tickers)} tickers from {path}")
return tickers
except FileNotFoundError:
logger.warning(f"Ticker file not found: {path}")
except Exception as e:
logger.warning(f"Failed to load ticker file {path}: {e}")
return []
class OptionsFlowScanner(BaseScanner):
"""Scan for unusual options activity across a ticker universe."""
@ -52,15 +32,11 @@ class OptionsFlowScanner(BaseScanner):
self.max_tickers = self.scanner_config.get("max_tickers", 150)
self.max_workers = self.scanner_config.get("max_workers", 8)
# Load universe: explicit list > ticker_file > default file
# Load universe: explicit config list overrides the shared universe file
if "ticker_universe" in self.scanner_config:
self.ticker_universe = self.scanner_config["ticker_universe"]
else:
ticker_file = self.scanner_config.get(
"ticker_file",
config.get("tickers_file", DEFAULT_TICKER_FILE),
)
self.ticker_universe = _load_tickers_from_file(ticker_file)
self.ticker_universe = load_universe(config)
if not self.ticker_universe:
logger.warning("No tickers loaded — options scanner will be empty")

View File

@ -6,6 +6,7 @@ import pandas as pd
from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner
from tradingagents.dataflows.discovery.utils import Priority
from tradingagents.dataflows.universe import load_universe
from tradingagents.utils.logger import get_logger
logger = get_logger(__name__)
@ -25,22 +26,6 @@ SECTOR_ETFS = {
"XLC": "Communication Services",
}
DEFAULT_TICKER_FILE = "data/tickers.txt"
def _load_tickers_from_file(path: str) -> List[str]:
"""Load ticker symbols from a text file."""
try:
with open(path) as f:
return [
line.strip().upper()
for line in f
if line.strip() and not line.strip().startswith("#")
]
except Exception:
return []
class SectorRotationScanner(BaseScanner):
"""Detect sector momentum shifts and find laggards in accelerating sectors."""
@ -50,10 +35,6 @@ class SectorRotationScanner(BaseScanner):
def __init__(self, config: Dict[str, Any]):
super().__init__(config)
self.ticker_file = self.scanner_config.get(
"ticker_file",
config.get("tickers_file", DEFAULT_TICKER_FILE),
)
self.max_tickers = self.scanner_config.get("max_tickers", 100)
self.min_sector_accel = self.scanner_config.get("min_sector_acceleration", 2.0)
@ -89,7 +70,7 @@ class SectorRotationScanner(BaseScanner):
# Step 2: Batch-download 5-day close prices for all candidate tickers at once.
# This replaces the previous serial get_ticker_info() + download_history() loop
# which made up to max_tickers individual HTTP requests and would time out.
tickers = _load_tickers_from_file(self.ticker_file)
tickers = load_universe(self.config)
if not tickers:
return []

View File

@ -4,33 +4,14 @@ from typing import Any, Dict, List, Optional
import pandas as pd
from tradingagents.dataflows.data_cache.ohlcv_cache import download_ohlcv_cached
from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner
from tradingagents.dataflows.discovery.utils import Priority
from tradingagents.dataflows.universe import load_universe
from tradingagents.utils.logger import get_logger
logger = get_logger(__name__)
DEFAULT_TICKER_FILE = "data/tickers.txt"
def _load_tickers_from_file(path: str) -> List[str]:
"""Load ticker symbols from a text file."""
try:
with open(path) as f:
tickers = [
line.strip().upper()
for line in f
if line.strip() and not line.strip().startswith("#")
]
if tickers:
logger.info(f"Breakout scanner: loaded {len(tickers)} tickers from {path}")
return tickers
except FileNotFoundError:
logger.warning(f"Ticker file not found: {path}")
except Exception as e:
logger.warning(f"Failed to load ticker file {path}: {e}")
return []
class TechnicalBreakoutScanner(BaseScanner):
"""Scan for volume-confirmed technical breakouts."""
@ -41,10 +22,6 @@ class TechnicalBreakoutScanner(BaseScanner):
def __init__(self, config: Dict[str, Any]):
super().__init__(config)
self.ticker_file = self.scanner_config.get(
"ticker_file",
config.get("tickers_file", DEFAULT_TICKER_FILE),
)
self.max_tickers = self.scanner_config.get("max_tickers", 150)
self.min_volume_multiple = self.scanner_config.get("min_volume_multiple", 2.0)
self.lookback_days = self.scanner_config.get("lookback_days", 20)
@ -55,34 +32,23 @@ class TechnicalBreakoutScanner(BaseScanner):
logger.info("📈 Scanning for technical breakouts...")
tickers = _load_tickers_from_file(self.ticker_file)
tickers = load_universe(self.config)
if not tickers:
logger.warning("No tickers loaded for breakout scan")
return []
tickers = tickers[: self.max_tickers]
# Batch download OHLCV
from tradingagents.dataflows.y_finance import download_history
cache_dir = self.config.get("discovery", {}).get("ohlcv_cache_dir", "data/ohlcv_cache")
logger.info(f"Loading OHLCV for {len(tickers)} tickers from cache (3mo)...")
data = download_ohlcv_cached(tickers, period="3mo", cache_dir=cache_dir)
try:
data = download_history(
tickers,
period="3mo",
interval="1d",
auto_adjust=True,
progress=False,
)
except Exception as e:
logger.error(f"Batch download failed: {e}")
return []
if data is None or data.empty:
if not data:
return []
candidates = []
for ticker in tickers:
result = self._check_breakout(ticker, data)
for ticker, df in data.items():
result = self._check_breakout(ticker, df)
if result:
candidates.append(result)
@ -92,16 +58,10 @@ class TechnicalBreakoutScanner(BaseScanner):
logger.info(f"Technical breakouts: {len(candidates)} candidates")
return candidates
def _check_breakout(self, ticker: str, data: pd.DataFrame) -> Optional[Dict[str, Any]]:
def _check_breakout(self, ticker: str, df: pd.DataFrame) -> Optional[Dict[str, Any]]:
"""Check if ticker has a volume-confirmed breakout."""
try:
# Extract single-ticker data from multi-ticker download
if isinstance(data.columns, pd.MultiIndex):
if ticker not in data.columns.get_level_values(1):
return None
df = data.xs(ticker, axis=1, level=1).dropna()
else:
df = data.dropna()
df = df.dropna()
if len(df) < self.lookback_days + 5:
return None

View File

@ -0,0 +1,66 @@
"""Ticker universe — single source of truth.
All scanners that need a list of tickers should call load_universe(config).
Do NOT hardcode "data/tickers.txt" in scanner files — import this module instead.
Priority order:
1. config["discovery"]["universe"] explicit list (tests / overrides)
2. config["tickers_file"] path from top-level config
3. Default: data/tickers.txt resolved relative to repo root
"""
from pathlib import Path
from typing import Any, Dict, List, Optional
from tradingagents.utils.logger import get_logger
logger = get_logger(__name__)
# Resolved once at import time — works regardless of cwd
_REPO_ROOT = Path(__file__).resolve().parent.parent.parent
DEFAULT_TICKERS_FILE = str(_REPO_ROOT / "data" / "tickers.txt")
def load_universe(config: Optional[Dict[str, Any]] = None) -> List[str]:
    """Return the full ticker universe as a list of uppercase strings.

    Priority order:
      1. config["discovery"]["universe"] — explicit list (tests / overrides)
      2. config["tickers_file"] — path from top-level config
      3. DEFAULT_TICKERS_FILE — data/tickers.txt resolved relative to repo root

    Args:
        config: Top-level app config dict. If None, falls back to default file.

    Returns:
        Deduplicated list of ticker symbols, preserving first-seen order.
    """
    cfg = config or {}

    # 1. Explicit list in config (useful for tests or targeted overrides)
    explicit = cfg.get("discovery", {}).get("universe")
    if explicit:
        # Normalize AND deduplicate — the file branch dedupes, so the
        # override branch must honor the same "deduplicated" contract.
        normalized = [t.strip().upper() for t in explicit if t.strip()]
        tickers = list(dict.fromkeys(normalized))
        logger.info(f"Universe: {len(tickers)} tickers from config override")
        return tickers

    # 2. Config-specified file path, falling back to repo-relative default
    file_path = cfg.get("tickers_file", DEFAULT_TICKERS_FILE)
    return _load_from_file(file_path)
def _load_from_file(path: str) -> List[str]:
    """Read the ticker universe from a text file (one symbol per line).

    Blank lines and `#` comment lines are skipped; symbols are uppercased and
    deduplicated in first-seen order. Returns [] on any read failure.
    """
    try:
        with open(path) as fh:
            stripped = (line.strip() for line in fh)
            symbols = (
                entry.upper()
                for entry in stripped
                if entry and not entry.startswith("#")
            )
            # dict preserves insertion order, so this dedupes while keeping
            # the file's ordering intact
            unique = list(dict.fromkeys(symbols))
        logger.info(f"Universe: loaded {len(unique)} tickers from {path}")
        return unique
    except FileNotFoundError:
        logger.warning(f"Ticker file not found: {path} — universe will be empty")
        return []
    except Exception as e:
        logger.warning(f"Failed to load ticker file {path}: {e}")
        return []

View File

@ -111,6 +111,10 @@ DEFAULT_CONFIG = {
"events": {"enabled": True, "priority": 5, "deep_dive_budget": 3},
},
# ========================================
# OHLCV CACHE (populated nightly by prefetch workflow)
# ========================================
"ohlcv_cache_dir": "data/ohlcv_cache", # Shared cache read by all OHLCV-based scanners
# ========================================
# SCANNER EXECUTION SETTINGS
# ========================================
"scanner_execution": {
@ -137,8 +141,7 @@ DEFAULT_CONFIG = {
"unusual_volume_multiple": 2.0, # Min volume/OI ratio for unusual activity
"min_premium": 25000, # Minimum premium ($) to filter noise
"min_volume": 1000, # Minimum option volume to consider
# ticker_file: path to ticker list (defaults to tickers_file from root config)
# ticker_universe: explicit list overrides ticker_file if set
# ticker_universe: explicit list overrides the shared universe (data/tickers.txt)
"max_tickers": 1000, # Max tickers to scan (from start of file)
"max_workers": 8, # Parallel option chain fetch threads
},
@ -222,8 +225,7 @@ DEFAULT_CONFIG = {
"limit": 15,
"min_win_prob": 0.35, # Minimum P(WIN) to surface as candidate
"lookback_period": "6mo", # OHLCV history to fetch (needs ~130 trading days)
# ticker_file: path to ticker list (defaults to tickers_file from root config)
# ticker_universe: explicit list overrides ticker_file if set
# ticker_universe: explicit list overrides the shared universe (data/tickers.txt)
"fetch_market_cap": False, # Skip for speed (1 NaN out of 30 features)
"max_workers": 8, # Parallel feature computation threads
},
@ -242,13 +244,13 @@ DEFAULT_CONFIG = {
"enabled": True,
"pipeline": "momentum",
"limit": 10,
"max_tickers": 150, # Cap universe for scan speed
"max_tickers": 150, # Cap universe for scan speed
"min_volume_multiple": 1.5, # Min volume vs 20d avg to confirm breakout
"vol_avg_days": 20, # Days for volume average baseline
"freshness_days": 5, # Max days since 52w high was set
"freshness_threshold": 0.97, # Price must be within X% of 52w high
"min_price": 5.0, # Filter penny stocks
"min_avg_volume": 100_000, # Min avg daily volume for liquidity
"vol_avg_days": 20, # Days for volume average baseline
"freshness_days": 5, # Max days since 52w high was set
"freshness_threshold": 0.97, # Price must be within X% of 52w high
"min_price": 5.0, # Filter penny stocks
"min_avg_volume": 100_000, # Min avg daily volume for liquidity
},
},
},