---
# Nightly job that refreshes the committed OHLCV parquet cache so the
# scanners can read price history from disk instead of calling yfinance
# at run time.
name: Nightly OHLCV Prefetch

on:
  schedule:
    # 01:00 UTC — runs before iterate (06:00), research (07:00),
    # hypothesis (08:00) and discovery (12:30).
    - cron: "0 1 * * *"
  workflow_dispatch:
    inputs:
      period:
        description: "History window for initial download (e.g. 1y, 6mo)"
        required: false
        default: "1y"

# The last step pushes a commit, so the GITHUB_TOKEN must be allowed to
# write repository contents. Depending on repository/organization settings
# the token is read-only unless this is requested explicitly.
permissions:
  contents: write

# A manual dispatch and the nightly run must never update the same cache
# files at the same time; queue the second run instead of cancelling it.
concurrency:
  group: ohlcv-prefetch
  cancel-in-progress: false

jobs:
  prefetch:
    runs-on: ubuntu-latest
    timeout-minutes: 30

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # The cache commit is always pushed to main, so build it on main
          # even when the workflow is dispatched from another branch.
          ref: main

      - name: Set up git identity
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted on purpose: a bare 3.10 would be parsed as the float 3.1.
          python-version: "3.10"
          cache: pip

      - name: Install dependencies
        run: pip install -r requirements.txt

      - name: Run OHLCV prefetch
        env:
          # Passed through the environment (not interpolated into the script)
          # so a dispatch input can never be executed as shell code.
          # Scheduled runs have no inputs, hence the '1y' fallback.
          PERIOD: ${{ github.event.inputs.period || '1y' }}
        run: |
          python scripts/prefetch_ohlcv.py --period "$PERIOD"

      - name: Commit cache updates
        run: |
          git add data/ohlcv_cache/
          if git diff --cached --quiet; then
            echo "No cache changes to commit"
          else
            git commit -m "chore(cache): nightly OHLCV prefetch $(date -u +%Y-%m-%d)"
            # Replay the cache commit on top of anything that landed on main
            # while the prefetch was running, otherwise the push is rejected.
            git pull --rebase --autostash origin main
            git push origin HEAD:main
          fi
"AKAM", "ALB", "ALGN", "ALK", "ALL", "AMAT", "AMD", "AME", "AMGN", "AMT", "AMZN", "ANF", "AON", "AOS", "APA", "APD", "APH", "ARE", "ATKR", "ATO", "AVB", "AVGO", "AVY", "AWK", "AXON", "AXP", "AZO", "BA", "BAC", "BAX", "BBWI", "BBY", "BEN", "BF-B", "BIIB", "BIO", "BK", "BKNG", "BKR", "BLK", "BLMN", "BMY", "BNTX", "BR", "BRK-B", "BRO", "BRT", "BRX", "BSX", "BWA", "BXP", "C", "CAG", "CAH", "CARR", "CAT", "CAVA", "CB", "CBOE", "CBRE", "CCL", "CDNS", "CE", "CEG", "CF", "CFG", "CHTR", "CI", "CINF", "CL", "CLB", "CLF", "CLH", "CLX", "CMA", "CMC", "CMCSA", "CME", "CMG", "CMI", "CMS", "CNC", "CNP", "COF", "COIN", "COMP", "COO", "COP", "COST", "CPB", "CPRT", "CPT", "CRL", "CRM", "CRWD", "CSCO", "CSGP", "CSX", "CTAS", "CTRA", "CTSH", "CTVA", "CUBE", "CURV", "CVNA", "CVS", "CVX", "CWH", "CWK", "CZR", "D", "DAL", "DD", "DDOG", "DE", "DG", "DGX", "DHI", "DHR", "DIN", "DINO", "DIS", "DKS", "DLR", "DLTR", "DOV", "DPZ", "DQ", "DRI", "DT", "DTE", "DUK", "DVA", "DVN", "DXCM", "EA", "EBAY", "ECL", "ED", "EFX", "EG", "EIX", "EL", "ELV", "EMN", "EMR", "ENPH", "ENTG", "EOG", "EPAM", "EQH", "EQIX", "EQR", "EQT", "ES", "ESS", "ESTC", "ETN", "ETR", "ETSY", "EVH", "EVRG", "EWBC", "EXAS", "EXC", "EXPD", "EXPE", "EXPI", "F", "FANG", "FAST", "FBNC", "FCNCA", "FCX", "FDS", "FDX", "FE", "FFIV", "FHI", "FIS", "FISV", "FITB", "FIVE", "FIVN", "FMC", "FNB", "FNF", "FOX", "FOXA", "FRT", "FSLR", "FTI", "FTNT", "FTV", "FWRD", "G", "GATX", "GD", "GE", "GEHC", "GEN", "GILD", "GIS", "GL", "GM", "GNRC", "GOOG", "GOOGL", "GPC", "GPN", "GRMN", "GS", "GSHD", "GTLS", "HAL", "HAS", "HBAN", "HBI", "HCA", "HD", "HIG", "HII", "HLT", "HOG", "HOLX", "HOMB", "HON", "HOOD", "HPE", "HRL", "HSIC", "HST", "HSY", "HUM", "HWM", "HXL", "IBM", "ICE", "IDXX", "IEX", "IFF", "ILMN", "INCY", "INTC", "INVH", "IP", "IPG", "IQV", "IR", "IRM", "ISRG", "IT", "IVZ", "JACK", "JBHT", "JBL", "JCI", "JKHY", "JLL", "JNJ", "JPM", "K", "KDP", "KEY", "KHC", "KIM", "KLAC", "KMB", "KMI", "KMX", "KNX", "KO", "KR", "KRC", "L", "LAD", "LAMR", 
"LBRDA", "LBRDK", "LCID", "LDOS", "LEN", "LFUS", "LHX", "LIN", "LLY", "LMT", "LNC", "LNT", "LPLA", "LRCX", "LUMN", "LUV", "LVS", "LYB", "LYV", "MA", "MAA", "MAR", "MAS", "MAT", "MCHP", "MCK", "MCO", "MDB", "MDLZ", "MDT", "MELI", "MET", "META", "MGM", "MHK", "MKC", "MKTX", "MLI", "MMI", "MMM", "MNST", "MO", "MOH", "MOS", "MPC", "MPWR", "MRK", "MRNA", "MRVL", "MS", "MSCI", "MSFT", "MSI", "MT", "MTB", "MTCH", "MTD", "MTRX", "MUR", "NCLH", "NDAQ", "NEE", "NEM", "NET", "NFLX", "NI", "NKE", "NOC", "NOV", "NRG", "NSC", "NTAP", "NTRS", "NUE", "NVAX", "NVDA", "NVR", "NVST", "NXPI", "O", "ODFL", "OGN", "OI", "OKTA", "OMC", "OMCL", "ON", "ONB", "ONON", "OPEN", "ORCL", "ORLY", "OTIS", "OVV", "OXY", "PAG", "PAYC", "PAYX", "PCAR", "PCG", "PEG", "PENN", "PEP", "PFE", "PG", "PGR", "PH", "PHM", "PII", "PKG", "PLD", "PLTR", "PM", "PNC", "PNR", "PODD", "POOL", "PPG", "PPL", "PRGO", "PSA", "PSX", "PTC", "PVH", "PWR", "PYPL", "PZZA", "QCOM", "QLYS", "QRVO", "RBLX", "RCL", "REG", "REGN", "REIT", "RELX", "RGA", "RHI", "RIO", "RIVN", "RJF", "RKT", "RL", "RMD", "RNR", "ROL", "ROP", "ROST", "RRC", "RS", "RSG", "RTX", "RVLV", "RXO", "RYAN", "SAIC", "SBAC", "SBUX", "SCI", "SEE", "SHAK", "SJM", "SLB", "SLGN", "SMCI", "SNA", "SNPS", "SO", "SPG", "SPGI", "SRE", "STE", "STLD", "STT", "STX", "STZ", "SWK", "SWKS", "SYF", "SYK", "SYY", "T", "TAP", "TCBI", "TCOM", "TDG", "TDOC", "TDY", "TEAM", "TECH", "TEL", "TENB", "TER", "TFC", "TFX", "TGT", "TJX", "TKO", "TMO", "TNDM", "TOL", "TOST", "TPG", "TRGP", "TRV", "TSCO", "TSLA", "TSN", "TT", "TTD", "TTWO", "TXN", "TXT", "TYL", "U", "UAL", "UDR", "UHS", "ULTA", "UNH", "UNP", "UPS", "URI", "USB", "USFD", "UTHR", "UWMC", "V", "VALE", "VEEV", "VFC", "VICI", "VLO", "VMC", "VMI", "VNO", "VNT", "VOD", "VRM", "VRNS", "VRSK", "VRSN", "VRTX", "VSAT", "VST", "VTR", "VTRS", "VTYX", "VZ", "W", "WAB", "WAL", "WAT", "WBD", "WBS", "WCC", "WDAY", "WDC", "WEC", "WELL", "WEN", "WEX", "WFC", "WHR", "WING", "WLK", "WM", "WMB", "WMT", "WOLF", "WOOF", "WOR", "WPC", "WRB", 
def main(argv=None):
    """Prefetch OHLCV history for the ticker universe into the on-disk cache.

    Loads the universe via ``load_universe(DEFAULT_CONFIG)``, hands it to
    ``download_ohlcv_cached`` and prints a short summary (ticker coverage,
    row count, parquet size on disk, missing tickers).

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, so existing ``main()`` callers
            keep working; passing a list makes the function scriptable.

    Raises:
        SystemExit: with status 1 when no tickers could be loaded or when the
            download produced no data at all, so a calling job does not
            report success for a run that cached nothing.
    """
    parser = argparse.ArgumentParser(description="Prefetch OHLCV data for the ticker universe")
    parser.add_argument(
        "--period",
        default="1y",
        help="History window for initial download (default: 1y). Incremental runs ignore this.",
    )
    parser.add_argument(
        "--cache-dir",
        default=str(ROOT / "data" / "ohlcv_cache"),
        help="Directory to store parquet cache files",
    )
    args = parser.parse_args(argv)

    tickers = load_universe(DEFAULT_CONFIG)
    if not tickers:
        print("ERROR: No tickers loaded — check data/tickers.txt", flush=True)
        sys.exit(1)

    print(f"Prefetching OHLCV for {len(tickers)} tickers (period={args.period})...", flush=True)
    print(f"Cache dir: {args.cache_dir}", flush=True)

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments during a long download.
    start = time.perf_counter()
    data = download_ohlcv_cached(
        tickers=tickers,
        period=args.period,
        cache_dir=args.cache_dir,
    )
    elapsed = time.perf_counter() - start

    # Treat "nothing came back" (None or empty mapping) as a hard failure:
    # exiting 0 here would let the caller believe the cache is fresh.
    if not data:
        print("ERROR: OHLCV download returned no data — cache was not updated", flush=True)
        sys.exit(1)

    # Summary
    n_tickers = len(data)
    total_rows = sum(len(df) for df in data.values())
    cache_bytes = sum(p.stat().st_size for p in Path(args.cache_dir).glob("*.parquet"))
    cache_size_mb = cache_bytes / 1024 / 1024

    print(f"\nDone in {elapsed:.1f}s", flush=True)
    print(f"  Tickers cached : {n_tickers}/{len(tickers)}", flush=True)
    print(f"  Total rows     : {total_rows:,}", flush=True)
    print(f"  Cache size     : {cache_size_mb:.1f} MB", flush=True)

    missing = sorted(set(tickers) - set(data.keys()))
    if missing:
        print(f"  Missing tickers: {len(missing)} (delisted or no data)", flush=True)
        # A short sample makes it possible to spot a systematic failure
        # in the logs without dumping the whole list.
        sample = ", ".join(missing[:10])
        suffix = ", ..." if len(missing) > 10 else ""
        print(f"    e.g. {sample}{suffix}", flush=True)
tradingagents.utils.logger import get_logger logger = get_logger(__name__) -DEFAULT_TICKER_FILE = "data/tickers.txt" - - -def _load_tickers_from_file(path: str) -> List[str]: - """Load ticker symbols from a text file.""" - try: - with open(path) as f: - tickers = [ - line.strip().upper() - for line in f - if line.strip() and not line.strip().startswith("#") - ] - if tickers: - logger.info(f"52w-high scanner: loaded {len(tickers)} tickers from {path}") - return tickers - except FileNotFoundError: - logger.warning(f"Ticker file not found: {path}") - except Exception as e: - logger.warning(f"Failed to load ticker file {path}: {e}") - return [] - class High52wBreakoutScanner(BaseScanner): """Scan for stocks making volume-confirmed new 52-week high crossings. @@ -59,10 +40,6 @@ class High52wBreakoutScanner(BaseScanner): def __init__(self, config: Dict[str, Any]): super().__init__(config) - self.ticker_file = self.scanner_config.get( - "ticker_file", - config.get("tickers_file", DEFAULT_TICKER_FILE), - ) self.max_tickers = self.scanner_config.get("max_tickers", 150) # Academic threshold: 1.5x eliminates 63% of false signals self.min_volume_multiple = self.scanner_config.get("min_volume_multiple", 1.5) @@ -80,34 +57,25 @@ class High52wBreakoutScanner(BaseScanner): logger.info("🏔️ Scanning for 52-week high breakouts...") - tickers = _load_tickers_from_file(self.ticker_file) + tickers = load_universe(self.config) if not tickers: logger.warning("No tickers loaded for 52w-high breakout scan") return [] tickers = tickers[: self.max_tickers] - from tradingagents.dataflows.y_finance import download_history + cache_dir = self.config.get("discovery", {}).get("ohlcv_cache_dir", "data/ohlcv_cache") + logger.info(f"Loading OHLCV for {len(tickers)} tickers from cache...") + data = download_ohlcv_cached(tickers, period="1y", cache_dir=cache_dir) - try: - data = download_history( - tickers, - period="1y", - interval="1d", - auto_adjust=True, - progress=False, - ) - except Exception as 
e: - logger.error(f"Batch download failed: {e}") - return [] - - if data is None or data.empty: + if not data: return [] candidates = [] - for ticker in tickers: - result = self._check_52w_breakout(ticker, data) + for ticker, df in data.items(): + result = self._check_52w_breakout_df(df) if result: + result["ticker"] = ticker candidates.append(result) # Sort by strongest signal: fresh critical first, then by volume multiple @@ -119,18 +87,10 @@ class High52wBreakoutScanner(BaseScanner): logger.info(f"52-week high breakouts: {len(candidates)} candidates") return candidates - def _check_52w_breakout( - self, ticker: str, data: pd.DataFrame - ) -> Optional[Dict[str, Any]]: - """Check if ticker is making a new 52-week high with volume confirmation.""" + def _check_52w_breakout_df(self, df: pd.DataFrame) -> Optional[Dict[str, Any]]: + """Check if a pre-extracted ticker DataFrame is making a new 52-week high with volume confirmation.""" try: - # Extract single-ticker series from multi-ticker download - if isinstance(data.columns, pd.MultiIndex): - if ticker not in data.columns.get_level_values(1): - return None - df = data.xs(ticker, axis=1, level=1).dropna() - else: - df = data.dropna() + df = df.dropna() # Need at least 260 days for a proper 52-week window min_rows = self.vol_avg_days + self.freshness_days + 5 @@ -195,7 +155,6 @@ class High52wBreakoutScanner(BaseScanner): context += " | Fresh crossing — first time at new high this week" return { - "ticker": ticker, "source": self.name, "context": context, "priority": priority, @@ -207,7 +166,7 @@ class High52wBreakoutScanner(BaseScanner): } except Exception as e: - logger.debug(f"52w-high check failed for {ticker}: {e}") + logger.debug(f"52w-high check failed: {e}") return None diff --git a/tradingagents/dataflows/discovery/scanners/minervini.py b/tradingagents/dataflows/discovery/scanners/minervini.py index 470b94b4..bebcd214 100644 --- a/tradingagents/dataflows/discovery/scanners/minervini.py +++ 
b/tradingagents/dataflows/discovery/scanners/minervini.py @@ -11,33 +11,14 @@ from typing import Any, Dict, List, Optional, Tuple import pandas as pd +from tradingagents.dataflows.data_cache.ohlcv_cache import download_ohlcv_cached from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner from tradingagents.dataflows.discovery.utils import Priority +from tradingagents.dataflows.universe import load_universe from tradingagents.utils.logger import get_logger logger = get_logger(__name__) -DEFAULT_TICKER_FILE = "data/tickers.txt" - - -def _load_tickers_from_file(path: str) -> List[str]: - """Load ticker symbols from a text file.""" - try: - with open(path) as f: - tickers = [ - line.strip().upper() - for line in f - if line.strip() and not line.strip().startswith("#") - ] - if tickers: - logger.info(f"Minervini scanner: loaded {len(tickers)} tickers from {path}") - return tickers - except FileNotFoundError: - logger.warning(f"Ticker file not found: {path}") - except Exception as e: - logger.warning(f"Failed to load ticker file {path}: {e}") - return [] - class MinerviniScanner(BaseScanner): """Scan for stocks in a confirmed Minervini Stage 2 uptrend. 
@@ -56,10 +37,6 @@ class MinerviniScanner(BaseScanner): def __init__(self, config: Dict[str, Any]): super().__init__(config) - self.ticker_file = self.scanner_config.get( - "ticker_file", - config.get("tickers_file", DEFAULT_TICKER_FILE), - ) self.min_rs_rating = self.scanner_config.get("min_rs_rating", 70) self.lookback_period = self.scanner_config.get("lookback_period", "1y") self.sma_200_slope_days = self.scanner_config.get("sma_200_slope_days", 20) @@ -73,7 +50,7 @@ class MinerviniScanner(BaseScanner): logger.info("📊 Scanning for Minervini Stage 2 uptrends...") - tickers = _load_tickers_from_file(self.ticker_file) + tickers = load_universe(self.config) if not tickers: logger.warning("No tickers loaded for Minervini scan") return [] @@ -82,24 +59,12 @@ class MinerviniScanner(BaseScanner): logger.info(f"Limiting Minervini scan to {self.max_tickers}/{len(tickers)} tickers") tickers = tickers[: self.max_tickers] - # Batch download OHLCV — 1y needed for SMA200 - import yfinance as yf + cache_dir = self.config.get("discovery", {}).get("ohlcv_cache_dir", "data/ohlcv_cache") + logger.info(f"Loading OHLCV for {len(tickers)} tickers from cache ({self.lookback_period})...") + data = download_ohlcv_cached(tickers, period=self.lookback_period, cache_dir=cache_dir) - try: - logger.info(f"Batch-downloading {len(tickers)} tickers ({self.lookback_period})...") - raw = yf.download( - tickers, - period=self.lookback_period, - interval="1d", - auto_adjust=True, - progress=False, - ) - except Exception as e: - logger.error(f"Batch download failed: {e}") - return [] - - if raw is None or raw.empty: - logger.warning("Minervini scanner: batch download returned empty data") + if not data: + logger.warning("Minervini scanner: no OHLCV data available") return [] # Compute 12-month returns for RS Rating (need all tickers' data) @@ -107,10 +72,12 @@ class MinerviniScanner(BaseScanner): passing_tickers: List[Tuple[str, Dict[str, Any]]] = [] for ticker in tickers: - result = 
self._check_minervini(ticker, raw) + df = data.get(ticker) + if df is None or df.empty: + continue + result = self._check_minervini_df(df) if result is not None: ticker_df, metrics = result - # Compute 12-month cumulative return for RS rating ret = self._compute_return(ticker_df) if ret is not None: universe_returns[ticker] = ret @@ -119,13 +86,10 @@ class MinerviniScanner(BaseScanner): # Also compute returns for tickers that DIDN'T pass (for RS percentile ranking) for ticker in tickers: if ticker not in universe_returns: + df = data.get(ticker) + if df is None or df.empty: + continue try: - if isinstance(raw.columns, pd.MultiIndex): - if ticker not in raw.columns.get_level_values(1): - continue - df = raw.xs(ticker, axis=1, level=1).dropna() - else: - df = raw.dropna() ret = self._compute_return(df) if ret is not None: universe_returns[ticker] = ret @@ -197,21 +161,15 @@ class MinerviniScanner(BaseScanner): ) return candidates - def _check_minervini( - self, ticker: str, raw: pd.DataFrame + def _check_minervini_df( + self, df: pd.DataFrame ) -> Optional[Tuple[pd.DataFrame, Dict[str, Any]]]: - """Apply the 6-condition Minervini trend template to one ticker. + """Apply the 6-condition Minervini trend template to a pre-extracted ticker DataFrame. Returns (df, metrics) if all conditions pass, None otherwise. 
""" try: - # Extract single-ticker slice - if isinstance(raw.columns, pd.MultiIndex): - if ticker not in raw.columns.get_level_values(1): - return None - df = raw.xs(ticker, axis=1, level=1).dropna() - else: - df = raw.dropna() + df = df.dropna() # Need at least 200 rows for SMA200 if len(df) < 200: diff --git a/tradingagents/dataflows/discovery/scanners/ml_signal.py b/tradingagents/dataflows/discovery/scanners/ml_signal.py index 05f9be1d..9e55962e 100644 --- a/tradingagents/dataflows/discovery/scanners/ml_signal.py +++ b/tradingagents/dataflows/discovery/scanners/ml_signal.py @@ -11,32 +11,11 @@ import pandas as pd from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner from tradingagents.dataflows.discovery.utils import Priority +from tradingagents.dataflows.universe import load_universe from tradingagents.utils.logger import get_logger logger = get_logger(__name__) -# Default ticker file path (relative to project root) -DEFAULT_TICKER_FILE = "data/tickers.txt" - - -def _load_tickers_from_file(path: str) -> List[str]: - """Load ticker symbols from a text file (one per line, # comments allowed).""" - try: - with open(path) as f: - tickers = [ - line.strip().upper() - for line in f - if line.strip() and not line.strip().startswith("#") - ] - if tickers: - logger.info(f"ML scanner: loaded {len(tickers)} tickers from {path}") - return tickers - except FileNotFoundError: - logger.warning(f"Ticker file not found: {path}") - except Exception as e: - logger.warning(f"Failed to load ticker file {path}: {e}") - return [] - class MLSignalScanner(BaseScanner): """Scan a ticker universe for high ML win-probability setups. 
@@ -64,17 +43,13 @@ class MLSignalScanner(BaseScanner): self.max_workers = self.scanner_config.get("max_workers", 8) self.fetch_market_cap = self.scanner_config.get("fetch_market_cap", False) - # Load universe: config list > config file > default tickers file + # Load universe: explicit config list overrides the shared universe file if "ticker_universe" in self.scanner_config: self.universe = self.scanner_config["ticker_universe"] else: - ticker_file = self.scanner_config.get( - "ticker_file", - config.get("tickers_file", DEFAULT_TICKER_FILE), - ) - self.universe = _load_tickers_from_file(ticker_file) + self.universe = load_universe(config) if not self.universe: - logger.warning(f"No tickers loaded from {ticker_file} — scanner will be empty") + logger.warning("No tickers loaded — ML scanner will be empty") def scan(self, state: Dict[str, Any]) -> List[Dict[str, Any]]: if not self.is_enabled(): diff --git a/tradingagents/dataflows/discovery/scanners/options_flow.py b/tradingagents/dataflows/discovery/scanners/options_flow.py index 850789fe..5acb7ba9 100644 --- a/tradingagents/dataflows/discovery/scanners/options_flow.py +++ b/tradingagents/dataflows/discovery/scanners/options_flow.py @@ -10,32 +10,12 @@ from typing import Any, Dict, List, Optional from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner from tradingagents.dataflows.discovery.utils import Priority +from tradingagents.dataflows.universe import load_universe from tradingagents.dataflows.y_finance import get_option_chain, get_ticker_options from tradingagents.utils.logger import get_logger logger = get_logger(__name__) -DEFAULT_TICKER_FILE = "data/tickers.txt" - - -def _load_tickers_from_file(path: str) -> List[str]: - """Load ticker symbols from a text file (one per line, # comments allowed).""" - try: - with open(path) as f: - tickers = [ - line.strip().upper() - for line in f - if line.strip() and not line.strip().startswith("#") - ] - if tickers: - 
logger.info(f"Options scanner: loaded {len(tickers)} tickers from {path}") - return tickers - except FileNotFoundError: - logger.warning(f"Ticker file not found: {path}") - except Exception as e: - logger.warning(f"Failed to load ticker file {path}: {e}") - return [] - class OptionsFlowScanner(BaseScanner): """Scan for unusual options activity across a ticker universe.""" @@ -52,15 +32,11 @@ class OptionsFlowScanner(BaseScanner): self.max_tickers = self.scanner_config.get("max_tickers", 150) self.max_workers = self.scanner_config.get("max_workers", 8) - # Load universe: explicit list > ticker_file > default file + # Load universe: explicit config list overrides the shared universe file if "ticker_universe" in self.scanner_config: self.ticker_universe = self.scanner_config["ticker_universe"] else: - ticker_file = self.scanner_config.get( - "ticker_file", - config.get("tickers_file", DEFAULT_TICKER_FILE), - ) - self.ticker_universe = _load_tickers_from_file(ticker_file) + self.ticker_universe = load_universe(config) if not self.ticker_universe: logger.warning("No tickers loaded — options scanner will be empty") diff --git a/tradingagents/dataflows/discovery/scanners/sector_rotation.py b/tradingagents/dataflows/discovery/scanners/sector_rotation.py index c35ed9ee..f292bd93 100644 --- a/tradingagents/dataflows/discovery/scanners/sector_rotation.py +++ b/tradingagents/dataflows/discovery/scanners/sector_rotation.py @@ -6,6 +6,7 @@ import pandas as pd from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner from tradingagents.dataflows.discovery.utils import Priority +from tradingagents.dataflows.universe import load_universe from tradingagents.utils.logger import get_logger logger = get_logger(__name__) @@ -25,22 +26,6 @@ SECTOR_ETFS = { "XLC": "Communication Services", } -DEFAULT_TICKER_FILE = "data/tickers.txt" - - -def _load_tickers_from_file(path: str) -> List[str]: - """Load ticker symbols from a text file.""" - try: - with 
open(path) as f: - return [ - line.strip().upper() - for line in f - if line.strip() and not line.strip().startswith("#") - ] - except Exception: - return [] - - class SectorRotationScanner(BaseScanner): """Detect sector momentum shifts and find laggards in accelerating sectors.""" @@ -50,10 +35,6 @@ class SectorRotationScanner(BaseScanner): def __init__(self, config: Dict[str, Any]): super().__init__(config) - self.ticker_file = self.scanner_config.get( - "ticker_file", - config.get("tickers_file", DEFAULT_TICKER_FILE), - ) self.max_tickers = self.scanner_config.get("max_tickers", 100) self.min_sector_accel = self.scanner_config.get("min_sector_acceleration", 2.0) @@ -89,7 +70,7 @@ class SectorRotationScanner(BaseScanner): # Step 2: Batch-download 5-day close prices for all candidate tickers at once. # This replaces the previous serial get_ticker_info() + download_history() loop # which made up to max_tickers individual HTTP requests and would time out. - tickers = _load_tickers_from_file(self.ticker_file) + tickers = load_universe(self.config) if not tickers: return [] diff --git a/tradingagents/dataflows/discovery/scanners/technical_breakout.py b/tradingagents/dataflows/discovery/scanners/technical_breakout.py index 4dfa03c1..22208d03 100644 --- a/tradingagents/dataflows/discovery/scanners/technical_breakout.py +++ b/tradingagents/dataflows/discovery/scanners/technical_breakout.py @@ -4,33 +4,14 @@ from typing import Any, Dict, List, Optional import pandas as pd +from tradingagents.dataflows.data_cache.ohlcv_cache import download_ohlcv_cached from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner from tradingagents.dataflows.discovery.utils import Priority +from tradingagents.dataflows.universe import load_universe from tradingagents.utils.logger import get_logger logger = get_logger(__name__) -DEFAULT_TICKER_FILE = "data/tickers.txt" - - -def _load_tickers_from_file(path: str) -> List[str]: - """Load ticker symbols from a 
text file.""" - try: - with open(path) as f: - tickers = [ - line.strip().upper() - for line in f - if line.strip() and not line.strip().startswith("#") - ] - if tickers: - logger.info(f"Breakout scanner: loaded {len(tickers)} tickers from {path}") - return tickers - except FileNotFoundError: - logger.warning(f"Ticker file not found: {path}") - except Exception as e: - logger.warning(f"Failed to load ticker file {path}: {e}") - return [] - class TechnicalBreakoutScanner(BaseScanner): """Scan for volume-confirmed technical breakouts.""" @@ -41,10 +22,6 @@ class TechnicalBreakoutScanner(BaseScanner): def __init__(self, config: Dict[str, Any]): super().__init__(config) - self.ticker_file = self.scanner_config.get( - "ticker_file", - config.get("tickers_file", DEFAULT_TICKER_FILE), - ) self.max_tickers = self.scanner_config.get("max_tickers", 150) self.min_volume_multiple = self.scanner_config.get("min_volume_multiple", 2.0) self.lookback_days = self.scanner_config.get("lookback_days", 20) @@ -55,34 +32,23 @@ class TechnicalBreakoutScanner(BaseScanner): logger.info("📈 Scanning for technical breakouts...") - tickers = _load_tickers_from_file(self.ticker_file) + tickers = load_universe(self.config) if not tickers: logger.warning("No tickers loaded for breakout scan") return [] tickers = tickers[: self.max_tickers] - # Batch download OHLCV - from tradingagents.dataflows.y_finance import download_history + cache_dir = self.config.get("discovery", {}).get("ohlcv_cache_dir", "data/ohlcv_cache") + logger.info(f"Loading OHLCV for {len(tickers)} tickers from cache (3mo)...") + data = download_ohlcv_cached(tickers, period="3mo", cache_dir=cache_dir) - try: - data = download_history( - tickers, - period="3mo", - interval="1d", - auto_adjust=True, - progress=False, - ) - except Exception as e: - logger.error(f"Batch download failed: {e}") - return [] - - if data is None or data.empty: + if not data: return [] candidates = [] - for ticker in tickers: - result = 
def load_universe(config: Optional[Dict[str, Any]] = None) -> List[str]:
    """Return the full ticker universe as a list of uppercase strings.

    Resolution order:
      1. ``config["discovery"]["universe"]`` — explicit list (tests / overrides).
      2. ``config["tickers_file"]`` — path to a ticker file.
      3. ``DEFAULT_TICKERS_FILE``.

    Args:
        config: Top-level app config dict. If None, falls back to the default file.

    Returns:
        Deduplicated list of ticker symbols in first-seen order. Empty when
        no source yields any ticker (a warning is logged for file errors).
    """
    cfg = config or {}

    # 1. Explicit list in config (useful for tests or targeted overrides).
    #    ``or {}`` also covers a config that carries ``"discovery": None``.
    explicit = (cfg.get("discovery") or {}).get("universe")
    if explicit:
        if isinstance(explicit, str):
            # A bare string would otherwise be iterated character by
            # character; accept "AAPL, MSFT" / "AAPL MSFT" instead.
            explicit = explicit.replace(",", " ").split()
        tickers = _normalize(explicit)
        logger.info(f"Universe: {len(tickers)} tickers from config override")
        return tickers

    # 2. Config-specified file path, falling back to the repo-relative default.
    #    ``or`` (not a .get default) so None / "" also select the default file.
    file_path = cfg.get("tickers_file") or DEFAULT_TICKERS_FILE
    return _load_from_file(str(file_path))


def _normalize(symbols) -> List[str]:
    """Strip, upper-case and deduplicate an iterable of symbols, keeping first-seen order."""
    cleaned = (str(s).strip().upper() for s in symbols if s is not None)
    # dict preserves insertion order, so this dedupes without reordering.
    return list(dict.fromkeys(t for t in cleaned if t))


def _load_from_file(path: str) -> List[str]:
    """Load tickers from a text file (one per line, # comments ignored).

    Both full-line comments and trailing ``# ...`` comments are dropped.
    A relative ``path`` that does not exist under the current working
    directory is retried relative to the repository root, so the result does
    not depend on where the process was started.

    Returns:
        Deduplicated, upper-cased tickers in file order; ``[]`` if the file is
        missing or unreadable (a warning is logged, nothing is raised).
    """
    candidate = Path(path)
    if not candidate.is_absolute() and not candidate.exists():
        anchored = _REPO_ROOT / candidate
        if anchored.exists():
            candidate = anchored

    try:
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(candidate, encoding="utf-8") as f:
            symbols = [line.split("#", 1)[0] for line in f]
        unique = _normalize(symbols)
        logger.info(f"Universe: loaded {len(unique)} tickers from {candidate}")
        return unique
    except FileNotFoundError:
        logger.warning(f"Ticker file not found: {path} — universe will be empty")
        return []
    except Exception as e:
        logger.warning(f"Failed to load ticker file {path}: {e}")
        return []
======================================== "scanner_execution": { @@ -137,8 +141,7 @@ DEFAULT_CONFIG = { "unusual_volume_multiple": 2.0, # Min volume/OI ratio for unusual activity "min_premium": 25000, # Minimum premium ($) to filter noise "min_volume": 1000, # Minimum option volume to consider - # ticker_file: path to ticker list (defaults to tickers_file from root config) - # ticker_universe: explicit list overrides ticker_file if set + # ticker_universe: explicit list overrides the shared universe (data/tickers.txt) "max_tickers": 1000, # Max tickers to scan (from start of file) "max_workers": 8, # Parallel option chain fetch threads }, @@ -222,8 +225,7 @@ DEFAULT_CONFIG = { "limit": 15, "min_win_prob": 0.35, # Minimum P(WIN) to surface as candidate "lookback_period": "6mo", # OHLCV history to fetch (needs ~130 trading days) - # ticker_file: path to ticker list (defaults to tickers_file from root config) - # ticker_universe: explicit list overrides ticker_file if set + # ticker_universe: explicit list overrides the shared universe (data/tickers.txt) "fetch_market_cap": False, # Skip for speed (1 NaN out of 30 features) "max_workers": 8, # Parallel feature computation threads }, @@ -242,13 +244,13 @@ DEFAULT_CONFIG = { "enabled": True, "pipeline": "momentum", "limit": 10, - "max_tickers": 150, # Cap universe for scan speed + "max_tickers": 150, # Cap universe for scan speed "min_volume_multiple": 1.5, # Min volume vs 20d avg to confirm breakout - "vol_avg_days": 20, # Days for volume average baseline - "freshness_days": 5, # Max days since 52w high was set - "freshness_threshold": 0.97, # Price must be within X% of 52w high - "min_price": 5.0, # Filter penny stocks - "min_avg_volume": 100_000, # Min avg daily volume for liquidity + "vol_avg_days": 20, # Days for volume average baseline + "freshness_days": 5, # Max days since 52w high was set + "freshness_threshold": 0.97, # Price must be within X% of 52w high + "min_price": 5.0, # Filter penny stocks + 
"min_avg_volume": 100_000, # Min avg daily volume for liquidity }, }, },