diff --git a/data/universe_cache.json b/data/universe_cache.json
new file mode 100644
index 00000000..0e8a25e0
--- /dev/null
+++ b/data/universe_cache.json
@@ -0,0 +1 @@
+{"russell1000": {"ts": 1776209755.673307, "tickers": ["NVDA", "AAPL", "MSFT", "AMZN", "GOOGL", "AVGO", "GOOG", "META", "TSLA", "BRKB", "JPM", "LLY", "XOM", "JNJ", "WMT", "V", "MU", "NFLX", "COST", "MA", "AMD", "CAT", "ABBV", "CVX", "BAC", "HD", "PG", "LRCX", "CSCO", "GE", "AMAT", "MRK", "KO", "PLTR", "INTC", "UNH", "RTX", "WFC", "GEV", "ORCL", "GS", "PM", "LIN", "KLAC", "IBM", "C", "MCD", "PEP", "MS", "TXN", "TMO", "NEE", "VZ", "AMGN", "DIS", "TJX", "APH", "T", "AXP", "ABT", "GILD", "ADI", "BA", "ISRG", "SCHW", "ETN", "CRM", "ANET", "BLK", "PFE", "COP", "UNP", "HON", "DE", "UBER", "BKNG", "WELL", "QCOM", "LOW", "GLW", "SNDK", "SPGI", "PANW", "LMT", "NEM", "PLD", "PH", "DHR", "COF", "CB", "SYK", "WDC", "BMY", "ACN", "PGR", "VRT", "MDT", "MO", "VRTX", "MRVL", "SBUX", "CME", "MCK", "TT", "SO", "EQIX", "HWM", "DUK", "CMCSA", "INTU", "APP", "CRWD", "ADBE", "CVS", "FCX", "ICE", "BSX", "NOW", "TMUS", "CEG", "NOC", "BX", "BK", "USB", "PWR", "JCI", "WMB", "PNC", "GD", "WM", "CMI", "AMT", "MRSH", "MMM", "EMR", "HCA", "ITW", "FDX", "MAR", "SNPS", "CSX", "ORLY", "ADP", "CDNS", "CRH", "SLB", "SPOT", "SHW", "XTSLA", "REGN", "UPS", "EOG", "MDLZ", "HLT", "VLO", "MSI", "CI", "AEP", "RCL", "ROST", "ELV", "GM", "ECL", "AON", "MCO", "TDG", "CIEN", "MPC", "CL", "NSC", "LHX", "KKR", "APD", "DLR", "PCAR", "TRV", "SPG", "PSX", "WBD", "MPWR", "TFC", "SRE", "KMI", "BKR", "LITE", "CTAS", "DELL", "AZO", "COR", "DASH", "O", "TER", "NET", "KEYS", "FIX", "CTVA", "AJG", "ALL", "LNG", "AU", "ABNB", "HOOD", "AME", "COHR", "D", "TGT", "VST", "OKE", "ZTS", "AFL", "FAST", "MNST", "ETR", "EA", "TRGP", "CAH", "GWW", "CARR", "NKE", "NU", "URI", "FERG", "EXC", "FTNT", "CVNA", "ADSK", "XEL", "F", "PSA", "USD", "APO", "ROK", "WAB", "IDXX", "CMG", "YUM", "EW", "FITB", "PYPL", "BDX", "SNOW", "EBAY", "DAL", "NUE", "RSG", "CBRE", "GRMN", "AMP", "ALNY", "AIG", "MET", "MSTR", "OXY", "PEG", "ED", "KR", "VTR", "MSCI", "STT", "VMC", "PCG", "NDAQ", "MCHP", "HIG", "COIN", "ODFL", "ROP", "DHI", "MLM", "WEC", "CCI", "FANG", "TTWO", "EME", "RBLX", "SYY", "NRG", "IR", "EQT", "XYZ", "ACGL", "RKLB", "PRU", "DDOG", "MTB", "GEHC", "RMD", "KDP", "ADM", "HBAN", "A", "HPE", "IRM", "KVUE", "HAL", "OTIS", "WAT", "JBL", "KMB", "IBKR", "CCL", "INSM", "FISV", "XYL", "CBOE", "TPR", "UAL", "DTE", "FTI", "VICI", "TDY", "AEE", "ATO", "DOV", "CPRT", "EXR", "DVN", "CTSH", "NTRS", "FLEX", "PPL", "HUBB", "ON", "IQV", "HSY", "PAYX", "DOW", "Q", "EXPE", "MTD", "CFG", "CNP", "FE", "WTW", "EIX", "AXON", "CASY", "CW", "RJF", "STLD", "NTRA", "FTAI", "QSR", "CPNG", "LYV", "DG", "AWK", "WDAY", "ES", "BIIB", "LPLA", "SYF", "CINF", "PPG", "CTRA", "DXCM", "FIS", "UTHR", "OMC", "RF", "TSCO", "MKL", "TPL", "XPO", "AVB", "PHM", "HUM", "CMS", "WWD", "SBAC", "ULTA", "VEEV", "RVMD", "FICO", "STZ", "VRSK", "EQR", "WSM", "CHD", "VRSN", "EFX", "MTZ", "NI", "VLTO", "RGLD", "EXE", "DRI", "ZM", "LH", "ATI", "STE", "SW", "ASTS", "ALB", "DGX", "TROW", "CPAY", "CRS", "NVT", "BURL", "BWXT", "ALAB", "ENTG", "SOFI", "KEY", "USFD", "PFG", "FSLR", "ARES", "LDOS", "SNA", "PSTG", "DD", "DLTR", "EXPD", "NTAP", "ILMN", "BRO", "CHRW", "RDDT", "IP", "LYB", "KHC", "FWONK", "AA", "FTV", "RBA", "EVRG", "ZBH", "CHTR", "CF", "RPRX", "L", "ITT", "LNT", "MKSI", "AMCR", "MDB", "PKG", "WST", "GIS", "LEN", "RBC", "BR", "IFF", "NVR", "EL", "CNC", "LUV", "HPQ", "FCNCA", "WY", "MRNA", "TSN", "JBHT", "BAM", "TWLO", "MTSI", "BALL", "CDW", "FFIV",
"RS", "LULU", "LVS", "THC", "HEIA", "FLUT", "APG", "LII", "SCCO", "ROL", "GPN", "BG", "TXT", "OVV", "VTRS", "PTC", "SGI", "EWBC", "DECK", "ESS", "TRMB", "SUI", "INVH", "NLY", "SNX", "IEX", "WPC", "CSGP", "HII", "KIM", "MDLN", "INCY", "CLH", "WCC", "JLL", "GPC", "WRB", "NDSN", "PNR", "J", "CSL", "TLN", "MAA", "LSCC", "PR", "GGG", "SSNC", "TW", "WSO", "RKT", "QXO", "RL", "COO", "AFRM", "REG", "LECO", "EG", "TRU", "RRX", "RNR", "PODD", "APA", "HST", "RPM", "TYL", "MKC", "PFGC", "VIK", "AKAM", "TOL", "RGA", "PNFP", "MAS", "DTM", "TKO", "AVY", "MLI", "DKS", "SMCI", "ROKU", "FOXA", "SOLS", "UNM", "OHI", "HAS", "FNF", "APTV", "CG", "CLX", "NYT", "GLPI", "EVR", "CCK", "ALLE", "PEN", "TOST", "HEI", "BJ", "ALGN", "ONTO", "ROIV", "RIVN", "ZS", "PNW", "NBIX", "ELS", "FHN", "EXEL", "FIVE", "WBS", "SF", "CACI", "GNRC", "GL", "LAMR", "BBY", "EQH", "DPZ", "ALLY", "BWA", "AGNC", "ELAN", "IONS", "JAZZ", "ZBRA", "SCI", "UDR", "DOC", "ARMK", "IT", "ACM", "COKE", "WTRG", "BLD", "GDDY", "SWK", "CRCL", "AIT", "JKHY", "AIZ", "MEDP", "CPT", "RVTY", "ALSN", "TXRH", "DKNG", "AR", "FLS", "WMS", "OKTA", "PINS", "EHC", "GWRE", "AES", "GMED", "HUBS", "SAIA", "DCI", "CNM", "BMRN", "GEN", "TEAM", "CNH", "AMH", "UHS", "EGP", "DT", "SOLV", "OC", "OGE", "BAH", "OSK", "RRC", "SSB", "WTFC", "DAR", "TTC", "KNX", "NTNX", "SN", "NWSA", "GME", "ZION", "TTD", "BLDR", "BPOP", "ORI", "CR", "CGNX", "ARW", "DINO", "FRT", "LFUS", "SJM", "BRX", "TECH", "DOCU", "NCLH", "BAX", "UBFUT", "IVZ", "R", "PRI", "ATR", "AFG", "AYI", "CUBE", "CRL", "SWKS", "COLB", "FDS", "ESI", "QGEN", "TIGO", "VNOM", "EMN", "HLI", "VMI", "MUSA", "CFR", "FOX", "ADC", "CAVA", "BXP", "XP", "IOT", "UGI", "WAL", "CRUS", "MTCH", "REXR", "TTEK", "NFG", "AMG", "MASI", "NNN", "MP", "SEIC", "HSIC", "LKQ", "CHRD", "AXS", "KEX", "WYNN", "MOS", "KNSL", "HALO", "AOS", "AGCO", "IDA", "AWI", "MOH", "FR", "TAP", "SSD", "AMKR", "KMX", "MANH", "MGM", "ONON", "ARE", "WFRD", "AAL", "SFM", "CE", "NOV", "AM", "DOX", "U", "INGR", "POOL", "MTDR", "STAG", "JEF", "W", "RBRK", "CAG", "EPAM", "VOYA", "BEN", "CART", "MIDD", "ECG", "QRVO", "MKTX", "CBSH", "LAD", "HXL", "EXP", "JHG", "TKR", "CAR", "OMF", "GXO", "Z", "H", "MSA", "VFC", "WH", "GTES", "FNB", "LEA", "THG", "RGEN", "RYN", "AAON", "SARO", "PCOR", "PB", "SITE", "AXTA", "PAYC", "LNC", "BROS", "STWD", "FAF", "MTG", "GFS", "LSTR", "HRL", "WEX", "ACI", "AVT", "VSNT", "CHWY", "PLNT", "LBRDK", "GAP", "CHDN", "ESAB", "LW", "HR", "G", "FCN", "BEPC", "AN", "BIO", "CELH", "SON", "CHE", "ST", "VNT", "TFX", "MHK", "WTM", "OLLI", "CZR", "LYFT", "AVTR", "ETSY", "DVA", "OZK", "RAL", "MSGS", "FND", "AS", "CLF", "M", "NXST", "RITM", "WING", "GNTX", "BSY", "OWL", "TPG", "AUR", "CROX", "BYD", "FBIN", "BC", "PRMB", "RLI", "VNO", "POST", "LPX", "UHALB", "KBR", "TNL", "EXLS", "BFAM", "MRP", "ALGM", "MAT", "MTN", "SLM", "LLYVK", "SAIC", "LOPE", "ALK", "SIRI", "MDU", "KRMN", "NVST", "TREX", "LAZ", "TEM", "OLED", "PCTY", "VVV", "BFB", "VIRT", "AMTM", "PVH", "APLS", "DBX", "MSM", "ENPH", "MORN", "HRB", "THO", "CORT", "PATH", "EPR", "UI", "BRKR", "HLNE", "ESTC", "BBWI", "CPB", "WLK", "NEU", "SHC", "CUZ", "AGO", "S", "DLB", "RYAN", "NWS", "VKTX", "BHF", "SLGN", "DRS", "PEGA", "DUOL", "COLD", "ADT", "ELF", "OLN", "FHB", "WSC", "IRDM", "KRC", "CWEN", "APPF", "JHX", "FRPT", "RNG", "YETI", "IPGP", "CCC", "BOKF", "FOUR", "CHH", "WHR", "KD", "IAC", "BILL", "PSN", "DOCS", "GPK", "HAYW", "NSA", "LOAR", "PAG", "EEFT", "SMG", "RHI", "FRHC", "HOG", "QS", "GTLB", "TDC", "ASH", "LINE", "WU", "MPT", "HIW", "ACHC", "PENN", "XRAY", "HUN", "SMMT", "PK", "OGN", "FMC", "VGNT", 
"DXC", "CXT", "CACC", "HHH", "FWONA", "BIRK", "RARE", "LBTYA", "KMPR", "SRPT", "BRBR", "GLOB", "ZG", "SAM", "LLYVA", "RH", "NCNO", "DDS", "CAI", "LBTYK", "SEB", "INSP", "NWL", "PRGO", "DJT", "GTM", "FLO", "COLM", "BFA", "PPC", "DV", "MAN", "CWENA", "SNDR", "SFD", "BLSH", "CNXC", "FIGR", "UAA", "CNA", "WEN", "REYN", "GLIBK", "UA", "LCID", "CLVT", "LBRDA", "TFSL", "COTY", "LENB", "SAIL", "CERT", "UWMC", "CBC", "NIQ", "INGM", "UHAL", "FRMI", "GLIBA"]}} \ No newline at end of file diff --git a/tradingagents/dataflows/data_cache/__init__.py b/tradingagents/dataflows/data_cache/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tradingagents/dataflows/universe.py b/tradingagents/dataflows/universe.py index 83e880a9..bbb781f1 100644 --- a/tradingagents/dataflows/universe.py +++ b/tradingagents/dataflows/universe.py @@ -4,11 +4,14 @@ All scanners that need a list of tickers should call load_universe(config). Do NOT hardcode "data/tickers.txt" in scanner files — import this module instead. Priority order: - 1. config["discovery"]["universe"] — explicit list (tests / overrides) - 2. config["tickers_file"] — path from top-level config - 3. Default: data/tickers.txt resolved relative to repo root + 1. config["discovery"]["universe"] — explicit list (tests / overrides) + 2. config["discovery"]["universe_source"] — dynamic index ("russell1000") + 3. config["tickers_file"] — path from top-level config + 4. Default: data/tickers.txt resolved relative to repo root """ +import json +import time from pathlib import Path from typing import Any, Dict, List, Optional @@ -19,6 +22,8 @@ logger = get_logger(__name__) # Resolved once at import time — works regardless of cwd _REPO_ROOT = Path(__file__).resolve().parent.parent.parent DEFAULT_TICKERS_FILE = str(_REPO_ROOT / "data" / "tickers.txt") +_UNIVERSE_CACHE_FILE = _REPO_ROOT / "data" / "universe_cache.json" +_CACHE_TTL_SECONDS = 7 * 24 * 3600 # refresh weekly def load_universe(config: Optional[Dict[str, Any]] = None) -> List[str]: @@ -28,7 +33,7 @@ def load_universe(config: Optional[Dict[str, Any]] = None) -> List[str]: config: Top-level app config dict. If None, falls back to default file. Returns: - Deduplicated list of ticker symbols in the order they appear in the file. + Deduplicated list of ticker symbols in the order they appear in the source. """ cfg = config or {} @@ -39,11 +44,100 @@ def load_universe(config: Optional[Dict[str, Any]] = None) -> List[str]: logger.info(f"Universe: {len(tickers)} tickers from config override") return tickers - # 2. Config-specified file path, falling back to repo-relative default + # 2. Dynamic index source + source = cfg.get("discovery", {}).get("universe_source", "") + if source == "russell1000": + tickers = _load_russell1000() + if tickers: + return tickers + logger.warning("Russell 1000 fetch failed — falling back to tickers.txt") + + # 3. 
Config-specified file path, falling back to repo-relative default file_path = cfg.get("tickers_file", DEFAULT_TICKERS_FILE) return _load_from_file(file_path) +def _load_russell1000() -> List[str]: + """Fetch Russell 1000 constituents from iShares IWB ETF holdings, with weekly disk cache.""" + # Return cached copy if fresh + cached = _read_universe_cache("russell1000") + if cached: + return cached + + logger.info("Fetching Russell 1000 constituents from iShares IWB holdings...") + try: + import io + import urllib.request + + import pandas as pd + + url = ( + "https://www.ishares.com/us/products/239707/ISHARES-RUSSELL-1000-ETF" + "/1467271812596.ajax?fileType=csv&fileName=IWB_holdings&dataType=fund" + ) + req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"}) + with urllib.request.urlopen(req, timeout=30) as r: + content = r.read().decode("utf-8", errors="ignore") + + # iShares CSV has a few header rows before the actual data + df = pd.read_csv(io.StringIO(content), skiprows=9) + + if "Ticker" not in df.columns: + logger.warning("Could not find Ticker column in iShares IWB CSV") + return [] + + tickers = [] + for t in df["Ticker"].dropna(): + s = str(t).strip().upper().replace(".", "-") + # Valid tickers: 1-5 alpha chars, optionally one hyphen (e.g. BRK-B) + if s and len(s) <= 6 and s.replace("-", "").isalpha(): + tickers.append(s) + + # Deduplicate while preserving order (by weight — iShares sorts by weight desc) + seen: set = set() + tickers = [t for t in tickers if not (t in seen or seen.add(t))] + + if not tickers: + logger.warning("No tickers parsed from iShares IWB CSV") + return [] + + _write_universe_cache("russell1000", tickers) + logger.info(f"Universe: {len(tickers)} Russell 1000 tickers (cached)") + return tickers + + except Exception as e: + logger.warning(f"Failed to fetch Russell 1000 from iShares: {e}") + return [] + + +def _read_universe_cache(key: str) -> List[str]: + """Return cached ticker list if it exists and is within TTL.""" + try: + if not _UNIVERSE_CACHE_FILE.exists(): + return [] + data = json.loads(_UNIVERSE_CACHE_FILE.read_text()) + entry = data.get(key, {}) + if time.time() - entry.get("ts", 0) < _CACHE_TTL_SECONDS: + tickers = entry.get("tickers", []) + logger.info(f"Universe: {len(tickers)} {key} tickers (from disk cache)") + return tickers + except Exception: + pass + return [] + + +def _write_universe_cache(key: str, tickers: List[str]) -> None: + """Persist ticker list to disk cache.""" + try: + data: dict = {} + if _UNIVERSE_CACHE_FILE.exists(): + data = json.loads(_UNIVERSE_CACHE_FILE.read_text()) + data[key] = {"ts": time.time(), "tickers": tickers} + _UNIVERSE_CACHE_FILE.write_text(json.dumps(data)) + except Exception as e: + logger.debug(f"Failed to write universe cache: {e}") + + def _load_from_file(path: str) -> List[str]: """Load tickers from a text file (one per line, # comments ignored).""" try: diff --git a/tradingagents/default_config.py b/tradingagents/default_config.py index be4e0499..933e8369 100644 --- a/tradingagents/default_config.py +++ b/tradingagents/default_config.py @@ -28,6 +28,7 @@ DEFAULT_CONFIG = { "final_recommendations": 15, # Number of final opportunities to recommend "deep_dive_max_workers": 1, # Parallel workers for deep-dive analysis (1 = sequential) "discovery_mode": "hybrid", # "traditional", "semantic", or "hybrid" + "universe_source": "russell1000", # "russell1000" or "" (uses tickers_file) # Ranking context truncation "truncate_ranking_context": False, # True = truncate to save tokens, False = full 
context "max_news_chars": 500, # Only used if truncate_ranking_context=True