fix(universe): robust iShares→yfinance ticker mapping for class shares
- Filter to Asset Class == Equity only (removes UBFUT cash collateral, XTSLA money market)
- Static remap for 16 known iShares compact tickers → canonical yfinance hyphenated form (BRKB→BRK-B, BFA→BF-A, HEIA→HEI-A, LENB→LEN-B, UHALB→UHAL-B, CWENA→CWEN-A, etc.)
- Avoids false positives: META, NVDA, TSLA, ABNB, ZBRA, CMCSA now preserved correctly
- Result: 1003 clean equity tickers from Russell 1000

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
25278cdb92
commit
0cee7b0161
File diff suppressed because one or more lines are too long
|
|
@ -86,12 +86,39 @@ def _load_russell1000() -> List[str]:
|
||||||
logger.warning("Could not find Ticker column in iShares IWB CSV")
|
logger.warning("Could not find Ticker column in iShares IWB CSV")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
# Only take equity rows — excludes cash collateral, money market, etc.
|
||||||
|
if "Asset Class" in df.columns:
|
||||||
|
df = df[df["Asset Class"].astype(str).str.strip() == "Equity"]
|
||||||
|
|
||||||
|
# iShares uses compact tickers for some dual-class shares (no hyphen).
|
||||||
|
# Map the compact form → canonical yfinance symbol.
|
||||||
|
_ISHARES_REMAP = {
|
||||||
|
"BRKB": "BRK-B",
|
||||||
|
"BFA": "BF-A",
|
||||||
|
"BFB": "BF-B",
|
||||||
|
"HEIA": "HEI-A",
|
||||||
|
"LENB": "LEN-B",
|
||||||
|
"UHALB": "UHAL-B",
|
||||||
|
"CWENA": "CWEN-A",
|
||||||
|
"FWONA": "FWON-A",
|
||||||
|
"LBTYA": "LBTY-A",
|
||||||
|
"LBTYK": "LBTY-K",
|
||||||
|
"LLYVA": "LLYV-A",
|
||||||
|
"LBRDA": "LBRD-A",
|
||||||
|
"LBRDK": "LBRD-K",
|
||||||
|
"GLIBA": "GLIB-A",
|
||||||
|
"NWSA": "NWS-A",
|
||||||
|
"FOXA": "FOX-A",
|
||||||
|
}
|
||||||
|
|
||||||
tickers = []
|
tickers = []
|
||||||
for t in df["Ticker"].dropna():
|
for t in df["Ticker"].dropna():
|
||||||
s = str(t).strip().upper().replace(".", "-")
|
s = str(t).strip().upper().replace(".", "-")
|
||||||
# Valid tickers: 1-5 alpha chars, optionally one hyphen (e.g. BRK-B)
|
# Valid tickers: at most 7 characters, letters only apart from hyphens (e.g. BRK-B)
|
||||||
if s and len(s) <= 6 and s.replace("-", "").isalpha():
|
if not (s and len(s) <= 7 and s.replace("-", "").isalpha()):
|
||||||
tickers.append(s)
|
continue
|
||||||
|
s = _ISHARES_REMAP.get(s, s)
|
||||||
|
tickers.append(s)
|
||||||
|
|
||||||
# Deduplicate while preserving order (by weight — iShares sorts by weight desc)
|
# Deduplicate while preserving order (by weight — iShares sorts by weight desc)
|
||||||
seen: set = set()
|
seen: set = set()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue