# Source: TradingAgents/tradingagents/dataflows/trending/stock_resolver.py
#
# 539 lines
# 14 KiB
# Python

import logging
import re
from typing import Optional
import yfinance as yf
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Curated lookup table: lowercase company name or alias -> ticker symbol.
#
# resolve_ticker() consults this table first (with the lowercased, stripped
# input and then with corporate suffixes removed) and returns the mapped
# symbol directly, without calling validate_us_ticker(). Keys must therefore
# be lowercase; several aliases may share one symbol.
#
# NOTE(review): a few entries map a brand to what is presumably its parent's
# symbol (e.g. "linkedin" -> MSFT, "slack" -> CRM), and some symbols may be
# stale after delistings/renames (e.g. TWTR, SQ, SPLK, VMW, PXD) - verify
# against current listings.
COMPANY_TO_TICKER = {
"apple": "AAPL",
"apple inc": "AAPL",
"apple inc.": "AAPL",
"apple corporation": "AAPL",
"the iphone maker": "AAPL",
"iphone maker": "AAPL",
"microsoft": "MSFT",
"microsoft inc": "MSFT",
"microsoft inc.": "MSFT",
"microsoft corp": "MSFT",
"microsoft corp.": "MSFT",
"microsoft corporation": "MSFT",
"google": "GOOGL",
"alphabet": "GOOGL",
"alphabet inc": "GOOGL",
"alphabet inc.": "GOOGL",
"the search giant": "GOOGL",
"amazon": "AMZN",
"amazon inc": "AMZN",
"amazon inc.": "AMZN",
"amazon.com": "AMZN",
"amazon.com inc": "AMZN",
"the e-commerce giant": "AMZN",
"e-commerce giant": "AMZN",
"meta": "META",
"meta platforms": "META",
"meta platforms inc": "META",
"meta platforms inc.": "META",
"facebook": "META",
"facebook inc": "META",
"facebook inc.": "META",
"tesla": "TSLA",
"tesla inc": "TSLA",
"tesla inc.": "TSLA",
"tesla motors": "TSLA",
"ev maker tesla": "TSLA",
"nvidia": "NVDA",
"nvidia corp": "NVDA",
"nvidia corp.": "NVDA",
"nvidia corporation": "NVDA",
"berkshire hathaway": "BRK-B",
"berkshire": "BRK-B",
"jpmorgan": "JPM",
"jpmorgan chase": "JPM",
"jp morgan": "JPM",
"jp morgan chase": "JPM",
"johnson & johnson": "JNJ",
"johnson and johnson": "JNJ",
"j&j": "JNJ",
"unitedhealth": "UNH",
"unitedhealth group": "UNH",
"visa": "V",
"visa inc": "V",
"visa inc.": "V",
"procter & gamble": "PG",
"procter and gamble": "PG",
"p&g": "PG",
"mastercard": "MA",
"mastercard inc": "MA",
"mastercard inc.": "MA",
"home depot": "HD",
"the home depot": "HD",
"chevron": "CVX",
"chevron corp": "CVX",
"chevron corporation": "CVX",
"exxon": "XOM",
"exxon mobil": "XOM",
"exxonmobil": "XOM",
"pfizer": "PFE",
"pfizer inc": "PFE",
"pfizer inc.": "PFE",
"abbvie": "ABBV",
"abbvie inc": "ABBV",
"abbvie inc.": "ABBV",
"coca-cola": "KO",
"coca cola": "KO",
"coke": "KO",
"the coca-cola company": "KO",
"pepsico": "PEP",
"pepsi": "PEP",
"pepsi co": "PEP",
"costco": "COST",
"costco wholesale": "COST",
"walmart": "WMT",
"wal-mart": "WMT",
"walmart inc": "WMT",
"bank of america": "BAC",
"bofa": "BAC",
"merck": "MRK",
"merck & co": "MRK",
"merck and co": "MRK",
"eli lilly": "LLY",
"lilly": "LLY",
"eli lilly and company": "LLY",
"adobe": "ADBE",
"adobe inc": "ADBE",
"adobe inc.": "ADBE",
"adobe systems": "ADBE",
"salesforce": "CRM",
"salesforce inc": "CRM",
"salesforce.com": "CRM",
"cisco": "CSCO",
"cisco systems": "CSCO",
"cisco systems inc": "CSCO",
"netflix": "NFLX",
"netflix inc": "NFLX",
"netflix inc.": "NFLX",
"oracle": "ORCL",
"oracle corp": "ORCL",
"oracle corporation": "ORCL",
"intel": "INTC",
"intel corp": "INTC",
"intel corporation": "INTC",
"amd": "AMD",
"advanced micro devices": "AMD",
"qualcomm": "QCOM",
"qualcomm inc": "QCOM",
"qualcomm inc.": "QCOM",
"broadcom": "AVGO",
"broadcom inc": "AVGO",
"broadcom inc.": "AVGO",
"texas instruments": "TXN",
"ti": "TXN",
"disney": "DIS",
"walt disney": "DIS",
"the walt disney company": "DIS",
"walt disney company": "DIS",
"comcast": "CMCSA",
"comcast corp": "CMCSA",
"comcast corporation": "CMCSA",
"verizon": "VZ",
"verizon communications": "VZ",
"at&t": "T",
"att": "T",
"t-mobile": "TMUS",
"tmobile": "TMUS",
"t-mobile us": "TMUS",
"american express": "AXP",
"amex": "AXP",
"goldman sachs": "GS",
"goldman": "GS",
"morgan stanley": "MS",
"wells fargo": "WFC",
"wells": "WFC",
"citigroup": "C",
"citi": "C",
"citibank": "C",
"charles schwab": "SCHW",
"schwab": "SCHW",
"blackrock": "BLK",
"blackrock inc": "BLK",
"paypal": "PYPL",
"paypal holdings": "PYPL",
"paypal inc": "PYPL",
"square": "SQ",
"block": "SQ",
"block inc": "SQ",
"shopify": "SHOP",
"shopify inc": "SHOP",
"uber": "UBER",
"uber technologies": "UBER",
"lyft": "LYFT",
"lyft inc": "LYFT",
"airbnb": "ABNB",
"airbnb inc": "ABNB",
"doordash": "DASH",
"doordash inc": "DASH",
"snap": "SNAP",
"snap inc": "SNAP",
"snapchat": "SNAP",
"pinterest": "PINS",
"pinterest inc": "PINS",
"twitter": "TWTR",
"twitter inc": "TWTR",
"linkedin": "MSFT",
"zoom": "ZM",
"zoom video": "ZM",
"zoom video communications": "ZM",
"slack": "CRM",
"slack technologies": "CRM",
"palantir": "PLTR",
"palantir technologies": "PLTR",
"snowflake": "SNOW",
"snowflake inc": "SNOW",
"datadog": "DDOG",
"datadog inc": "DDOG",
"crowdstrike": "CRWD",
"crowdstrike holdings": "CRWD",
"okta": "OKTA",
"okta inc": "OKTA",
"cloudflare": "NET",
"cloudflare inc": "NET",
"mongodb": "MDB",
"mongodb inc": "MDB",
"twilio": "TWLO",
"twilio inc": "TWLO",
"servicenow": "NOW",
"servicenow inc": "NOW",
"workday": "WDAY",
"workday inc": "WDAY",
"splunk": "SPLK",
"splunk inc": "SPLK",
"vmware": "VMW",
"vmware inc": "VMW",
"ibm": "IBM",
"international business machines": "IBM",
"hp": "HPQ",
"hewlett-packard": "HPQ",
"hewlett packard": "HPQ",
"dell": "DELL",
"dell technologies": "DELL",
"lenovo": "LNVGY",
"boeing": "BA",
"boeing company": "BA",
"the boeing company": "BA",
"lockheed martin": "LMT",
"lockheed": "LMT",
"raytheon": "RTX",
"rtx": "RTX",
"general dynamics": "GD",
"northrop grumman": "NOC",
"northrop": "NOC",
"general electric": "GE",
"ge": "GE",
"honeywell": "HON",
"honeywell international": "HON",
"3m": "MMM",
"3m company": "MMM",
"caterpillar": "CAT",
"caterpillar inc": "CAT",
"deere": "DE",
"john deere": "DE",
"deere & company": "DE",
"union pacific": "UNP",
"ups": "UPS",
"united parcel service": "UPS",
"fedex": "FDX",
"federal express": "FDX",
"delta": "DAL",
"delta air lines": "DAL",
"delta airlines": "DAL",
"united airlines": "UAL",
"united": "UAL",
"american airlines": "AAL",
"southwest": "LUV",
"southwest airlines": "LUV",
"ford": "F",
"ford motor": "F",
"ford motor company": "F",
"general motors": "GM",
"gm": "GM",
"toyota": "TM",
"toyota motor": "TM",
"honda": "HMC",
"honda motor": "HMC",
"volkswagen": "VWAGY",
"vw": "VWAGY",
"ferrari": "RACE",
"rivian": "RIVN",
"rivian automotive": "RIVN",
"lucid": "LCID",
"lucid motors": "LCID",
"lucid group": "LCID",
"nio": "NIO",
"nio inc": "NIO",
"moderna": "MRNA",
"moderna inc": "MRNA",
"biontech": "BNTX",
"cvs": "CVS",
"cvs health": "CVS",
"walgreens": "WBA",
"walgreens boots alliance": "WBA",
"mckesson": "MCK",
"mckesson corp": "MCK",
"cardinal health": "CAH",
"humana": "HUM",
"humana inc": "HUM",
"cigna": "CI",
"cigna group": "CI",
"anthem": "ELV",
"elevance health": "ELV",
"starbucks": "SBUX",
"starbucks corp": "SBUX",
"starbucks corporation": "SBUX",
"mcdonalds": "MCD",
"mcdonald's": "MCD",
"chipotle": "CMG",
"chipotle mexican grill": "CMG",
"yum brands": "YUM",
"yum": "YUM",
"dominos": "DPZ",
"domino's": "DPZ",
"domino's pizza": "DPZ",
"nike": "NKE",
"nike inc": "NKE",
"adidas": "ADDYY",
"lululemon": "LULU",
"lululemon athletica": "LULU",
"target": "TGT",
"target corp": "TGT",
"target corporation": "TGT",
"dollar general": "DG",
"dollar tree": "DLTR",
"ross stores": "ROST",
"ross": "ROST",
"tjx": "TJX",
"tjx companies": "TJX",
"tj maxx": "TJX",
"lowes": "LOW",
"lowe's": "LOW",
"lowe's companies": "LOW",
"autozone": "AZO",
"o'reilly": "ORLY",
"o'reilly automotive": "ORLY",
"carmax": "KMX",
"estee lauder": "EL",
"colgate": "CL",
"colgate-palmolive": "CL",
"colgate palmolive": "CL",
"kimberly-clark": "KMB",
"kimberly clark": "KMB",
"clorox": "CLX",
"clorox company": "CLX",
"kraft heinz": "KHC",
"kraft": "KHC",
"heinz": "KHC",
"general mills": "GIS",
"kellogg": "K",
"kellogg's": "K",
"mondelez": "MDLZ",
"mondelez international": "MDLZ",
"hershey": "HSY",
"the hershey company": "HSY",
"tyson": "TSN",
"tyson foods": "TSN",
"beyond meat": "BYND",
"conagra": "CAG",
"conagra brands": "CAG",
"constellation brands": "STZ",
"anheuser-busch": "BUD",
"anheuser busch": "BUD",
"ab inbev": "BUD",
"diageo": "DEO",
"philip morris": "PM",
"philip morris international": "PM",
"altria": "MO",
"altria group": "MO",
"constellation energy": "CEG",
"nextera": "NEE",
"nextera energy": "NEE",
"duke energy": "DUK",
"southern company": "SO",
"dominion": "D",
"dominion energy": "D",
"sempra": "SRE",
"sempra energy": "SRE",
"conocophillips": "COP",
"conoco": "COP",
"schlumberger": "SLB",
"halliburton": "HAL",
"baker hughes": "BKR",
"marathon": "MPC",
"marathon petroleum": "MPC",
"valero": "VLO",
"valero energy": "VLO",
"phillips 66": "PSX",
"occidental": "OXY",
"occidental petroleum": "OXY",
"pioneer": "PXD",
"pioneer natural resources": "PXD",
"devon energy": "DVN",
"devon": "DVN",
"coinbase": "COIN",
"coinbase global": "COIN",
"robinhood": "HOOD",
"robinhood markets": "HOOD",
"sofi": "SOFI",
"sofi technologies": "SOFI",
"affirm": "AFRM",
"affirm holdings": "AFRM",
"marqeta": "MQ",
"toast": "TOST",
"toast inc": "TOST",
"docusign": "DOCU",
"docusign inc": "DOCU",
"asana": "ASAN",
"monday.com": "MNDY",
"monday": "MNDY",
"atlassian": "TEAM",
"atlassian corp": "TEAM",
"intuit": "INTU",
"intuit inc": "INTU",
"autodesk": "ADSK",
"autodesk inc": "ADSK",
"synopsys": "SNPS",
"cadence": "CDNS",
"cadence design": "CDNS",
"ansys": "ANSS",
"roper": "ROP",
"roper technologies": "ROP",
"fortinet": "FTNT",
"palo alto": "PANW",
"palo alto networks": "PANW",
"zscaler": "ZS",
"sentinelone": "S",
"veeva": "VEEV",
"veeva systems": "VEEV",
}
# Exchange identifiers that validate_us_ticker() accepts as US venues. The
# value compared against this set is the "exchange" field of a yfinance
# Ticker's info dict (exact, case-sensitive match).
#
# NOTE(review): this looks like a mix of Yahoo Finance short codes (e.g. NYQ,
# NMS, NGM) and plain exchange names (NYSE, NASDAQ, AMEX) - confirm which of
# these yfinance actually emits.
US_EXCHANGE_CODES = {
"NYQ",
"NMS",
"NGM",
"NCM",
"ASE",
"PCX",
"BTS",
"NYSE",
"NASDAQ",
"AMEX",
"NYS",
"NAS",
"NIM",
"NAQ",
}
# Regexes for trailing corporate-form words. Each one matches leading
# whitespace plus the suffix at the very end of the string, so only a suffix
# that is a separate trailing word is removed.
SUFFIX_PATTERNS = [
    r"\s+inc\.?$",
    r"\s+corp\.?$",
    r"\s+corporation$",
    r"\s+co\.?$",
    r"\s+company$",
    r"\s+llc$",
    r"\s+ltd\.?$",
    r"\s+limited$",
    r"\s+plc$",
    r"\s+holdings?$",
    r"\s+group$",
    r"\s+technologies$",
    r"\s+enterprises?$",
]


def _normalize_company_name(name: str) -> str:
    """Return *name* lowercased with trailing corporate suffixes removed.

    The suffix patterns in ``SUFFIX_PATTERNS`` are applied repeatedly until
    the string stops changing, so stacked suffixes are removed regardless of
    the order in which they appear (``"Foo Holdings Group"`` -> ``"foo"``).
    Trailing commas are dropped between passes so that forms such as
    ``"Apple, Inc."`` or ``"Samsung Electronics Co., Ltd."`` reduce to the
    bare company name instead of leaving ``"apple,"`` behind.

    Args:
        name: Raw company name; surrounding whitespace and case are ignored.

    Returns:
        The normalized name. May be an empty string if nothing is left.
    """
    normalized = name.lower().strip()
    while True:
        previous = normalized
        for pattern in SUFFIX_PATTERNS:
            normalized = re.sub(pattern, "", normalized, flags=re.IGNORECASE)
        # A comma that preceded a removed suffix ("apple, inc.") would
        # otherwise block both the next suffix match and the table lookup.
        normalized = normalized.strip().rstrip(",").strip()
        # Every pass either shortens the string or leaves it untouched, so
        # this loop always terminates.
        if normalized == previous:
            return normalized
def _search_yfinance_ticker(company_name: str) -> Optional[str]:
    """Look up a ticker symbol for *company_name* through yfinance.

    Two strategies are tried in order:

    1. Treat the text as a ticker symbol (``yf.Ticker(...).info``) and use
       the ``"symbol"`` it reports, if any.
    2. Run ``yf.Search`` (at most 5 results) and pick a quote from it,
       preferring the first quote whose ``"exchange"`` is listed in
       ``US_EXCHANGE_CODES`` and otherwise falling back to the first quote
       that carries a symbol.

    Any exception raised by yfinance is logged at DEBUG level and treated as
    "no result" for that strategy; this function is best-effort by design.

    Args:
        company_name: Free-text company name or ticker-like string.

    Returns:
        A ticker symbol, or ``None`` if neither strategy produced one. The
        symbol is NOT validated here.
    """
    # A blank query cannot match anything; skip the network round-trips.
    if not company_name or not company_name.strip():
        return None

    try:
        info = yf.Ticker(company_name).info
        # A missing, None or empty "symbol" must not short-circuit the
        # Search fallback below.
        symbol = info.get("symbol") if info else None
        if symbol:
            return symbol
    except Exception as e:
        logger.debug("yfinance search failed for %s: %s", company_name, str(e))

    try:
        search = yf.Search(company_name, max_results=5)
        # NOTE(review): quotes are assumed to be dicts carrying "symbol" and
        # "exchange" keys, per the yfinance Search documentation - confirm.
        quotes = getattr(search, "quotes", None) or []
        first_symbol = None
        for quote in quotes:
            symbol = quote.get("symbol")
            if not symbol:
                continue
            # Prefer a US listing so that a foreign listing ranked first does
            # not make the caller's US-exchange validation reject the company.
            if quote.get("exchange") in US_EXCHANGE_CODES:
                return symbol
            if first_symbol is None:
                first_symbol = symbol
        if first_symbol is not None:
            return first_symbol
    except Exception as e:
        logger.debug("yfinance Search failed for %s: %s", company_name, str(e))

    return None
def validate_us_ticker(ticker: str) -> bool:
    """Return True if yfinance reports *ticker* as listed on a US exchange.

    The ticker is stripped and upper-cased, its ``info`` dict is fetched via
    ``yf.Ticker`` and the ``"exchange"`` field is checked, first for an exact
    match in ``US_EXCHANGE_CODES`` and then for a case-insensitive substring
    match against a short list of US exchange names/codes.

    Args:
        ticker: Ticker symbol to check. Blank or non-string input is
            rejected without contacting yfinance.

    Returns:
        True when the exchange is recognised as a US exchange; False when it
        is not, when no info is available, or when yfinance raises. Failures
        are logged at WARNING level.
    """
    # Nothing to look up: avoid a pointless network call and an
    # AttributeError on None.
    if not isinstance(ticker, str) or not ticker.strip():
        return False

    symbol = ticker.strip().upper()
    try:
        info = yf.Ticker(symbol).info
        if not info:
            logger.warning("Validation failed for %s: no info available", ticker)
            return False

        # "exchange" may be present but None; normalise it to "" so the
        # checks below cannot raise.
        exchange = info.get("exchange") or ""
        if exchange in US_EXCHANGE_CODES:
            return True

        # Looser fallback for exchange strings that merely contain a known
        # US exchange name or code.
        exchange_lower = exchange.lower()
        us_markers = ("nyse", "nasdaq", "amex", "nys", "nms", "ngm")
        if any(marker in exchange_lower for marker in us_markers):
            return True

        logger.warning("Validation failed for %s: exchange %s is not a US exchange", ticker, exchange)
        return False
    except Exception as e:
        # Best-effort check: any yfinance/network failure counts as "not valid".
        logger.warning("Validation failed for %s: %s", ticker, str(e))
        return False
def resolve_ticker(company_name: str) -> Optional[str]:
    """Resolve a company name (or ticker symbol) to a ticker symbol.

    Lookup order:

    1. Exact match of the lowercased, stripped input in ``COMPANY_TO_TICKER``.
    2. Match after ``_normalize_company_name`` removes corporate suffixes.
    3. The input, upper-cased, is itself one of the symbols in
       ``COMPANY_TO_TICKER`` and passes ``validate_us_ticker``.
    4. ``_search_yfinance_ticker`` finds a symbol that passes
       ``validate_us_ticker``.

    Results from steps 1 and 2 come straight from the curated table and are
    returned without validation.

    Args:
        company_name: Company name, alias, or ticker symbol. Surrounding
            whitespace and case are ignored.

    Returns:
        The resolved ticker symbol, or ``None`` if the input is blank, no
        symbol is found, or the symbol found fails US-exchange validation.
    """
    if not company_name or not company_name.strip():
        return None

    # Use the stripped text for every step; previously the ticker check and
    # the yfinance fallback saw the raw input, so " AAPL " missed step 3.
    cleaned = company_name.strip()

    normalized = cleaned.lower()
    if normalized in COMPANY_TO_TICKER:
        return COMPANY_TO_TICKER[normalized]

    normalized_stripped = _normalize_company_name(cleaned)
    if normalized_stripped in COMPANY_TO_TICKER:
        return COMPANY_TO_TICKER[normalized_stripped]

    # The caller may have passed a ticker symbol rather than a name.
    candidate = cleaned.upper()
    if candidate in COMPANY_TO_TICKER.values() and validate_us_ticker(candidate):
        return candidate

    logger.info("Using yfinance fallback for company: %s", company_name)
    yf_ticker = _search_yfinance_ticker(cleaned)
    if not yf_ticker:
        logger.warning("Could not resolve ticker for company: %s", company_name)
        return None

    if not validate_us_ticker(yf_ticker):
        logger.warning("Ticker %s for %s failed US exchange validation", yf_ticker, company_name)
        return None

    logger.info("Resolved %s to %s via yfinance", company_name, yf_ticker)
    return yf_ticker
def validate_tradeable(ticker: str) -> bool:
    """Report whether *ticker* is considered tradeable.

    This simply forwards to ``validate_us_ticker`` and returns its result
    unchanged.
    """
    is_us_listed = validate_us_ticker(ticker)
    return is_us_listed