"""Resolve company names to US-listed ticker symbols.

Resolution first consults the static ``COMPANY_TO_TICKER`` table and only
falls back to a yfinance lookup (followed by a US-exchange check) when the
table has no answer.
"""

import logging
import re
from typing import Iterator, Optional

try:
    import yfinance as yf
except ImportError:
    # The static table lookup needs no network library. Without yfinance only
    # the fallback search and the exchange validation are unavailable.
    yf = None

logger = logging.getLogger(__name__)

# Lower-cased company names, aliases and nicknames mapped to ticker symbols.
# NOTE(review): this is hand-maintained data; some symbols may be stale after
# renames or acquisitions -- verify against a current listing before relying
# on them.
COMPANY_TO_TICKER = {
    "apple": "AAPL", "apple inc": "AAPL", "apple inc.": "AAPL",
    "apple corporation": "AAPL", "the iphone maker": "AAPL", "iphone maker": "AAPL",
    "microsoft": "MSFT", "microsoft inc": "MSFT", "microsoft inc.": "MSFT",
    "microsoft corp": "MSFT", "microsoft corp.": "MSFT",
    "microsoft corporation": "MSFT",
    "google": "GOOGL", "alphabet": "GOOGL", "alphabet inc": "GOOGL",
    "alphabet inc.": "GOOGL", "the search giant": "GOOGL",
    "amazon": "AMZN", "amazon inc": "AMZN", "amazon inc.": "AMZN",
    "amazon.com": "AMZN", "amazon.com inc": "AMZN",
    "the e-commerce giant": "AMZN", "e-commerce giant": "AMZN",
    "meta": "META", "meta platforms": "META", "meta platforms inc": "META",
    "meta platforms inc.": "META", "facebook": "META", "facebook inc": "META",
    "facebook inc.": "META",
    "tesla": "TSLA", "tesla inc": "TSLA", "tesla inc.": "TSLA",
    "tesla motors": "TSLA", "ev maker tesla": "TSLA",
    "nvidia": "NVDA", "nvidia corp": "NVDA", "nvidia corp.": "NVDA",
    "nvidia corporation": "NVDA",
    "berkshire hathaway": "BRK-B", "berkshire": "BRK-B",
    "jpmorgan": "JPM", "jpmorgan chase": "JPM", "jp morgan": "JPM",
    "jp morgan chase": "JPM",
    "johnson & johnson": "JNJ", "johnson and johnson": "JNJ", "j&j": "JNJ",
    "unitedhealth": "UNH", "unitedhealth group": "UNH",
    "visa": "V", "visa inc": "V", "visa inc.": "V",
    "procter & gamble": "PG", "procter and gamble": "PG", "p&g": "PG",
    "mastercard": "MA", "mastercard inc": "MA", "mastercard inc.": "MA",
    "home depot": "HD", "the home depot": "HD",
    "chevron": "CVX", "chevron corp": "CVX", "chevron corporation": "CVX",
    "exxon": "XOM", "exxon mobil": "XOM", "exxonmobil": "XOM",
    "pfizer": "PFE", "pfizer inc": "PFE", "pfizer inc.": "PFE",
    "abbvie": "ABBV", "abbvie inc": "ABBV", "abbvie inc.": "ABBV",
    "coca-cola": "KO", "coca cola": "KO", "coke": "KO",
    "the coca-cola company": "KO",
    "pepsico": "PEP", "pepsi": "PEP", "pepsi co": "PEP",
    "costco": "COST", "costco wholesale": "COST",
    "walmart": "WMT", "wal-mart": "WMT", "walmart inc": "WMT",
    "bank of america": "BAC", "bofa": "BAC",
    "merck": "MRK", "merck & co": "MRK", "merck and co": "MRK",
    "eli lilly": "LLY", "lilly": "LLY", "eli lilly and company": "LLY",
    "adobe": "ADBE", "adobe inc": "ADBE", "adobe inc.": "ADBE",
    "adobe systems": "ADBE",
    "salesforce": "CRM", "salesforce inc": "CRM", "salesforce.com": "CRM",
    "cisco": "CSCO", "cisco systems": "CSCO", "cisco systems inc": "CSCO",
    "netflix": "NFLX", "netflix inc": "NFLX", "netflix inc.": "NFLX",
    "oracle": "ORCL", "oracle corp": "ORCL", "oracle corporation": "ORCL",
    "intel": "INTC", "intel corp": "INTC", "intel corporation": "INTC",
    "amd": "AMD", "advanced micro devices": "AMD",
    "qualcomm": "QCOM", "qualcomm inc": "QCOM", "qualcomm inc.": "QCOM",
    "broadcom": "AVGO", "broadcom inc": "AVGO", "broadcom inc.": "AVGO",
    "texas instruments": "TXN", "ti": "TXN",
    "disney": "DIS", "walt disney": "DIS", "the walt disney company": "DIS",
    "walt disney company": "DIS",
    "comcast": "CMCSA", "comcast corp": "CMCSA", "comcast corporation": "CMCSA",
    "verizon": "VZ", "verizon communications": "VZ",
    "at&t": "T", "att": "T",
    "t-mobile": "TMUS", "tmobile": "TMUS", "t-mobile us": "TMUS",
    "american express": "AXP", "amex": "AXP",
    "goldman sachs": "GS", "goldman": "GS",
    "morgan stanley": "MS",
    "wells fargo": "WFC", "wells": "WFC",
    "citigroup": "C", "citi": "C", "citibank": "C",
    "charles schwab": "SCHW", "schwab": "SCHW",
    "blackrock": "BLK", "blackrock inc": "BLK",
    "paypal": "PYPL", "paypal holdings": "PYPL", "paypal inc": "PYPL",
    "square": "SQ", "block": "SQ", "block inc": "SQ",
    "shopify": "SHOP", "shopify inc": "SHOP",
    "uber": "UBER", "uber technologies": "UBER",
    "lyft": "LYFT", "lyft inc": "LYFT",
    "airbnb": "ABNB", "airbnb inc": "ABNB",
    "doordash": "DASH", "doordash inc": "DASH",
    "snap": "SNAP", "snap inc": "SNAP", "snapchat": "SNAP",
    "pinterest": "PINS", "pinterest inc": "PINS",
    "linkedin": "MSFT",
    "zoom": "ZM", "zoom video": "ZM", "zoom video communications": "ZM",
    "slack": "CRM", "slack technologies": "CRM",
    "palantir": "PLTR", "palantir technologies": "PLTR",
    "snowflake": "SNOW", "snowflake inc": "SNOW",
    "datadog": "DDOG", "datadog inc": "DDOG",
    "crowdstrike": "CRWD", "crowdstrike holdings": "CRWD",
    "okta": "OKTA", "okta inc": "OKTA",
    "cloudflare": "NET", "cloudflare inc": "NET",
    "mongodb": "MDB", "mongodb inc": "MDB",
    "twilio": "TWLO", "twilio inc": "TWLO",
    "servicenow": "NOW", "servicenow inc": "NOW",
    "workday": "WDAY", "workday inc": "WDAY",
    "splunk": "SPLK", "splunk inc": "SPLK",
    "vmware": "VMW", "vmware inc": "VMW",
    "ibm": "IBM", "international business machines": "IBM",
    "hp": "HPQ", "hewlett-packard": "HPQ", "hewlett packard": "HPQ",
    "dell": "DELL", "dell technologies": "DELL",
    "lenovo": "LNVGY",
    "boeing": "BA", "boeing company": "BA", "the boeing company": "BA",
    "lockheed martin": "LMT", "lockheed": "LMT",
    "raytheon": "RTX", "rtx": "RTX",
    "general dynamics": "GD",
    "northrop grumman": "NOC", "northrop": "NOC",
    "general electric": "GE", "ge": "GE",
    "honeywell": "HON", "honeywell international": "HON",
    "3m": "MMM", "3m company": "MMM",
    "caterpillar": "CAT", "caterpillar inc": "CAT",
    "deere": "DE", "john deere": "DE", "deere & company": "DE",
    "union pacific": "UNP",
    "ups": "UPS", "united parcel service": "UPS",
    "fedex": "FDX", "federal express": "FDX",
    "delta": "DAL", "delta air lines": "DAL", "delta airlines": "DAL",
    "united airlines": "UAL", "united": "UAL",
    "american airlines": "AAL",
    "southwest": "LUV", "southwest airlines": "LUV",
    "ford": "F", "ford motor": "F", "ford motor company": "F",
    "general motors": "GM", "gm": "GM",
    "toyota": "TM", "toyota motor": "TM",
    "honda": "HMC", "honda motor": "HMC",
    "volkswagen": "VWAGY", "vw": "VWAGY",
    "ferrari": "RACE",
    "rivian": "RIVN", "rivian automotive": "RIVN",
    "lucid": "LCID", "lucid motors": "LCID", "lucid group": "LCID",
    "nio": "NIO", "nio inc": "NIO",
    "moderna": "MRNA", "moderna inc": "MRNA",
    "biontech": "BNTX",
    "cvs": "CVS", "cvs health": "CVS",
    "walgreens": "WBA", "walgreens boots alliance": "WBA",
    "mckesson": "MCK", "mckesson corp": "MCK",
    "cardinal health": "CAH",
    "humana": "HUM", "humana inc": "HUM",
    "cigna": "CI", "cigna group": "CI",
    "anthem": "ELV", "elevance health": "ELV",
    "starbucks": "SBUX", "starbucks corp": "SBUX", "starbucks corporation": "SBUX",
    "mcdonalds": "MCD", "mcdonald's": "MCD",
    "chipotle": "CMG", "chipotle mexican grill": "CMG",
    "yum brands": "YUM", "yum": "YUM",
    "dominos": "DPZ", "domino's": "DPZ", "domino's pizza": "DPZ",
    "nike": "NKE", "nike inc": "NKE",
    "adidas": "ADDYY",
    "lululemon": "LULU", "lululemon athletica": "LULU",
    "target": "TGT", "target corp": "TGT", "target corporation": "TGT",
    "dollar general": "DG", "dollar tree": "DLTR",
    "ross stores": "ROST", "ross": "ROST",
    "tjx": "TJX", "tjx companies": "TJX", "tj maxx": "TJX",
    "lowes": "LOW", "lowe's": "LOW", "lowe's companies": "LOW",
    "autozone": "AZO",
    "o'reilly": "ORLY", "o'reilly automotive": "ORLY",
    "carmax": "KMX",
    "estee lauder": "EL",
    "colgate": "CL", "colgate-palmolive": "CL", "colgate palmolive": "CL",
    "kimberly-clark": "KMB", "kimberly clark": "KMB",
    "clorox": "CLX", "clorox company": "CLX",
    "kraft heinz": "KHC", "kraft": "KHC", "heinz": "KHC",
    "general mills": "GIS",
    "kellogg": "K", "kellogg's": "K",
    "mondelez": "MDLZ", "mondelez international": "MDLZ",
    "hershey": "HSY", "the hershey company": "HSY",
    "tyson": "TSN", "tyson foods": "TSN",
    "beyond meat": "BYND",
    "conagra": "CAG", "conagra brands": "CAG",
    "constellation brands": "STZ",
    "anheuser-busch": "BUD", "anheuser busch": "BUD", "ab inbev": "BUD",
    "diageo": "DEO",
    "philip morris": "PM", "philip morris international": "PM",
    "altria": "MO", "altria group": "MO",
    "constellation energy": "CEG",
    "nextera": "NEE", "nextera energy": "NEE",
    "duke energy": "DUK",
    "southern company": "SO",
    "dominion": "D", "dominion energy": "D",
    "sempra": "SRE", "sempra energy": "SRE",
    "conocophillips": "COP", "conoco": "COP",
    "schlumberger": "SLB",
    "halliburton": "HAL",
    "baker hughes": "BKR",
    "marathon": "MPC", "marathon petroleum": "MPC",
    "valero": "VLO", "valero energy": "VLO",
    "phillips 66": "PSX",
    "occidental": "OXY", "occidental petroleum": "OXY",
    "pioneer": "PXD", "pioneer natural resources": "PXD",
    "devon energy": "DVN", "devon": "DVN",
    "coinbase": "COIN", "coinbase global": "COIN",
    "robinhood": "HOOD", "robinhood markets": "HOOD",
    "sofi": "SOFI", "sofi technologies": "SOFI",
    "affirm": "AFRM", "affirm holdings": "AFRM",
    "marqeta": "MQ",
    "toast": "TOST", "toast inc": "TOST",
    "docusign": "DOCU", "docusign inc": "DOCU",
    "asana": "ASAN",
    "monday.com": "MNDY", "monday": "MNDY",
    "atlassian": "TEAM", "atlassian corp": "TEAM",
    "intuit": "INTU", "intuit inc": "INTU",
    "autodesk": "ADSK", "autodesk inc": "ADSK",
    "synopsys": "SNPS",
    "cadence": "CDNS", "cadence design": "CDNS",
    "ansys": "ANSS",
    "roper": "ROP", "roper technologies": "ROP",
    "fortinet": "FTNT",
    "palo alto": "PANW", "palo alto networks": "PANW",
    "zscaler": "ZS",
    "sentinelone": "S",
    "veeva": "VEEV", "veeva systems": "VEEV",
}

# Values of the yfinance ``exchange`` field that are accepted as US listings.
US_EXCHANGE_CODES = {
    "NYQ", "NMS", "NGM", "NCM", "ASE", "PCX", "BTS",
    "NYSE", "NASDAQ", "AMEX", "NYS", "NAS", "NIM", "NAQ",
}

# Regexes for legal-form suffixes removed from the end of a company name.
SUFFIX_PATTERNS = [
    r"\s+inc\.?$",
    r"\s+corp\.?$",
    r"\s+corporation$",
    r"\s+co\.?$",
    r"\s+company$",
    r"\s+llc$",
    r"\s+ltd\.?$",
    r"\s+limited$",
    r"\s+plc$",
    r"\s+holdings?$",
    r"\s+group$",
    r"\s+technologies$",
    r"\s+enterprises?$",
]

# Lower-case fragments; an exchange value containing any of them is accepted
# as a US exchange even when it is not listed in US_EXCHANGE_CODES.
_US_EXCHANGE_NAME_FRAGMENTS = ("nyse", "nasdaq", "amex", "nys", "nms", "ngm")

# Characters dropped from the end of a name between suffix-stripping steps, so
# that "apple, inc." reduces to "apple" rather than "apple,".
_TRAILING_PUNCTUATION = " ,;:."


def _iter_name_variants(name: str) -> Iterator[str]:
    """Yield progressively shorter normalized forms of *name*.

    The first value is the lower-cased name with runs of whitespace collapsed
    and trailing punctuation removed. Each later value has one more suffix
    from ``SUFFIX_PATTERNS`` stripped; stripping repeats until no pattern
    matches, so the order of suffixes in the input does not matter.
    """
    current = " ".join(name.lower().split()).rstrip(_TRAILING_PUNCTUATION)
    yield current

    changed = True
    while changed:
        changed = False
        for pattern in SUFFIX_PATTERNS:
            stripped = re.sub(pattern, "", current, flags=re.IGNORECASE)
            stripped = stripped.rstrip(_TRAILING_PUNCTUATION)
            if stripped != current:
                current = stripped
                changed = True
                yield current


def _normalize_company_name(name: str) -> str:
    """Return *name* lower-cased with all trailing legal-form suffixes removed.

    Suffixes are stripped to a fixed point and trailing punctuation is
    dropped, e.g. ``"Toyota Motor Co., Ltd."`` becomes ``"toyota motor"``.
    """
    normalized = ""
    for normalized in _iter_name_variants(name):
        pass
    return normalized


def _search_yfinance_ticker(company_name: str) -> Optional[str]:
    """Look up a ticker symbol for *company_name* through yfinance.

    The name is first tried as if it were a symbol via ``yf.Ticker``; if that
    yields no symbol, the first ``yf.Search`` quote carrying a symbol is used.

    Returns:
        The symbol found, or ``None`` when yfinance is unavailable, both
        lookups fail, or neither produces a symbol.
    """
    if yf is None:
        logger.warning("yfinance is not installed; cannot search for %s", company_name)
        return None

    try:
        info = yf.Ticker(company_name).info
        symbol = info.get("symbol") if info else None
        # A missing or empty symbol falls through to the search below.
        if symbol:
            return symbol
    except Exception as e:  # best-effort lookup: any failure means "not found"
        logger.debug("yfinance search failed for %s: %s", company_name, e)

    try:
        search = yf.Search(company_name, max_results=5)
        for quote in getattr(search, "quotes", None) or []:
            symbol = quote.get("symbol")
            if symbol:
                return symbol
    except Exception as e:  # best-effort lookup: any failure means "not found"
        logger.debug("yfinance Search failed for %s: %s", company_name, e)

    return None


def validate_us_ticker(ticker: str) -> bool:
    """Return ``True`` if yfinance reports *ticker* on a US exchange.

    The ``exchange`` value from the ticker's info is accepted when it is in
    ``US_EXCHANGE_CODES`` or contains one of the known US exchange name
    fragments. Every failure (empty ticker, yfinance unavailable, no info,
    lookup error, non-US exchange) is logged and reported as ``False``.
    """
    if not ticker or not ticker.strip():
        logger.warning("Validation failed for %s: empty ticker", ticker)
        return False
    if yf is None:
        logger.warning("Validation failed for %s: yfinance is not installed", ticker)
        return False

    try:
        info = yf.Ticker(ticker.strip().upper()).info
        if not info:
            logger.warning("Validation failed for %s: no info available", ticker)
            return False

        # The field may be absent or None; treat both as "unknown exchange".
        exchange = info.get("exchange") or ""
        if exchange in US_EXCHANGE_CODES:
            return True

        exchange_lower = exchange.lower()
        if any(fragment in exchange_lower for fragment in _US_EXCHANGE_NAME_FRAGMENTS):
            return True

        logger.warning(
            "Validation failed for %s: exchange %s is not a US exchange",
            ticker,
            exchange,
        )
        return False
    except Exception as e:  # lookup errors must not propagate to callers
        logger.warning("Validation failed for %s: %s", ticker, e)
        return False


def resolve_ticker(company_name: str) -> Optional[str]:
    """Resolve a company name (or ticker symbol) to a US-listed ticker.

    Resolution order:

    1. Exact lookup of the lower-cased, stripped name in ``COMPANY_TO_TICKER``.
    2. Lookup of each progressively suffix-stripped variant of the name.
    3. If the input equals a ticker present in the table, it is returned once
       ``validate_us_ticker`` confirms it.
    4. A yfinance search whose result must pass ``validate_us_ticker``.

    Returns:
        The ticker symbol, or ``None`` for empty input or when no step
        produces a validated ticker.
    """
    if not company_name or not company_name.strip():
        return None

    exact = company_name.lower().strip()
    if exact in COMPANY_TO_TICKER:
        return COMPANY_TO_TICKER[exact]

    # Try every intermediate form, not just the fully stripped one: a key such
    # as "southern company" itself ends in a suffix.
    for variant in _iter_name_variants(company_name):
        if variant in COMPANY_TO_TICKER:
            return COMPANY_TO_TICKER[variant]

    query = company_name.strip()
    symbol = query.upper()
    if symbol in COMPANY_TO_TICKER.values() and validate_us_ticker(symbol):
        return symbol

    logger.info("Using yfinance fallback for company: %s", query)
    yf_ticker = _search_yfinance_ticker(query)

    if yf_ticker:
        if validate_us_ticker(yf_ticker):
            logger.info("Resolved %s to %s via yfinance", query, yf_ticker)
            return yf_ticker
        logger.warning(
            "Ticker %s for %s failed US exchange validation", yf_ticker, query
        )
        return None

    logger.warning("Could not resolve ticker for company: %s", query)
    return None


def validate_tradeable(ticker: str) -> bool:
    """Return whether *ticker* is tradeable; delegates to ``validate_us_ticker``."""
    return validate_us_ticker(ticker)