# TradingAgents/tradingagents/instruments.py

import re

# Quotes commonly used for crypto trading pairs.
CRYPTO_QUOTES = {
    "USD",
    "USDT",
    "USDC",
    "BUSD",
    "DAI",
    "FDUSD",
    "TUSD",
    "BTC",
    "ETH",
    "BNB",
    "EUR",
    "JPY",
}

# Stablecoin quotes are normalized to USD for Yahoo Finance compatibility.
STABLECOIN_QUOTES = {"USDT", "USDC", "BUSD", "DAI", "FDUSD", "TUSD"}

# Popular crypto symbols used for auto-normalization when a quote is omitted.
MAJOR_CRYPTO_BASES = {
    "BTC",
    "ETH",
    "ONT",
    "SOL",
    "XRP",
    "BNB",
    "DOGE",
    "ADA",
    "TRX",
    "AVAX",
    "DOT",
    "MATIC",
    "LTC",
    "BCH",
    "LINK",
    "ATOM",
    "UNI",
    "AAVE",
    "ETC",
    "XLM",
    "NEAR",
    "FIL",
}

# Quote suffixes tried, in order, when splitting a concatenated pair such as
# "BTCUSDT". Longest first, so that a symbol like "BTCTUSD" is matched against
# "TUSD" before the shorter "USD". Ties are broken alphabetically: iteration
# order of a set of strings changes with hash randomization, and sorting on
# length alone would make this tuple differ from one interpreter run to the
# next.
CONCAT_QUOTE_SUFFIXES = tuple(
    sorted(CRYPTO_QUOTES, key=lambda quote: (-len(quote), quote))
)

# Exchange-qualified ticker: an uppercase alphanumeric root followed by one or
# more ".XXXXX" groups of 1-5 uppercase alphanumerics (e.g. "CNC.TO", "7203.T").
EXCHANGE_SUFFIX_PATTERN = re.compile(r"^[A-Z0-9]+(?:\.[A-Z0-9]{1,5})+$")


def _normalize_quote(quote: str) -> str:
    """Return "USD" for a quote listed in STABLECOIN_QUOTES, else the quote unchanged."""
    return "USD" if quote in STABLECOIN_QUOTES else quote


def normalize_instrument_symbol(symbol: str) -> str:
    """Normalize ticker-like input while preserving equity suffixes and crypto pairs.

    The input is upper-cased and stripped of ALL whitespace (spaces, tabs,
    newlines, leading/trailing or embedded). If it contains exactly one ":",
    everything up to and including the colon is dropped. The first rule below
    that applies then decides the result:

    1. Exchange-qualified tickers (matching EXCHANGE_SUFFIX_PATTERN) are
       returned unchanged.
    2. Delimited pairs "BASE/QUOTE", "BASE_QUOTE" or "BASE-QUOTE" with
       alphanumeric halves become "BASE-QUOTE", with stablecoin quotes
       rewritten to USD. Hyphenated tickers whose second half is not a
       stablecoin (e.g. "BRK-B") come out unchanged.
    3. Concatenated pairs that end in a known quote and have a base of at
       least two characters are split and rewritten the same way.
    4. A bare symbol listed in MAJOR_CRYPTO_BASES gets a "-USD" quote.
    5. Anything else is returned as cleaned.

    Args:
        symbol: Raw ticker or trading-pair text.

    Returns:
        The normalized symbol. An empty string is returned when the input
        contains nothing but whitespace.

    Examples:
    - " cnc.to "   -> "CNC.TO"
    - "btc-usdt"   -> "BTC-USD"
    - "eth/usdt"   -> "ETH-USD"
    - "BTCUSDT"    -> "BTC-USD"
    - "btc"        -> "BTC-USD"
    """
    # str.split() with no separator splits on runs of any whitespace and
    # discards empty pieces, so re-joining removes tabs and newlines as well
    # as spaces. Stripping only " " would leave e.g. an embedded tab in place
    # and make every pair rule below fail its isalnum() check.
    normalized = "".join(symbol.split()).upper()
    if not normalized:
        return normalized

    # TradingView-like venue prefixes (e.g. BINANCE:BTCUSDT). Only a single
    # colon is treated as a venue separator; other inputs are left alone.
    if normalized.count(":") == 1:
        normalized = normalized.partition(":")[2]

    # Preserve exchange-qualified equity symbols such as 7203.T or CNC.TO.
    if EXCHANGE_SUFFIX_PATTERN.match(normalized):
        return normalized

    # Pair formats: BTC/USDT, BTC_USDT, BTC-USD. Underscores are treated as
    # slashes, and the slash form is checked before the hyphen form. A
    # candidate must contain its delimiter exactly once.
    for candidate, delimiter in (
        (normalized.replace("_", "/"), "/"),
        (normalized, "-"),
    ):
        if candidate.count(delimiter) != 1:
            continue
        base, quote = candidate.split(delimiter)
        if base.isalnum() and quote.isalnum():
            return f"{base}-{_normalize_quote(quote)}"

    # Concatenated pair format: BTCUSDT, ETHUSD, SOLBTC.
    for suffix in CONCAT_QUOTE_SUFFIXES:
        if not normalized.endswith(suffix):
            continue
        base = normalized[: -len(suffix)]
        # A one-character base is never split, so a four-letter ticker that
        # merely ends in a quote code (e.g. "GBTC") passes through untouched.
        if len(base) > 1 and base.isalnum():
            return f"{base}-{_normalize_quote(suffix)}"

    # Bare major crypto symbols default to USD quote.
    if normalized in MAJOR_CRYPTO_BASES:
        return f"{normalized}-USD"

    return normalized


def is_crypto_symbol(symbol: str) -> bool:
    """Heuristically decide whether *symbol* denotes a crypto instrument.

    The symbol is first passed through normalize_instrument_symbol. It is
    reported as crypto when the normalized form is either a major crypto base
    quoted in USD, or a single-hyphen, dot-free "BASE-QUOTE" pair whose base is
    alphanumeric and whose quote is listed in CRYPTO_QUOTES.
    """
    ticker = normalize_instrument_symbol(symbol)
    if not ticker:
        return False

    # Split on the last hyphen; no hyphen at all means it cannot be a pair.
    base, hyphen, quote = ticker.rpartition("-")
    if not hyphen:
        return False

    # Well-known base quoted in USD.
    if quote == "USD" and base in MAJOR_CRYPTO_BASES:
        return True

    # Generic pair: exactly one hyphen and no exchange-style dot anywhere.
    if "-" in base or "." in ticker:
        return False
    return base.isalnum() and quote in CRYPTO_QUOTES


def get_asset_class(symbol: str) -> str:
    """Classify *symbol*: "crypto" when is_crypto_symbol accepts it, else "equity"."""
    if is_crypto_symbol(symbol):
        return "crypto"
    return "equity"