133 lines
3.3 KiB
Python
133 lines
3.3 KiB
Python
import re
|
|
|
|
# Quotes commonly used for crypto trading pairs.
CRYPTO_QUOTES = {
    "USD",
    "USDT",
    "USDC",
    "BUSD",
    "DAI",
    "FDUSD",
    "TUSD",
    "BTC",
    "ETH",
    "BNB",
    "EUR",
    "JPY",
}

# Stablecoin quotes are normalized to USD for Yahoo Finance compatibility.
STABLECOIN_QUOTES = {"USDT", "USDC", "BUSD", "DAI", "FDUSD", "TUSD"}

# Popular crypto symbols used for auto-normalization when a quote is omitted.
MAJOR_CRYPTO_BASES = {
    "BTC",
    "ETH",
    "ONT",
    "SOL",
    "XRP",
    "BNB",
    "DOGE",
    "ADA",
    "TRX",
    "AVAX",
    "DOT",
    "MATIC",
    "LTC",
    "BCH",
    "LINK",
    "ATOM",
    "UNI",
    "AAVE",
    "ETC",
    "XLM",
    "NEAR",
    "FIL",
}

# Quote suffixes tried when splitting concatenated pairs, longest first so that
# e.g. "USDT" is tested before "USD". Equal-length suffixes are ordered
# alphabetically: sorting a set on length alone would leave their relative
# order dependent on set iteration order, which can differ between processes.
CONCAT_QUOTE_SUFFIXES = tuple(
    sorted(
        CRYPTO_QUOTES,
        key=lambda suffix: (-len(suffix), suffix),
    )
)

# Matches upper-case alphanumeric symbols followed by one or more
# dot-separated suffixes of 1-5 alphanumeric characters (e.g. CNC.TO, 7203.T).
EXCHANGE_SUFFIX_PATTERN = re.compile(r"^[A-Z0-9]+(?:\.[A-Z0-9]{1,5})+$")
|
|
|
|
|
|
def _normalize_quote(quote: str) -> str:
    """Return ``"USD"`` for any quote in ``STABLECOIN_QUOTES``, else *quote* unchanged."""
    return "USD" if quote in STABLECOIN_QUOTES else quote
|
|
|
|
|
|
def normalize_instrument_symbol(symbol: str) -> str:
    """Normalize ticker-like input while preserving equity suffixes and crypto pairs.

    The rules are applied in order and the first one that matches wins:

    1. Trim, upper-case and remove spaces; an empty result is returned as-is.
    2. Drop a single venue prefix such as ``BINANCE:``.
    3. Return exchange-qualified symbols (``CNC.TO``, ``7203.T``) unchanged.
    4. Rewrite ``BASE/QUOTE``, ``BASE_QUOTE`` and ``BASE-QUOTE`` as
       ``BASE-QUOTE``.
    5. Split a concatenated pair (``BTCUSDT``) on a known quote suffix.
    6. Append ``-USD`` to a bare symbol listed in ``MAJOR_CRYPTO_BASES``.

    Stablecoin quotes are rewritten to ``USD`` in rules 4 and 5. Input that
    matches no rule is returned in its trimmed, upper-cased form.

    Args:
        symbol: Raw user- or feed-supplied symbol text.

    Returns:
        The normalized symbol string.

    Examples:
        - " cnc.to " -> "CNC.TO"
        - "btc-usdt" -> "BTC-USD"
        - "eth/usdt" -> "ETH-USD"
        - "BTCUSDT" -> "BTC-USD"
        - "BNBUSD" -> "BNB-USD"
        - "btc" -> "BTC-USD"
    """
    normalized = symbol.strip().upper().replace(" ", "")
    if not normalized:
        return normalized

    # TradingView-like venue prefixes (e.g. BINANCE:BTCUSDT).
    if normalized.count(":") == 1:
        _, normalized = normalized.split(":", 1)

    # Preserve exchange-qualified equity symbols such as 7203.T or CNC.TO.
    if EXCHANGE_SUFFIX_PATTERN.match(normalized):
        return normalized

    # Pair formats: BTC/USDT, BTC_USDT, BTC-USD.
    pair_candidate = normalized.replace("_", "/")
    if pair_candidate.count("/") == 1:
        base, quote = pair_candidate.split("/")
        if base.isalnum() and quote.isalnum():
            return f"{base}-{_normalize_quote(quote)}"

    if normalized.count("-") == 1:
        base, quote = normalized.split("-")
        if base.isalnum() and quote.isalnum():
            return f"{base}-{_normalize_quote(quote)}"

    # Concatenated pair format: BTCUSDT, ETHUSD, SOLBTC.
    # A symbol that is itself a known quote (e.g. FDUSD) is not a pair and
    # must not be split into "FD" + "USD".
    if normalized not in CRYPTO_QUOTES:
        # Every viable (base, quote) split, in CONCAT_QUOTE_SUFFIXES order
        # (longest suffix first). The base must keep at least two characters.
        splits = [
            (normalized[: -len(suffix)], suffix)
            for suffix in CONCAT_QUOTE_SUFFIXES
            if normalized.endswith(suffix)
            and len(normalized) > len(suffix) + 1
            and normalized[: -len(suffix)].isalnum()
        ]
        if splits:
            # Longest-suffix matching alone mis-reads BNBUSD as BN + BUSD and
            # DOTUSD as DO + TUSD, so a split whose base is a known major
            # symbol takes precedence; otherwise keep the longest suffix.
            base, quote = next(
                (pair for pair in splits if pair[0] in MAJOR_CRYPTO_BASES),
                splits[0],
            )
            return f"{base}-{_normalize_quote(quote)}"

    # Bare major crypto symbols default to USD quote.
    if normalized in MAJOR_CRYPTO_BASES:
        return f"{normalized}-USD"

    return normalized
|
|
|
|
|
|
def is_crypto_symbol(symbol: str) -> bool:
    """Heuristically decide whether *symbol* denotes a crypto instrument.

    The symbol is first passed through ``normalize_instrument_symbol``. It is
    treated as crypto when the normalized form is either a
    ``MAJOR_CRYPTO_BASES`` entry quoted in USD, or a single-dash pair without
    a dot whose base is alphanumeric and whose quote is in ``CRYPTO_QUOTES``.
    """
    canonical = normalize_instrument_symbol(symbol)
    if not canonical:
        return False

    # Known major base quoted in USD, e.g. BTC-USD.
    if canonical.endswith("-USD") and canonical[:-4] in MAJOR_CRYPTO_BASES:
        return True

    # Anything else must look like exactly one BASE-QUOTE pair with no dot.
    if "." in canonical or canonical.count("-") != 1:
        return False

    left, right = canonical.split("-")
    return left.isalnum() and right in CRYPTO_QUOTES
|
|
|
|
|
|
def get_asset_class(symbol: str) -> str:
    """Classify *symbol* as ``'crypto'`` or ``'equity'``.

    Anything ``is_crypto_symbol`` does not recognize is reported as equity.
    """
    if is_crypto_symbol(symbol):
        return "crypto"
    return "equity"
|