# Source: TradingAgents/tradingagents/api/services/validators.py
# (304 lines, 8.1 KiB, Python)

"""Validators for user profile fields.
This module provides validation functions for:
- Timezones (IANA timezone database)
- Tax jurisdictions (country codes and state/province codes)
All validators return True/False and are designed to be used
in Pydantic models and database constraints.
"""
from typing import Set
from zoneinfo import ZoneInfo, available_timezones
# Valid tax jurisdictions.
# A code is either a bare ISO 3166-1 alpha-2 country code ("US") or a country
# code plus a state/province/territory suffix joined by a hyphen ("US-CA",
# "AU-NSW"). Suffixes are two or three letters depending on the country.
# Only major jurisdictions are listed; extend the tables below as needed.

# Country-level codes (ISO 3166-1 alpha-2)
_COUNTRY_CODES = (
    "US",  # United States
    "CA",  # Canada
    "GB",  # United Kingdom
    "AU",  # Australia
    "DE",  # Germany
    "FR",  # France
    "IT",  # Italy
    "ES",  # Spain
    "NL",  # Netherlands
    "BE",  # Belgium
    "CH",  # Switzerland
    "AT",  # Austria
    "SE",  # Sweden
    "NO",  # Norway
    "DK",  # Denmark
    "FI",  # Finland
    "IE",  # Ireland
    "PT",  # Portugal
    "GR",  # Greece
    "PL",  # Poland
    "CZ",  # Czech Republic
    "HU",  # Hungary
    "RO",  # Romania
    "JP",  # Japan
    "CN",  # China
    "KR",  # South Korea
    "IN",  # India
    "SG",  # Singapore
    "HK",  # Hong Kong
    "NZ",  # New Zealand
    "MX",  # Mexico
    "BR",  # Brazil
    "AR",  # Argentina
    "CL",  # Chile
    "ZA",  # South Africa
    "AE",  # United Arab Emirates
    "SA",  # Saudi Arabia
    "IL",  # Israel
    "TR",  # Turkey
    "RU",  # Russia
    "UA",  # Ukraine
    "TH",  # Thailand
    "MY",  # Malaysia
    "ID",  # Indonesia
    "PH",  # Philippines
    "VN",  # Vietnam
    "TW",  # Taiwan
)

# State/province/territory suffixes, keyed by the owning country code.
# Each suffix is combined with its country as "<country>-<suffix>".
_SUBDIVISION_CODES = {
    # United States - State level
    "US": (
        "AL",  # Alabama
        "AK",  # Alaska
        "AZ",  # Arizona
        "AR",  # Arkansas
        "CA",  # California
        "CO",  # Colorado
        "CT",  # Connecticut
        "DE",  # Delaware
        "FL",  # Florida
        "GA",  # Georgia
        "HI",  # Hawaii
        "ID",  # Idaho
        "IL",  # Illinois
        "IN",  # Indiana
        "IA",  # Iowa
        "KS",  # Kansas
        "KY",  # Kentucky
        "LA",  # Louisiana
        "ME",  # Maine
        "MD",  # Maryland
        "MA",  # Massachusetts
        "MI",  # Michigan
        "MN",  # Minnesota
        "MS",  # Mississippi
        "MO",  # Missouri
        "MT",  # Montana
        "NE",  # Nebraska
        "NV",  # Nevada
        "NH",  # New Hampshire
        "NJ",  # New Jersey
        "NM",  # New Mexico
        "NY",  # New York
        "NC",  # North Carolina
        "ND",  # North Dakota
        "OH",  # Ohio
        "OK",  # Oklahoma
        "OR",  # Oregon
        "PA",  # Pennsylvania
        "RI",  # Rhode Island
        "SC",  # South Carolina
        "SD",  # South Dakota
        "TN",  # Tennessee
        "TX",  # Texas
        "UT",  # Utah
        "VT",  # Vermont
        "VA",  # Virginia
        "WA",  # Washington
        "WV",  # West Virginia
        "WI",  # Wisconsin
        "WY",  # Wyoming
        "DC",  # District of Columbia
    ),
    # Canada - Province/Territory level
    "CA": (
        "AB",  # Alberta
        "BC",  # British Columbia
        "MB",  # Manitoba
        "NB",  # New Brunswick
        "NL",  # Newfoundland and Labrador
        "NS",  # Nova Scotia
        "NT",  # Northwest Territories
        "NU",  # Nunavut
        "ON",  # Ontario
        "PE",  # Prince Edward Island
        "QC",  # Quebec
        "SK",  # Saskatchewan
        "YT",  # Yukon
    ),
    # Australia - State/Territory level
    "AU": (
        "NSW",  # New South Wales
        "VIC",  # Victoria
        "QLD",  # Queensland
        "SA",  # South Australia
        "WA",  # Western Australia
        "TAS",  # Tasmania
        "NT",  # Northern Territory
        "ACT",  # Australian Capital Territory
    ),
}

# Flat lookup set: every country code plus every "<country>-<suffix>" pair.
VALID_TAX_JURISDICTIONS: Set[str] = set(_COUNTRY_CODES) | {
    f"{country}-{subdivision}"
    for country, subdivisions in _SUBDIVISION_CODES.items()
    for subdivision in subdivisions
}
def validate_timezone(timezone: str) -> bool:
    """
    Validate timezone against IANA timezone database.

    Performs an exact, case-sensitive membership test of the given string
    against the keys reported by ``zoneinfo.available_timezones()``. No
    ``ZoneInfo`` object is constructed.

    Args:
        timezone: Timezone identifier (e.g., "America/New_York", "UTC").
            Empty strings and non-string values are rejected with False
            instead of raising.

    Returns:
        bool: True if valid IANA timezone key, False otherwise

    Valid Examples:
        - "UTC"
        - "GMT"
        - "America/New_York"
        - "Europe/London"
        - "Asia/Tokyo"
        - "Australia/Sydney"

    Invalid Examples:
        - "PST" (abbreviation, not an IANA key)
        - "New York" (wrong format)
        - "america/new_york" (wrong case)
        - "" or None (empty / not a string)

    Example:
        >>> validate_timezone("America/New_York")
        True
        >>> validate_timezone("UTC")
        True
        >>> validate_timezone("PST")
        False
        >>> validate_timezone("Invalid/Zone")
        False

    Note:
        - Case-sensitive (must match IANA database exactly)
        - Results depend on the tz database installed on the host
        - Abbreviations are rejected only when they are not database keys;
          a few legacy names (e.g. "EST", "MST", "HST") are usually shipped
          as keys and therefore validate as True
        - available_timezones() rebuilds its result on every call; when
          validating many values, fetch the set once and test against it
    """
    # Check the type before the truthiness: bool() on an arbitrary object can
    # raise (e.g. pandas.NA or a multi-element numpy array), and a validator
    # should answer False for such input rather than propagate the error.
    if not isinstance(timezone, str) or not timezone:
        return False
    # Membership test against the keys of the installed IANA database.
    return timezone in available_timezones()
def validate_tax_jurisdiction(jurisdiction: str) -> bool:
    """
    Validate tax jurisdiction code.

    Checks if the provided jurisdiction is in VALID_TAX_JURISDICTIONS.
    Supports both country-level and state/province-level jurisdictions.

    Format:
        - Country level: "CC" (2-letter ISO 3166-1 alpha-2)
        - State/Province level: "CC-SSS" (country code, hyphen, then a
          2- or 3-letter subdivision code, e.g. "US-CA", "AU-NSW")

    Args:
        jurisdiction: Tax jurisdiction code. Empty strings and non-string
            values are rejected with False instead of raising.

    Returns:
        bool: True if valid jurisdiction, False otherwise

    Valid Examples:
        - "US" (United States)
        - "CA" (Canada)
        - "GB" (United Kingdom)
        - "US-CA" (California, USA)
        - "US-NY" (New York, USA)
        - "CA-ON" (Ontario, Canada)
        - "AU-NSW" (New South Wales, Australia)

    Invalid Examples:
        - "us" (lowercase)
        - "USA" (3 letters)
        - "US_CA" (underscore instead of hyphen)
        - "US/CA" (slash instead of hyphen)
        - "XX" (non-existent country)

    Example:
        >>> validate_tax_jurisdiction("US")
        True
        >>> validate_tax_jurisdiction("US-CA")
        True
        >>> validate_tax_jurisdiction("us")
        False
        >>> validate_tax_jurisdiction("XX-YY")
        False

    Note:
        - Case-sensitive (must be uppercase)
        - Hyphen separator for state/province codes
        - The list covers major jurisdictions and is not exhaustive
        - Add new jurisdictions to VALID_TAX_JURISDICTIONS set as needed
    """
    # Check the type before the truthiness: bool() on an arbitrary object can
    # raise (e.g. pandas.NA or a multi-element numpy array), and a validator
    # should answer False for such input rather than propagate the error.
    if not isinstance(jurisdiction, str) or not jurisdiction:
        return False
    # Exact-match lookup; no normalization (case, separators) is applied.
    return jurisdiction in VALID_TAX_JURISDICTIONS
def get_available_timezones() -> Set[str]:
    """
    Return every timezone key known to the installed IANA database.

    Thin wrapper around ``zoneinfo.available_timezones()``; the result is
    handed back unchanged.

    Returns:
        Set[str]: Set of valid timezone identifiers

    Example:
        >>> timezones = get_available_timezones()
        >>> "America/New_York" in timezones
        True
        >>> len(timezones) > 500  # Hundreds of valid timezones
        True

    Note:
        - zoneinfo recomputes the set on each call, so keep a reference to
          the result when it is needed repeatedly
        - Suitable for populating dropdowns or validation lists
        - Contents depend on the tz database installed on the host
    """
    zone_keys: Set[str] = available_timezones()
    return zone_keys
def get_available_tax_jurisdictions() -> Set[str]:
    """
    Return the tax jurisdiction codes accepted by this module.

    Builds a new set from VALID_TAX_JURISDICTIONS on each call, so callers
    may mutate the result without affecting validation.

    Returns:
        Set[str]: Set of valid tax jurisdiction codes

    Example:
        >>> jurisdictions = get_available_tax_jurisdictions()
        >>> "US" in jurisdictions
        True
        >>> "US-CA" in jurisdictions
        True
        >>> len(jurisdictions) > 50  # Many jurisdictions supported
        True

    Note:
        - The returned set is a shallow copy of the module-level constant
        - Suitable for populating dropdowns or validation lists
        - Contains both country and state/province level codes
    """
    # A fresh set keeps the module-level constant safe from caller mutation.
    return set(VALID_TAX_JURISDICTIONS)