""" FRED API Core Utilities. This module provides core utilities for accessing the Federal Reserve Economic Data (FRED) API: - API key management - Custom exceptions for rate limiting and invalid series - Date formatting for FRED API - Request wrapper with retry logic and exponential backoff - Cache management for reducing API calls Usage: from tradingagents.dataflows.fred_common import get_api_key, _make_fred_request api_key = get_api_key() data = _make_fred_request('FEDFUNDS', start_date='2024-01-01', end_date='2024-12-31') Requirements: - fredapi package: pip install fredapi - FRED_API_KEY environment variable must be set """ import os import time import pandas as pd from pathlib import Path from datetime import datetime, timedelta from typing import Optional, Union # Try to import fredapi, but allow it to be mocked in tests try: from fredapi import Fred except ImportError: Fred = None # ============================================================================ # Configuration # ============================================================================ # Cache directory for FRED data CACHE_DIR = Path.home() / ".cache" / "fred" CACHE_DIR.mkdir(parents=True, exist_ok=True) # Cache TTL in hours CACHE_TTL_HOURS = 24 # ============================================================================ # Custom Exceptions # ============================================================================ class FredRateLimitError(Exception): """Exception raised when FRED API rate limit is exceeded.""" def __init__(self, message: str, retry_after: Optional[int] = None): super().__init__(message) self.retry_after = retry_after class FredInvalidSeriesError(Exception): """Exception raised when FRED series ID is invalid or not found.""" def __init__(self, message: str, series_id: Optional[str] = None): super().__init__(message) self.series_id = series_id # ============================================================================ # API Key Management # ============================================================================ def get_api_key() -> str: """ Retrieve the FRED API key from environment variables. Returns: str: The FRED API key Raises: ValueError: If FRED_API_KEY environment variable is not set or empty """ api_key = os.getenv("FRED_API_KEY") if not api_key or not api_key.strip(): raise ValueError("FRED_API_KEY environment variable is not set") return api_key # ============================================================================ # Date Formatting # ============================================================================ def format_date_for_fred(date_input: Union[str, datetime, 'date', int, None]) -> Optional[str]: """ Convert various date formats to YYYY-MM-DD format required by FRED API. Args: date_input: Date as string, datetime/date object, timestamp (int), or None Returns: Date string in YYYY-MM-DD format, or None if input is None Raises: ValueError: If date format is invalid or unsupported """ if date_input is None: return None # Handle datetime.date objects (not datetime) if hasattr(date_input, 'year') and hasattr(date_input, 'month') and hasattr(date_input, 'day'): if not isinstance(date_input, datetime): # It's a date object return f"{date_input.year:04d}-{date_input.month:02d}-{date_input.day:02d}" if isinstance(date_input, str): # Try multiple date formats date_formats = [ "%Y-%m-%d", # 2024-01-15 "%m/%d/%Y", # 01/15/2024 "%d-%m-%Y", # 15-01-2024 ] for fmt in date_formats: try: dt = datetime.strptime(date_input, fmt) return dt.strftime("%Y-%m-%d") except ValueError: continue # If no format matched, raise error raise ValueError(f"Invalid date format: {date_input}. Expected YYYY-MM-DD, MM/DD/YYYY, or DD-MM-YYYY") elif isinstance(date_input, datetime): return date_input.strftime("%Y-%m-%d") elif isinstance(date_input, int): # Assume it's a Unix timestamp dt = datetime.fromtimestamp(date_input) return dt.strftime("%Y-%m-%d") else: raise ValueError(f"Date must be string, datetime, date object, or timestamp, got {type(date_input)}") # ============================================================================ # API Request Functions # ============================================================================ def _make_fred_request( series_id: str, start_date: Optional[str] = None, end_date: Optional[str] = None, **kwargs ) -> pd.DataFrame: """ Make FRED API request with retry logic and exponential backoff. This function wraps the fredapi library with retry logic to handle transient network errors. It attempts up to 3 retries with exponential backoff (1s, 2s, 4s delays). Args: series_id: FRED series ID (e.g., 'FEDFUNDS', 'DGS10') start_date: Start date in YYYY-MM-DD format (optional) end_date: End date in YYYY-MM-DD format (optional) **kwargs: Additional parameters to pass to fredapi Returns: pd.DataFrame: FRED series data with 'date' and 'value' columns Raises: FredRateLimitError: If API rate limit is exceeded FredInvalidSeriesError: If series ID is invalid or not found Exception: For other API errors after exhausting retries """ if Fred is None: raise ImportError("fredapi package is not installed. Install with: pip install fredapi") # Validate series_id if not series_id or not isinstance(series_id, str): raise ValueError("series_id must be a non-empty string") # Get API key api_key = get_api_key() # Format dates if provided formatted_start = format_date_for_fred(start_date) if start_date else None formatted_end = format_date_for_fred(end_date) if end_date else None # Extract parameters from kwargs max_retries = kwargs.pop('max_retries', 3) use_cache = kwargs.pop('use_cache', False) base_delay = 1.0 # Check cache first if enabled if use_cache: cached_data = _load_from_cache(series_id, start_date, end_date) if cached_data is not None: return cached_data # Initial attempt + retries for attempt in range(max_retries + 1): try: # Create FRED client fred = Fred(api_key=api_key) # Make API request series_data = fred.get_series( series_id, observation_start=formatted_start, observation_end=formatted_end, **kwargs ) # Convert to DataFrame with standard column names # Handle both Series (real fredapi) and DataFrame (mocked in tests) if isinstance(series_data, pd.Series): df = pd.DataFrame({ 'date': series_data.index, 'value': series_data.values }) elif isinstance(series_data, pd.DataFrame): # Already a DataFrame (from mock), return as-is df = series_data else: raise ValueError(f"Unexpected return type from Fred API: {type(series_data)}") # Save to cache if enabled if use_cache: _save_to_cache(series_id, df, start_date, end_date) return df except Exception as e: error_msg = str(e).lower() # Check for rate limit errors if any(indicator in error_msg for indicator in [ 'rate limit', 'too many requests', 'rate_limit', 'ratelimit', '429' ]): raise FredRateLimitError(f"FRED API rate limit exceeded: {e}") # Check for invalid series errors if any(indicator in error_msg for indicator in [ 'bad request', 'not found', 'invalid series', 'series does not exist', '400', '404' ]): raise FredInvalidSeriesError(f"Invalid FRED series ID '{series_id}': {e}") # If this was the last attempt, raise the original exception if attempt >= max_retries: raise # Exponential backoff: 2^attempt seconds delay = base_delay * (2 ** attempt) time.sleep(delay) # Should never reach here, but just in case raise Exception("Retry logic failed unexpectedly") # ============================================================================ # Cache Management # ============================================================================ def _get_cache_path(series_id: str, start_date: Optional[str] = None, end_date: Optional[str] = None) -> Path: """ Generate cache file path for FRED series data. Args: series_id: FRED series ID start_date: Start date in YYYY-MM-DD format (optional) end_date: End date in YYYY-MM-DD format (optional) Returns: Path: Cache file path """ # Create filename with series ID and date range if start_date or end_date: filename_parts = [series_id] if start_date: filename_parts.append(start_date) if end_date: filename_parts.append(end_date) filename = "_".join(filename_parts) + ".parquet" else: filename = f"{series_id}.parquet" return CACHE_DIR / filename def _load_from_cache(series_id: str, start_date: Optional[str] = None, end_date: Optional[str] = None, cache_ttl_hours: Optional[int] = None) -> Optional[pd.DataFrame]: """ Load FRED data from cache if available and not expired. Cache files are considered valid for cache_ttl_hours (default: CACHE_TTL_HOURS = 24 hours). Args: series_id: FRED series ID start_date: Start date in YYYY-MM-DD format (optional) end_date: End date in YYYY-MM-DD format (optional) cache_ttl_hours: Cache TTL in hours (optional, defaults to CACHE_TTL_HOURS) Returns: pd.DataFrame if cache is valid, None if cache is invalid or expired """ cache_path = _get_cache_path(series_id, start_date, end_date) if not cache_path.exists(): return None # Use provided TTL or default ttl_hours = cache_ttl_hours if cache_ttl_hours is not None else CACHE_TTL_HOURS # Check cache age cache_age = datetime.now() - datetime.fromtimestamp(cache_path.stat().st_mtime) if cache_age > timedelta(hours=ttl_hours): return None try: # Load cached data df = pd.read_parquet(cache_path) # Convert date column to datetime if not already if 'date' in df.columns: df['date'] = pd.to_datetime(df['date']) return df except Exception: # If cache is corrupted, return None return None def _save_to_cache(series_id: str, data: pd.DataFrame, start_date: Optional[str] = None, end_date: Optional[str] = None) -> None: """ Save FRED data to cache. Args: series_id: FRED series ID data: DataFrame to cache start_date: Start date in YYYY-MM-DD format (optional) end_date: End date in YYYY-MM-DD format (optional) """ cache_path = _get_cache_path(series_id, start_date, end_date) # Ensure cache directory exists cache_path.parent.mkdir(parents=True, exist_ok=True) # Save to parquet data.to_parquet(cache_path, index=False)