import logging
import os
from typing import Annotated

import pandas as pd
import yfinance as yf
from stockstats import wrap

from .config import get_config

logger = logging.getLogger(__name__)

# A cached file with fewer rows than this is treated as truncated and is
# deleted and re-fetched.
_MIN_CACHE_ROWS = 50


# ---------------------------------------------------------------------------
# Public exception — lets callers catch stockstats/yfinance failures by type
# ---------------------------------------------------------------------------

class YFinanceError(Exception):
    """Raised when yfinance or stockstats data fetching/processing fails."""


# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------

def _clean_dataframe(data: pd.DataFrame) -> pd.DataFrame:
    """Normalize a stock DataFrame for stockstats.

    Works on a copy of *data*: lowercases the column names, parses the
    ``date`` column (dropping rows whose date cannot be parsed), coerces the
    OHLCV columns to numeric, drops rows without a ``close`` value and
    forward/backward-fills the remaining gaps in the price columns.

    Args:
        data: Raw OHLCV frame, e.g. as read from the CSV cache or as returned
            by yfinance after ``reset_index()``.

    Returns:
        A cleaned copy of *data* with lowercase column names.
    """
    df = data.copy()
    df.columns = [str(c).lower() for c in df.columns]

    if "date" in df.columns:
        # Unparseable dates become NaT and are then dropped.
        df["date"] = pd.to_datetime(df["date"], errors="coerce")
        df = df.dropna(subset=["date"])

    price_cols = [
        c for c in ["open", "high", "low", "close", "volume"] if c in df.columns
    ]
    if price_cols:
        df[price_cols] = df[price_cols].apply(pd.to_numeric, errors="coerce")

    # Rows without a close price are dropped before the gaps are filled, so a
    # close value is never synthesized by ffill/bfill.
    if "close" in df.columns:
        df = df.dropna(subset=["close"])

    if price_cols:
        df[price_cols] = df[price_cols].ffill().bfill()

    return df


def _load_or_fetch_ohlcv(symbol: str) -> pd.DataFrame:
    """Single authority for loading OHLCV data: cache → yfinance download.

    The cache filename is derived from today's date and a 15-year look-back
    window, so a file written on a previous day is never reused. If a cached
    file exists but cannot be parsed, or has fewer than ``_MIN_CACHE_ROWS``
    rows, it is deleted and the data is downloaded again rather than silently
    returning bad data.

    Args:
        symbol: Ticker symbol to load.

    Returns:
        The OHLCV data as read from the cache, or as freshly downloaded (with
        the index reset into a column). The frame is not cleaned here.

    Raises:
        YFinanceError: if the download raises, or returns no data.
    """
    config = get_config()

    today_date = pd.Timestamp.today()
    start_date = today_date - pd.DateOffset(years=15)
    start_date_str = start_date.strftime("%Y-%m-%d")
    end_date_str = today_date.strftime("%Y-%m-%d")

    os.makedirs(config["data_cache_dir"], exist_ok=True)
    data_file = os.path.join(
        config["data_cache_dir"],
        f"{symbol}-YFin-data-{start_date_str}-{end_date_str}.csv",
    )

    data = None

    # ── Try to load from cache ────────────────────────────────────────────
    if os.path.exists(data_file):
        try:
            # no on_bad_lines="skip" — we want to know about corruption
            cached = pd.read_csv(data_file)
        except Exception as exc:
            logger.warning(
                "Corrupt cache file for %s (%s) — deleting and re-fetching.",
                symbol,
                exc,
            )
            os.remove(data_file)
        else:
            if len(cached) < _MIN_CACHE_ROWS:
                logger.warning(
                    "Cache file for %s has only %d rows — likely truncated, re-fetching.",
                    symbol,
                    len(cached),
                )
                os.remove(data_file)
            else:
                data = cached

    # ── Download from yfinance if cache miss / corrupt ────────────────────
    if data is None:
        try:
            raw = yf.download(
                symbol,
                start=start_date_str,
                end=end_date_str,
                multi_level_index=False,
                progress=False,
                auto_adjust=True,
            )
        except Exception as exc:
            # Honour the documented contract: download failures surface as
            # YFinanceError, with the original exception chained as the cause.
            raise YFinanceError(
                f"yfinance download failed for symbol '{symbol}' "
                f"({start_date_str} → {end_date_str}): {exc}"
            ) from exc

        if raw is None or raw.empty:
            raise YFinanceError(
                f"yfinance returned no data for symbol '{symbol}' "
                f"({start_date_str} → {end_date_str})"
            )

        data = raw.reset_index()
        data.to_csv(data_file, index=False)
        logger.debug("Downloaded and cached OHLCV for %s → %s", symbol, data_file)

    return data


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------

class StockstatsUtils:
    """Namespace for stockstats-based indicator lookups."""

    @staticmethod
    def get_stock_stats(
        symbol: Annotated[str, "ticker symbol for the company"],
        indicator: Annotated[
            str, "quantitative indicators based off of the stock data for the company"
        ],
        curr_date: Annotated[
            str, "curr date for retrieving stock price data, YYYY-mm-dd"
        ],
    ):
        """Return the value of *indicator* for *symbol* on *curr_date*.

        Loads the OHLCV data (cache or download), cleans it, lets stockstats
        compute the indicator and looks up the row whose date equals
        *curr_date*.

        Returns:
            The indicator value from the first matching row, or the string
            ``"N/A: Not a trading day (weekend or holiday)"`` when no row has
            that date.

        Raises:
            YFinanceError: if the OHLCV data cannot be downloaded.
        """
        curr_date_dt = pd.to_datetime(curr_date)
        curr_date_str = curr_date_dt.strftime("%Y-%m-%d")

        data = _load_or_fetch_ohlcv(symbol)
        data = _clean_dataframe(data)
        df = wrap(data)

        # After wrap(), the date column becomes the datetime index (named 'date').
        # Access via df.index, not df["Date"] which stockstats would try to parse
        # as an indicator.
        df[indicator]  # trigger stockstats to calculate the indicator

        date_index_strs = df.index.strftime("%Y-%m-%d")
        matching_rows = df[date_index_strs == curr_date_str]

        if not matching_rows.empty:
            return matching_rows[indicator].values[0]
        return "N/A: Not a trading day (weekend or holiday)"