# TradingAgents/tradingagents/dataflows/stockstats_utils.py
#
# 105 lines
# 3.8 KiB
# Python

import pandas as pd
import yfinance as yf
from stockstats import wrap
from typing import Annotated
import os
from .config import get_config
class StockstatsUtils:
    """Look up stockstats indicator values from Yahoo Finance price data."""

    @staticmethod
    def _load_offline_frame(symbol, data_dir):
        """Read the pre-fetched CSV for ``symbol`` in ``data_dir`` and wrap it."""
        csv_path = os.path.join(
            data_dir,
            f"{symbol}-YFin-data-2015-01-01-2025-03-25.csv",
        )
        try:
            data = pd.read_csv(csv_path, on_bad_lines="skip", engine="python")
        except FileNotFoundError as err:
            # Same exception type and message as before; chaining keeps the
            # missing path visible in the traceback.
            raise Exception(
                "Stockstats fail: Yahoo Finance data not fetched yet!"
            ) from err
        return wrap(data)

    @staticmethod
    def _load_online_frame(symbol):
        """Load (from the cache, else by download) 15 years of prices and wrap them.

        The returned frame has its ``Date`` column formatted as ``YYYY-mm-dd``
        strings.
        """
        today = pd.Timestamp.today()
        start_date = (today - pd.DateOffset(years=15)).strftime("%Y-%m-%d")
        end_date = today.strftime("%Y-%m-%d")

        # Get config and ensure the cache directory exists.
        config = get_config()
        os.makedirs(config["data_cache_dir"], exist_ok=True)
        # The file name embeds today's date, so a cache file is only reused
        # by calls made on the same day.
        data_file = os.path.join(
            config["data_cache_dir"],
            f"{symbol}-YFin-data-{start_date}-{end_date}.csv",
        )

        if os.path.exists(data_file):
            data = pd.read_csv(data_file, on_bad_lines="skip", engine="python")
            # Unparseable dates become NaT and the affected rows are dropped.
            data["Date"] = pd.to_datetime(
                data["Date"], errors="coerce", format="mixed"
            )
            data = data.dropna(subset=["Date"])
        else:
            data = yf.download(
                symbol,
                start=start_date,
                end=end_date,
                multi_level_index=False,
                progress=False,
                auto_adjust=True,
            )
            data = data.reset_index()
            # Do not cache an empty result: the os.path.exists() check above
            # would otherwise keep serving it instead of retrying the download.
            if not data.empty:
                data.to_csv(data_file, index=False)

        df = wrap(data)
        df["Date"] = df["Date"].dt.strftime("%Y-%m-%d")
        return df

    @staticmethod
    def get_stock_stats(
        symbol: Annotated[str, "ticker symbol for the company"],
        indicator: Annotated[
            str, "quantitative indicators based off of the stock data for the company"
        ],
        curr_date: Annotated[
            str, "curr date for retrieving stock price data, YYYY-mm-dd"
        ],
        data_dir: Annotated[
            str,
            "directory where the stock data is stored.",
        ],
        online: Annotated[
            bool,
            "whether to use online tools to fetch data or offline tools. If True, will use online tools.",
        ] = False,
    ):
        """Return the value of ``indicator`` for ``symbol`` on ``curr_date``.

        Args:
            symbol: Ticker symbol.
            indicator: Column name that stockstats computes on access.
            curr_date: Date to look up. In online mode it is parsed with
                ``pd.to_datetime`` and normalised to ``YYYY-mm-dd``; in
                offline mode it is used as given for a prefix match against
                the CSV's ``Date`` column.
            data_dir: Directory holding the pre-fetched CSV (offline mode only).
            online: If True, use the cached/downloaded Yahoo Finance data
                instead of the pre-fetched CSV.

        Returns:
            The indicator value of the first row whose ``Date`` starts with
            ``curr_date``, or the string
            ``"N/A: Not a trading day (weekend or holiday)"`` when no row
            matches.

        Raises:
            Exception: In offline mode, when the pre-fetched CSV is missing.
        """
        if online:
            # Parse first so a malformed date fails before any I/O happens.
            curr_date = pd.to_datetime(curr_date).strftime("%Y-%m-%d")
            df = StockstatsUtils._load_online_frame(symbol)
        else:
            df = StockstatsUtils._load_offline_frame(symbol, data_dir)

        # Clean data before calculating the indicator to avoid NaN masking
        # errors. NOTE(review): wrap() is re-applied because the pandas call
        # may hand back a plain DataFrame rather than the stockstats frame --
        # confirm against the installed stockstats version; wrap() is a no-op
        # on an already-wrapped frame.
        df = wrap(df.dropna(subset=["close"]))

        try:
            df[indicator]  # trigger stockstats to calculate the indicator
        except Exception as e:
            if (
                "Cannot mask with non-boolean array containing NA / NaN values"
                not in str(e)
            ):
                raise
            # Additional cleanup for stubborn NaN values. ffill()/bfill()
            # replace the deprecated fillna(method=...) spelling.
            df = wrap(df.ffill().bfill())
            df[indicator]  # retry calculation

        matching_rows = df[df["Date"].str.startswith(curr_date)]
        if matching_rows.empty:
            return "N/A: Not a trading day (weekend or holiday)"
        return matching_rows[indicator].values[0]