fix: harden stock data parsing against malformed CSV and NaN values
Add _clean_dataframe() to normalize stock DataFrames before stockstats: coerce invalid dates/prices, drop rows missing Close, fill price gaps. Also add on_bad_lines="skip" to all cached CSV reads.
This commit is contained in:
parent
9cc283ac22
commit
b0f9d180f9
|
|
@ -6,6 +6,19 @@ import os
|
|||
from .config import get_config
|
||||
|
||||
|
||||
def _clean_dataframe(data: pd.DataFrame) -> pd.DataFrame:
    """Normalize a stock DataFrame for stockstats.

    Steps, in order:
      1. Parse ``Date`` with ``errors="coerce"`` so unparseable dates become NaT.
      2. Coerce whichever of Open/High/Low/Close/Volume are present to numeric,
         turning unparseable values into NaN.
      3. Drop rows whose ``Date`` or ``Close`` is missing.
      4. Forward-fill, then back-fill, the remaining gaps in the price columns.

    The input frame is left untouched; a new DataFrame is returned.

    Args:
        data: Frame with at least ``Date`` and ``Close`` columns.

    Returns:
        A cleaned copy of ``data``.

    Raises:
        KeyError: If the ``Date`` or ``Close`` column is absent.
    """
    # Work on a copy: assigning columns on ``data`` directly would silently
    # mutate the caller's DataFrame.
    cleaned = data.copy()
    cleaned["Date"] = pd.to_datetime(cleaned["Date"], errors="coerce")

    price_cols = [
        col
        for col in ("Open", "High", "Low", "Close", "Volume")
        if col in cleaned.columns
    ]
    cleaned[price_cols] = cleaned[price_cols].apply(pd.to_numeric, errors="coerce")

    # Coercion is row-independent, so dropping invalid Date/Close rows in a
    # single pass yields the same rows as dropping them one column at a time.
    # The explicit copy gives an independent frame, so the assignment below
    # cannot trigger chained-assignment warnings on older pandas versions.
    cleaned = cleaned.dropna(subset=["Date", "Close"]).copy()

    # Fill only after invalid rows are gone so values from dropped rows never
    # propagate into the rows that are kept.
    cleaned[price_cols] = cleaned[price_cols].ffill().bfill()

    return cleaned
|
||||
|
||||
|
||||
class StockstatsUtils:
|
||||
@staticmethod
|
||||
def get_stock_stats(
|
||||
|
|
@ -36,8 +49,7 @@ class StockstatsUtils:
|
|||
)
|
||||
|
||||
if os.path.exists(data_file):
|
||||
data = pd.read_csv(data_file)
|
||||
data["Date"] = pd.to_datetime(data["Date"])
|
||||
data = pd.read_csv(data_file, on_bad_lines="skip")
|
||||
else:
|
||||
data = yf.download(
|
||||
symbol,
|
||||
|
|
@ -50,6 +62,7 @@ class StockstatsUtils:
|
|||
data = data.reset_index()
|
||||
data.to_csv(data_file, index=False)
|
||||
|
||||
data = _clean_dataframe(data)
|
||||
df = wrap(data)
|
||||
df["Date"] = df["Date"].dt.strftime("%Y-%m-%d")
|
||||
curr_date_str = curr_date_dt.strftime("%Y-%m-%d")
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ from datetime import datetime
|
|||
from dateutil.relativedelta import relativedelta
|
||||
import yfinance as yf
|
||||
import os
|
||||
from .stockstats_utils import StockstatsUtils
|
||||
from .stockstats_utils import StockstatsUtils, _clean_dataframe
|
||||
|
||||
def get_YFin_data_online(
|
||||
symbol: Annotated[str, "ticker symbol of the company"],
|
||||
|
|
@ -209,9 +209,9 @@ def _get_stock_stats_bulk(
|
|||
os.path.join(
|
||||
config.get("data_cache_dir", "data"),
|
||||
f"{symbol}-YFin-data-2015-01-01-2025-03-25.csv",
|
||||
),
|
||||
on_bad_lines="skip",
|
||||
)
|
||||
)
|
||||
df = wrap(data)
|
||||
except FileNotFoundError:
|
||||
raise Exception("Stockstats fail: Yahoo Finance data not fetched yet!")
|
||||
else:
|
||||
|
|
@ -232,8 +232,7 @@ def _get_stock_stats_bulk(
|
|||
)
|
||||
|
||||
if os.path.exists(data_file):
|
||||
data = pd.read_csv(data_file)
|
||||
data["Date"] = pd.to_datetime(data["Date"])
|
||||
data = pd.read_csv(data_file, on_bad_lines="skip")
|
||||
else:
|
||||
data = yf.download(
|
||||
symbol,
|
||||
|
|
@ -246,6 +245,7 @@ def _get_stock_stats_bulk(
|
|||
data = data.reset_index()
|
||||
data.to_csv(data_file, index=False)
|
||||
|
||||
data = _clean_dataframe(data)
|
||||
df = wrap(data)
|
||||
df["Date"] = df["Date"].dt.strftime("%Y-%m-%d")
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue