Fix the bad data issue: skip malformed CSV rows and drop rows with invalid dates or NaN close prices
This commit is contained in:
parent
a438acdbbd
commit
204772b736
|
|
@ -517,7 +517,9 @@ def get_stock_stats_indicators_window(
|
||||||
os.path.join(
|
os.path.join(
|
||||||
DATA_DIR,
|
DATA_DIR,
|
||||||
f"market_data/price_data/{symbol}-YFin-data-2015-01-01-2025-03-25.csv",
|
f"market_data/price_data/{symbol}-YFin-data-2015-01-01-2025-03-25.csv",
|
||||||
)
|
),
|
||||||
|
on_bad_lines='skip',
|
||||||
|
engine='python'
|
||||||
)
|
)
|
||||||
data["Date"] = pd.to_datetime(data["Date"], utc=True)
|
data["Date"] = pd.to_datetime(data["Date"], utc=True)
|
||||||
dates_in_df = data["Date"].astype(str).str[:10]
|
dates_in_df = data["Date"].astype(str).str[:10]
|
||||||
|
|
@ -599,7 +601,9 @@ def get_YFin_data_window(
|
||||||
os.path.join(
|
os.path.join(
|
||||||
DATA_DIR,
|
DATA_DIR,
|
||||||
f"market_data/price_data/{symbol}-YFin-data-2015-01-01-2025-03-25.csv",
|
f"market_data/price_data/{symbol}-YFin-data-2015-01-01-2025-03-25.csv",
|
||||||
)
|
),
|
||||||
|
on_bad_lines='skip',
|
||||||
|
engine='python'
|
||||||
)
|
)
|
||||||
|
|
||||||
# Extract just the date part for comparison
|
# Extract just the date part for comparison
|
||||||
|
|
@ -677,7 +681,9 @@ def get_YFin_data(
|
||||||
os.path.join(
|
os.path.join(
|
||||||
DATA_DIR,
|
DATA_DIR,
|
||||||
f"market_data/price_data/{symbol}-YFin-data-2015-01-01-2025-03-25.csv",
|
f"market_data/price_data/{symbol}-YFin-data-2015-01-01-2025-03-25.csv",
|
||||||
)
|
),
|
||||||
|
on_bad_lines='skip',
|
||||||
|
engine='python'
|
||||||
)
|
)
|
||||||
|
|
||||||
if end_date > "2025-03-25":
|
if end_date > "2025-03-25":
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,9 @@ class StockstatsUtils:
|
||||||
os.path.join(
|
os.path.join(
|
||||||
data_dir,
|
data_dir,
|
||||||
f"{symbol}-YFin-data-2015-01-01-2025-03-25.csv",
|
f"{symbol}-YFin-data-2015-01-01-2025-03-25.csv",
|
||||||
)
|
),
|
||||||
|
on_bad_lines='skip',
|
||||||
|
engine='python'
|
||||||
)
|
)
|
||||||
df = wrap(data)
|
df = wrap(data)
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
|
|
@ -59,8 +61,11 @@ class StockstatsUtils:
|
||||||
)
|
)
|
||||||
|
|
||||||
if os.path.exists(data_file):
|
if os.path.exists(data_file):
|
||||||
data = pd.read_csv(data_file)
|
data = pd.read_csv(data_file, on_bad_lines='skip', engine='python')
|
||||||
data["Date"] = pd.to_datetime(data["Date"])
|
# Handle date parsing with error handling for corrupted dates
|
||||||
|
data["Date"] = pd.to_datetime(data["Date"], errors='coerce', format='mixed')
|
||||||
|
# Remove rows with invalid dates
|
||||||
|
data = data.dropna(subset=['Date'])
|
||||||
else:
|
else:
|
||||||
data = yf.download(
|
data = yf.download(
|
||||||
symbol,
|
symbol,
|
||||||
|
|
@ -77,7 +82,19 @@ class StockstatsUtils:
|
||||||
df["Date"] = df["Date"].dt.strftime("%Y-%m-%d")
|
df["Date"] = df["Date"].dt.strftime("%Y-%m-%d")
|
||||||
curr_date = curr_date.strftime("%Y-%m-%d")
|
curr_date = curr_date.strftime("%Y-%m-%d")
|
||||||
|
|
||||||
df[indicator] # trigger stockstats to calculate the indicator
|
# Clean data before calculating indicator to avoid NaN masking errors
|
||||||
|
df = df.dropna(subset=['close']) # Remove rows with NaN close prices
|
||||||
|
|
||||||
|
try:
|
||||||
|
df[indicator] # trigger stockstats to calculate the indicator
|
||||||
|
except Exception as e:
|
||||||
|
if "Cannot mask with non-boolean array containing NA / NaN values" in str(e):
|
||||||
|
# Additional cleanup for stubborn NaN values
|
||||||
|
df = df.fillna(method='ffill').fillna(method='bfill')
|
||||||
|
df[indicator] # retry calculation
|
||||||
|
else:
|
||||||
|
raise e
|
||||||
|
|
||||||
matching_rows = df[df["Date"].str.startswith(curr_date)]
|
matching_rows = df[df["Date"].str.startswith(curr_date)]
|
||||||
|
|
||||||
if not matching_rows.empty:
|
if not matching_rows.empty:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue