421 lines
15 KiB
Python
421 lines
15 KiB
Python
"""
|
|
Cached API Wrappers for Financial Data
|
|
Integrates the TimeSeriesCache with existing financial APIs
|
|
"""
|
|
|
|
import pandas as pd
|
|
import yfinance as yf
|
|
from datetime import datetime, timedelta
|
|
from typing import Optional, Dict, Any
|
|
import logging
|
|
|
|
from .time_series_cache import (
|
|
get_cache, DataType,
|
|
fetch_ohlcv_with_cache, fetch_news_with_cache, fetch_fundamentals_with_cache
|
|
)
|
|
from .interface import get_data_in_range
|
|
from .googlenews_utils import getNewsData
|
|
from .config import get_config, DATA_DIR
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# YFinance OHLCV Data Caching
|
|
def fetch_yfinance_data_cached(symbol: str, start_date: datetime, end_date: datetime) -> pd.DataFrame:
    """
    Fetch YFinance OHLCV data with intelligent caching

    The download itself is performed by an inner fetcher that is handed to
    ``fetch_ohlcv_with_cache``; when (and for which sub-ranges) the fetcher
    is actually invoked is decided by that cache helper.

    Args:
        symbol: Stock ticker symbol
        start_date: Start date for data
        end_date: End date for data (treated as inclusive)

    Returns:
        DataFrame with OHLCV data, as returned by ``fetch_ohlcv_with_cache``
    """

    def _fetch_yfinance_api(symbol: str, start_date: datetime, end_date: datetime) -> pd.DataFrame:
        """Internal function to fetch from YFinance API

        Returns an empty DataFrame when nothing is found or when the
        request fails; failures are logged rather than raised.
        """
        try:
            ticker = yf.Ticker(symbol)

            # Add one day to end_date to make it inclusive
            end_date_inclusive = end_date + timedelta(days=1)

            # NOTE: `progress` is an option of `yf.download`, not of
            # `Ticker.history`. Passing it here makes current yfinance
            # releases raise TypeError, which the handler below would turn
            # into a silent "no data" result, so it is intentionally omitted.
            data = ticker.history(
                start=start_date.strftime('%Y-%m-%d'),
                end=end_date_inclusive.strftime('%Y-%m-%d'),
                auto_adjust=True,
            )

            if data.empty:
                logger.warning(f"No YFinance data found for {symbol} from {start_date.date()} to {end_date.date()}")
                return pd.DataFrame()

            # Reset index to make Date a column
            data = data.reset_index()

            # Standardize column names and add date column
            data['date'] = data['Date']
            data['symbol'] = symbol

            # Round numeric columns
            numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
            for col in numeric_cols:
                if col in data.columns:
                    data[col] = data[col].round(4)

            return data

        except Exception as e:
            # Best-effort fetch: report the problem and let the caller see
            # an empty frame instead of an exception.
            logger.error(f"Failed to fetch YFinance data for {symbol}: {e}")
            return pd.DataFrame()

    return fetch_ohlcv_with_cache(symbol, start_date, end_date, _fetch_yfinance_api)
|
|
|
|
|
|
def fetch_yfinance_window_cached(symbol: str, curr_date: datetime, look_back_days: int) -> pd.DataFrame:
    """
    Fetch cached YFinance OHLCV data for a trailing window ending at curr_date

    Args:
        symbol: Stock ticker symbol
        curr_date: Last day of the window
        look_back_days: Length of the window, in days, counted back from curr_date

    Returns:
        DataFrame produced by fetch_yfinance_data_cached for that window
    """
    window = timedelta(days=look_back_days)
    return fetch_yfinance_data_cached(symbol, curr_date - window, curr_date)
|
|
|
|
|
|
# News Data Caching
|
|
def fetch_finnhub_news_cached(symbol: str, start_date: datetime, end_date: datetime) -> pd.DataFrame:
    """
    Fetch Finnhub news for a symbol through the news cache

    Args:
        symbol: Stock ticker symbol
        start_date: First day of the requested range
        end_date: Last day of the requested range

    Returns:
        DataFrame returned by fetch_news_with_cache
    """

    def _fetch_finnhub_news_api(symbol: str, start_date: datetime, end_date: datetime) -> pd.DataFrame:
        """Load news via get_data_in_range and flatten it to one row per news item"""
        date_fmt = '%Y-%m-%d'
        try:
            raw_news = get_data_in_range(
                symbol,
                start_date.strftime(date_fmt),
                end_date.strftime(date_fmt),
                "news_data",
                DATA_DIR,
            )

            # Nothing stored for this range
            if not raw_news:
                return pd.DataFrame()

            # raw_news maps a date string to the list of items for that day
            rows = [
                {
                    'date': pd.to_datetime(day),
                    'symbol': symbol,
                    'headline': article.get('headline', ''),
                    'summary': article.get('summary', ''),
                    'source': article.get('source', ''),
                    'url': article.get('url', ''),
                    # Fall back to the day key when the item has no timestamp
                    'datetime': pd.to_datetime(article.get('datetime', day)),
                }
                for day, articles in raw_news.items()
                for article in articles
            ]
            return pd.DataFrame(rows)

        except Exception as exc:
            # Failures are logged and reported as "no data"
            logger.error(f"Failed to fetch Finnhub news for {symbol}: {exc}")
            return pd.DataFrame()

    return fetch_news_with_cache(symbol, start_date, end_date, _fetch_finnhub_news_api)
|
|
|
|
|
|
def fetch_google_news_cached(query: str, start_date: datetime, end_date: datetime) -> pd.DataFrame:
    """
    Fetch Google News results for a query through the news cache

    Args:
        query: Search query (spaces are replaced by '+' before the request)
        start_date: First day of the requested range
        end_date: Last day of the requested range

    Returns:
        DataFrame returned by fetch_news_with_cache
    """

    def _fetch_google_news_api(query: str, start_date: datetime, end_date: datetime) -> pd.DataFrame:
        """Call getNewsData and convert its results to one row per news item"""
        date_fmt = '%Y-%m-%d'
        try:
            search_term = query.replace(" ", "+")
            hits = getNewsData(
                search_term,
                start_date.strftime(date_fmt),
                end_date.strftime(date_fmt),
            )

            # No results for this query/range
            if not hits:
                return pd.DataFrame()

            rows = [
                {
                    # start_date is the fallback when an item carries no date
                    'date': pd.to_datetime(hit.get('date', start_date)),
                    'query': query,
                    'title': hit.get('title', ''),
                    'snippet': hit.get('snippet', ''),
                    'source': hit.get('source', ''),
                    'url': hit.get('url', ''),
                    'published': pd.to_datetime(hit.get('published', start_date)),
                }
                for hit in hits
            ]
            return pd.DataFrame(rows)

        except Exception as exc:
            # Failures are logged and reported as "no data"
            logger.error(f"Failed to fetch Google News for query '{query}': {exc}")
            return pd.DataFrame()

    return fetch_news_with_cache(query, start_date, end_date, _fetch_google_news_api)
|
|
|
|
|
|
# Technical Indicators Caching
|
|
def fetch_technical_indicators_cached(symbol: str, indicator: str, start_date: datetime, end_date: datetime, **kwargs) -> pd.DataFrame:
    """
    Fetch technical indicators with caching

    Args:
        symbol: Stock ticker symbol
        indicator: Technical indicator name
        start_date: Start date
        end_date: End date
        **kwargs: Additional parameters; forwarded to the cache call
            together with ``indicator``

    Returns:
        DataFrame with indicator data, as returned by
        ``cache.fetch_with_cache``
    """

    def _fetch_indicator_api(symbol: str, start_date: datetime, end_date: datetime, **kwargs) -> pd.DataFrame:
        """Internal function to calculate technical indicators

        Produces one record per row of the underlying price data. Rows
        whose indicator cannot be computed are logged and skipped. Any
        keyword arguments received are merged into every record after the
        fixed keys, so a same-named kwarg overrides the fixed value.
        """
        try:
            from .stockstats_utils import StockstatsUtils

            # First get the underlying price data
            price_data = fetch_yfinance_data_cached(symbol, start_date, end_date)

            if price_data.empty:
                return pd.DataFrame()

            # Calculate indicator for each date
            indicator_records = []
            for _, row in price_data.iterrows():
                # Bind a printable label *before* the try block so the
                # warning below never hits an unbound name (first row) or
                # reports the previous row's date (later rows).
                raw_date = row.get('date')
                curr_date = str(raw_date)
                try:
                    curr_date = raw_date.strftime('%Y-%m-%d')
                    indicator_value = StockstatsUtils.get_stock_stats(
                        symbol,
                        indicator,
                        curr_date,
                        DATA_DIR,
                        online=True
                    )

                    # Only None / empty string mean "no value". A plain
                    # truthiness test would also discard a legitimate
                    # indicator reading of 0.
                    if indicator_value is None or (
                        isinstance(indicator_value, str) and not indicator_value
                    ):
                        value = None
                    else:
                        value = float(indicator_value)

                    record = {
                        'date': row['date'],
                        'symbol': symbol,
                        'indicator': indicator,
                        'value': value,
                        **kwargs
                    }
                    indicator_records.append(record)

                except Exception as e:
                    # Best-effort per row: skip this date, keep the rest
                    logger.warning(f"Failed to calculate {indicator} for {symbol} on {curr_date}: {e}")
                    continue

            return pd.DataFrame(indicator_records)

        except Exception as e:
            logger.error(f"Failed to fetch indicators for {symbol}: {e}")
            return pd.DataFrame()

    cache = get_cache()
    return cache.fetch_with_cache(symbol, DataType.INDICATORS, start_date, end_date, _fetch_indicator_api, indicator=indicator, **kwargs)
|
|
|
|
|
|
# Insider Trading Data Caching
|
|
def fetch_insider_data_cached(symbol: str, start_date: datetime, end_date: datetime, data_type: str = "insider_trans") -> pd.DataFrame:
    """
    Fetch insider data for a symbol through the cache

    Args:
        symbol: Stock ticker symbol
        start_date: First day of the requested range
        end_date: Last day of the requested range
        data_type: Kind of insider data ('insider_trans' or 'insider_senti')

    Returns:
        DataFrame returned by the cache's fetch_with_cache
    """

    def _fetch_insider_api(symbol: str, start_date: datetime, end_date: datetime, data_type: str = "insider_trans") -> pd.DataFrame:
        """Load insider data via get_data_in_range and flatten it to one row per entry"""
        date_fmt = '%Y-%m-%d'
        try:
            raw = get_data_in_range(
                symbol,
                start_date.strftime(date_fmt),
                end_date.strftime(date_fmt),
                data_type,
                DATA_DIR,
            )

            # Nothing stored for this range
            if not raw:
                return pd.DataFrame()

            # The entry's own fields are spread last, so they take
            # precedence over the three fixed keys on a name clash.
            rows = [
                {
                    'date': pd.to_datetime(day),
                    'symbol': symbol,
                    'data_type': data_type,
                    **entry,
                }
                for day, entries in raw.items()
                for entry in entries
            ]
            return pd.DataFrame(rows)

        except Exception as exc:
            # Failures are logged and reported as "no data"
            logger.error(f"Failed to fetch insider data for {symbol}: {exc}")
            return pd.DataFrame()

    cache = get_cache()

    # 'insider_trans' is stored under INSIDER; anything else under SENTIMENT
    if data_type == "insider_trans":
        cache_data_type = DataType.INSIDER
    else:
        cache_data_type = DataType.SENTIMENT

    return cache.fetch_with_cache(
        symbol,
        cache_data_type,
        start_date,
        end_date,
        _fetch_insider_api,
        data_type=data_type,
    )
|
|
|
|
|
|
# Convenience Functions for Integration
|
|
def get_cached_price_data(symbol: str, start_date: str, end_date: str) -> str:
    """
    Render cached price data as text (compatible with existing interface)

    Args:
        symbol: Stock ticker symbol
        start_date: Start date in 'YYYY-MM-DD' format
        end_date: End date in 'YYYY-MM-DD' format

    Returns:
        A headed table of the price data, a "no data" message when the
        frame is empty, or an error message if anything raises
    """
    date_fmt = '%Y-%m-%d'
    try:
        frame = fetch_yfinance_data_cached(
            symbol,
            datetime.strptime(start_date, date_fmt),
            datetime.strptime(end_date, date_fmt),
        )

        if frame.empty:
            return f"No data found for {symbol} between {start_date} and {end_date}"

        # Format similar to existing interface
        display_opts = ('display.max_rows', None, 'display.max_columns', None, 'display.width', None)
        with pd.option_context(*display_opts):
            table = frame.to_string(index=False)

        header = f"## Cached Market Data for {symbol} from {start_date} to {end_date}:"
        return f"{header}\n\n{table}"

    except Exception as exc:
        # Errors (including unparsable dates) are returned as text, not raised
        logger.error(f"Failed to get cached price data: {exc}")
        return f"Error retrieving cached data for {symbol}: {exc}"
|
|
|
|
|
|
def get_cached_news_data(symbol: str, curr_date: str, look_back_days: int = 7) -> str:
    """
    Render cached Finnhub news as text (compatible with existing interface)

    Args:
        symbol: Stock ticker symbol
        curr_date: Last day of the window, in 'YYYY-MM-DD' format
        look_back_days: Window length in days, counted back from curr_date

    Returns:
        A headed list of headline/summary entries, a "no news" message when
        the frame is empty, or an error message if anything raises
    """
    date_fmt = '%Y-%m-%d'
    try:
        window_end = datetime.strptime(curr_date, date_fmt)
        window_start = window_end - timedelta(days=look_back_days)

        news = fetch_finnhub_news_cached(symbol, window_start, window_end)
        if news.empty:
            return f"No cached news found for {symbol}"

        # Format similar to existing interface: one markdown entry per row
        entries = [
            f"### {item['headline']} ({item['date'].strftime('%Y-%m-%d')})\n{item['summary']}\n\n"
            for _, item in news.iterrows()
        ]
        heading = f"## {symbol} Cached News, from {window_start.strftime('%Y-%m-%d')} to {curr_date}:\n"
        return heading + "".join(entries)

    except Exception as exc:
        # Errors (including an unparsable date) are returned as text, not raised
        logger.error(f"Failed to get cached news data: {exc}")
        return f"Error retrieving cached news for {symbol}: {exc}"
|
|
|
|
|
|
# Cache Management Functions
|
|
def get_cache_summary() -> Dict[str, Any]:
    """Return the statistics reported by the cache's get_cache_stats()"""
    return get_cache().get_cache_stats()
|
|
|
|
|
|
def clear_old_cache_data(days: int = 30) -> int:
    """Ask the cache to clear entries older than `days` days and return its result"""
    return get_cache().clear_cache(older_than_days=days)
|
|
|
|
|
|
def clear_symbol_cache(symbol: str) -> int:
    """Clear cached data for one symbol across every DataType and return the summed counts"""
    cache = get_cache()
    # clear_cache is called once per data type; its return values are added up
    return sum(
        cache.clear_cache(symbol=symbol, data_type=kind)
        for kind in DataType
    )
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: request the same 30-day window twice, then show
    # the cache statistics.
    print("Testing cached API wrappers...")

    demo_symbol = "AAPL"
    window_end = datetime.now()
    window_start = window_end - timedelta(days=30)

    print(f"Fetching {demo_symbol} data from {window_start.date()} to {window_end.date()}")

    # The first request is expected to go to the API, the second to be
    # answered from the cache.
    for ordinal in ("First", "Second"):
        frame = fetch_yfinance_data_cached(demo_symbol, window_start, window_end)
        print(f"{ordinal} call: {len(frame)} records")

    # Print cache stats
    print(f"Cache stats: {get_cache_summary()}")