693 lines
26 KiB
Python
693 lines
26 KiB
Python
"""
|
|
Fundamental Data Service for aggregating and analyzing financial statement data.
|
|
"""
|
|
|
|
import logging
|
|
from datetime import date, datetime
|
|
from typing import Any
|
|
|
|
from tradingagents.clients import FinnhubClient
|
|
from tradingagents.models.context import (
|
|
DataQuality,
|
|
FinancialStatement,
|
|
FundamentalContext,
|
|
)
|
|
from tradingagents.repositories.fundamental_repository import FundamentalDataRepository
|
|
from tradingagents.services.base import BaseService
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class FundamentalDataService(BaseService):
|
|
"""Service for fundamental financial data aggregation and analysis."""
|
|
|
|
def __init__(
    self,
    finnhub_client: FinnhubClient,
    repository: FundamentalDataRepository,
    data_dir: str = "data",
    **kwargs,
):
    """Set up the service with its API client and cache repository.

    The base service is always initialized with ``online_mode=True``.

    Args:
        finnhub_client: Client for Finnhub/financial API access.
        repository: Repository for cached fundamental data.
        data_dir: Directory for data storage, forwarded to the base service.
        **kwargs: Additional keyword arguments forwarded to the base service.
    """
    super().__init__(online_mode=True, data_dir=data_dir, **kwargs)
    self.finnhub_client, self.repository = finnhub_client, repository
|
|
|
|
def get_fundamental_context(
    self,
    symbol: str,
    start_date: str,
    end_date: str,
    frequency: str = "quarterly",
    force_refresh: bool = False,
    **kwargs,
) -> FundamentalContext:
    """Get fundamental analysis context for a company.

    Dates are validated first. Statements are then loaded local-first, or
    straight from the APIs when ``force_refresh`` is set. Key ratios and a
    data-quality rating are derived from whatever statements were obtained,
    and the result is assembled by ``_handle_partial_statements``.

    Args:
        symbol: Stock ticker symbol.
        start_date: Start date in YYYY-MM-DD format.
        end_date: End date in YYYY-MM-DD format.
        frequency: Reporting frequency ('quarterly' or 'annual').
        force_refresh: If True, skip local data and fetch fresh from APIs.
        **kwargs: Accepted for signature compatibility; not used here.

    Returns:
        FundamentalContext with financial statements and key ratios.

    Raises:
        ValueError: If a date is not valid ISO format, if ``end_date`` is
            before ``start_date``, or (raised by
            ``_handle_partial_statements``) if no statement could be
            obtained and the data source is not the local cache.
    """
    # Validate date strings before touching the cache or the APIs.
    try:
        start_dt = date.fromisoformat(start_date)
        end_dt = date.fromisoformat(end_date)
    except ValueError as e:
        # Chain the parser error so the root cause stays in the traceback.
        raise ValueError(f"Invalid date format: {e}") from e

    if end_dt < start_dt:
        raise ValueError(f"End date {end_date} is before start date {start_date}")

    balance_sheet = income_statement = cash_flow = None
    error_info: dict[str, Any] = {}
    data_source = "unknown"

    try:
        # Local-first by default; force_refresh bypasses local data entirely.
        if force_refresh:
            fetch = self._fetch_and_cache_fresh_fundamental_data
        else:
            fetch = self._get_fundamental_data_local_first
        balance_sheet, income_statement, cash_flow, data_source = fetch(
            symbol, start_date, end_date, frequency
        )
    except Exception as e:
        logger.error(f"Error fetching fundamental data: {e}")
        error_info = {"error": str(e)}

    key_ratios = self._calculate_key_ratios(
        balance_sheet, income_statement, cash_flow
    )

    statement_count = sum(
        statement is not None
        for statement in (balance_sheet, income_statement, cash_flow)
    )
    data_quality = self._determine_data_quality(
        data_source=data_source,
        statement_count=statement_count,
        has_errors=bool(error_info),
    )

    # Builds the context for partial data; raises when nothing was obtained.
    return self._handle_partial_statements(
        symbol=symbol,
        start_date=start_date,
        end_date=end_date,
        frequency=frequency,
        balance_sheet=balance_sheet,
        income_statement=income_statement,
        cash_flow=cash_flow,
        key_ratios=key_ratios,
        data_quality=data_quality,
        data_source=data_source,
        force_refresh=force_refresh,
        error_info=error_info,
    )
|
|
|
|
def get_context(
    self,
    symbol: str,
    start_date: str,
    end_date: str,
    frequency: str = "quarterly",
    **kwargs,
) -> FundamentalContext:
    """Delegate to get_fundamental_context so this service matches the others.

    All arguments, including any extra keyword arguments, are forwarded
    by keyword.
    """
    request = {
        "symbol": symbol,
        "start_date": start_date,
        "end_date": end_date,
        "frequency": frequency,
    }
    return self.get_fundamental_context(**request, **kwargs)
|
|
|
|
def _get_balance_sheet(
    self, symbol: str, frequency: str, report_date: date
) -> FinancialStatement | None:
    """Fetch the balance sheet from the Finnhub client and convert it.

    Any exception from the client call or the conversion is logged as a
    warning and reported as ``None``.
    """
    try:
        return self._convert_to_financial_statement(
            self.finnhub_client.get_balance_sheet(symbol, frequency, report_date)
        )
    except Exception as exc:
        logger.warning(f"Failed to get balance sheet for {symbol}: {exc}")
    return None
|
|
|
|
def _get_income_statement(
    self, symbol: str, frequency: str, report_date: date
) -> FinancialStatement | None:
    """Fetch the income statement from the Finnhub client and convert it.

    Any exception from the client call or the conversion is logged as a
    warning and reported as ``None``.
    """
    try:
        return self._convert_to_financial_statement(
            self.finnhub_client.get_income_statement(symbol, frequency, report_date)
        )
    except Exception as exc:
        logger.warning(f"Failed to get income statement for {symbol}: {exc}")
    return None
|
|
|
|
def _get_cash_flow(
    self, symbol: str, frequency: str, report_date: date
) -> FinancialStatement | None:
    """Fetch the cash flow statement from the Finnhub client and convert it.

    Any exception from the client call or the conversion is logged as a
    warning and reported as ``None``.
    """
    try:
        return self._convert_to_financial_statement(
            self.finnhub_client.get_cash_flow(symbol, frequency, report_date)
        )
    except Exception as exc:
        logger.warning(f"Failed to get cash flow for {symbol}: {exc}")
    return None
|
|
|
|
def _convert_to_financial_statement(
    self, data: dict[str, Any]
) -> FinancialStatement | None:
    """Build a FinancialStatement from a raw statement payload.

    Returns ``None`` when the payload is empty, has no ``"data"`` entry, or
    that entry is empty. Construction failures are logged as a warning and
    also yield ``None``. Missing header fields fall back to ``"Unknown"``
    (period), ``""`` (report/publish date) and ``"USD"`` (currency).
    """
    has_line_items = bool(data) and "data" in data and bool(data["data"])
    if not has_line_items:
        return None

    try:
        statement_fields = {
            "period": data.get("period", "Unknown"),
            "report_date": data.get("report_date", ""),
            "publish_date": data.get("publish_date", ""),
            "currency": data.get("currency", "USD"),
            "data": data["data"],
        }
        return FinancialStatement(**statement_fields)
    except Exception as exc:
        logger.warning(f"Failed to convert financial statement: {exc}")
        return None
|
|
|
|
def _parse_cached_statements(self, cached_data: dict[str, Any]) -> tuple:
    """Rebuild statements from a cached repository payload.

    Returns:
        ``(balance_sheet, income_statement, cash_flow)``; an element is
        ``None`` when that statement is absent from the payload's
        ``"financial_statements"`` mapping (or the mapping itself is absent).
    """
    if not cached_data or "financial_statements" not in cached_data:
        return None, None, None

    stored = cached_data["financial_statements"]
    balance_sheet, income_statement, cash_flow = (
        FinancialStatement(**stored[name]) if name in stored else None
        for name in ("balance_sheet", "income_statement", "cash_flow")
    )
    return balance_sheet, income_statement, cash_flow
|
|
|
|
def _get_fundamental_data_local_first(
    self, symbol: str, start_date: str, end_date: str, frequency: str
) -> tuple[
    FinancialStatement | None,
    FinancialStatement | None,
    FinancialStatement | None,
    str,
]:
    """Load statements from the repository when it covers the period, else from the APIs.

    Freshly fetched statements are written back to the repository on a
    best-effort basis: a caching failure is only logged as a warning.

    Returns:
        ``(balance_sheet, income_statement, cash_flow, source)`` where
        ``source`` is ``"local_cache"``, ``"live_api"``, or ``"error"`` when
        any other exception occurred (all statements are ``None`` then).
    """
    try:
        covered_locally = self.repository.has_data_for_period(
            symbol, start_date, end_date, frequency=frequency
        )
        if covered_locally:
            logger.info(
                f"Using local fundamental data for {symbol} ({start_date} to {end_date})"
            )
            cached = self.repository.get_data(
                symbol=symbol,
                start_date=start_date,
                end_date=end_date,
                frequency=frequency,
            )
            local_bs, local_is, local_cf = self._parse_cached_statements(cached)
            return local_bs, local_is, local_cf, "local_cache"

        # The repository cannot serve this period, so go to the APIs.
        logger.info(
            f"Local data insufficient, fetching from APIs for {symbol} ({start_date} to {end_date})"
        )
        fresh_bs, fresh_is, fresh_cf, _ = self._fetch_fresh_fundamental_data(
            symbol, start_date, end_date, frequency
        )

        fetched = {
            "balance_sheet": fresh_bs,
            "income_statement": fresh_is,
            "cash_flow": fresh_cf,
        }
        if any(fetched.values()):
            try:
                cached_at = datetime.utcnow().isoformat()
                payload = {
                    "symbol": symbol,
                    "frequency": frequency,
                    "financial_statements": {
                        name: statement.model_dump()
                        for name, statement in fetched.items()
                        if statement
                    },
                    "metadata": {"cached_at": cached_at},
                }
                self.repository.store_data(symbol, payload, frequency=frequency)
                logger.debug(f"Cached fresh fundamental data for {symbol}")
            except Exception as cache_exc:
                logger.warning(
                    f"Failed to cache fundamental data for {symbol}: {cache_exc}"
                )

        return fresh_bs, fresh_is, fresh_cf, "live_api"

    except Exception as exc:
        logger.error(f"Error fetching fundamental data for {symbol}: {exc}")
        return None, None, None, "error"
|
|
|
|
def _fetch_and_cache_fresh_fundamental_data(
    self, symbol: str, start_date: str, end_date: str, frequency: str
) -> tuple[
    FinancialStatement | None,
    FinancialStatement | None,
    FinancialStatement | None,
    str,
]:
    """Bypass local data: clear the cached range, fetch from the APIs, re-cache.

    Clearing and caching are best-effort; their failures are only logged as
    warnings and do not abort the refresh.

    Returns:
        ``(balance_sheet, income_statement, cash_flow, source)`` where
        ``source`` is ``"live_api_refresh"``, or ``"refresh_error"`` when any
        other exception occurred (all statements are ``None`` then).
    """
    try:
        logger.info(
            f"Force refreshing fundamental data from APIs for {symbol} ({start_date} to {end_date})"
        )

        # Drop whatever is cached for this range before refetching.
        try:
            self.repository.clear_data(
                symbol, start_date, end_date, frequency=frequency
            )
            logger.debug(f"Cleared existing fundamental data for {symbol}")
        except Exception as clear_exc:
            logger.warning(
                f"Failed to clear existing fundamental data for {symbol}: {clear_exc}"
            )

        fresh_bs, fresh_is, fresh_cf, _ = self._fetch_fresh_fundamental_data(
            symbol, start_date, end_date, frequency
        )

        fetched = {
            "balance_sheet": fresh_bs,
            "income_statement": fresh_is,
            "cash_flow": fresh_cf,
        }
        if any(fetched.values()):
            try:
                refreshed_at = datetime.utcnow().isoformat()
                payload = {
                    "symbol": symbol,
                    "frequency": frequency,
                    "financial_statements": {
                        name: statement.model_dump()
                        for name, statement in fetched.items()
                        if statement
                    },
                    "metadata": {"refreshed_at": refreshed_at},
                }
                self.repository.store_data(
                    symbol, payload, frequency=frequency, overwrite=True
                )
                logger.debug(f"Cached refreshed fundamental data for {symbol}")
            except Exception as cache_exc:
                logger.warning(
                    f"Failed to cache refreshed fundamental data for {symbol}: {cache_exc}"
                )

        return fresh_bs, fresh_is, fresh_cf, "live_api_refresh"

    except Exception as exc:
        logger.error(f"Error force refreshing fundamental data for {symbol}: {exc}")
        return None, None, None, "refresh_error"
|
|
|
|
def _fetch_fresh_fundamental_data(
    self, symbol: str, start_date: str, end_date: str, frequency: str
) -> tuple[
    FinancialStatement | None,
    FinancialStatement | None,
    FinancialStatement | None,
    str,
]:
    """Fetch the three statements from the Finnhub client as of ``end_date``.

    Nothing is fetched when the service is offline or has no client; the
    statements are then all ``None`` and the source label is still
    ``"live_api"``. ``start_date`` is not used by this method.

    Returns:
        ``(balance_sheet, income_statement, cash_flow, source)`` where
        ``source`` is ``"live_api"``, or ``"date_error"`` when ``end_date``
        cannot be parsed.
    """
    if not (self.is_online() and self.finnhub_client):
        return None, None, None, "live_api"

    # The client calls take a date object rather than the ISO string.
    try:
        report_date = date.fromisoformat(end_date)
    except ValueError as e:
        logger.error(f"Invalid end_date format '{end_date}': {e}")
        return None, None, None, "date_error"

    fetched_balance_sheet = self._get_balance_sheet(symbol, frequency, report_date)
    fetched_income_statement = self._get_income_statement(
        symbol, frequency, report_date
    )
    fetched_cash_flow = self._get_cash_flow(symbol, frequency, report_date)
    return (
        fetched_balance_sheet,
        fetched_income_statement,
        fetched_cash_flow,
        "live_api",
    )
|
|
|
|
def _calculate_key_ratios(
    self,
    balance_sheet: FinancialStatement | None,
    income_statement: FinancialStatement | None,
    cash_flow: FinancialStatement | None,
) -> dict[str, float]:
    """Calculate key financial ratios from financial statements.

    Each ratio is computed independently: it is emitted only when all of its
    required line items are present and its denominator is positive. A
    malformed value (for example ``None``) only drops the ratios that depend
    on it, with a warning, instead of discarding every ratio after it.

    "Short-term Investments" is optional for both the quick ratio and the
    cash ratio and counts as 0 when absent.

    Args:
        balance_sheet: Balance sheet whose ``data`` holds the line items, or None.
        income_statement: Income statement whose ``data`` holds the line
            items, or None.
        cash_flow: Currently unused; kept for interface compatibility.

    Returns:
        Mapping of ratio name to value. Possible keys: current_ratio,
        quick_ratio, cash_ratio, debt_to_equity, debt_to_assets,
        equity_multiplier, gross_margin, operating_margin, net_margin, roe,
        roa, asset_turnover, inventory_turnover, receivables_turnover.
    """

    def line_items(statement) -> dict:
        """Return the statement's line-item mapping, or {} when unavailable."""
        if not statement:
            return {}
        return getattr(statement, "data", None) or {}

    bs_data = line_items(balance_sheet)
    is_data = line_items(income_statement)

    # (ratio name, numerator thunk, denominator thunk). Indexing with [] marks
    # a required line item: a missing key raises KeyError and skips the ratio.
    ratio_specs = (
        # Liquidity ratios
        (
            "current_ratio",
            lambda: bs_data["Total Current Assets"],
            lambda: bs_data["Total Current Liabilities"],
        ),
        (
            "quick_ratio",
            lambda: bs_data["Cash and Cash Equivalents"]
            + bs_data.get("Short-term Investments", 0)
            + bs_data["Accounts Receivable"],
            lambda: bs_data["Total Current Liabilities"],
        ),
        (
            "cash_ratio",
            lambda: bs_data["Cash and Cash Equivalents"]
            + bs_data.get("Short-term Investments", 0),
            lambda: bs_data["Total Current Liabilities"],
        ),
        # Leverage ratios
        (
            "debt_to_equity",
            lambda: bs_data["Long-term Debt"],
            lambda: bs_data["Total Shareholders Equity"],
        ),
        (
            "debt_to_assets",
            lambda: bs_data["Long-term Debt"],
            lambda: bs_data["Total Assets"],
        ),
        (
            "equity_multiplier",
            lambda: bs_data["Total Assets"],
            lambda: bs_data["Total Shareholders Equity"],
        ),
        # Profitability ratios
        (
            "gross_margin",
            lambda: is_data["Total Revenue"] - is_data["Cost of Revenue"],
            lambda: is_data["Total Revenue"],
        ),
        (
            "operating_margin",
            lambda: is_data["Operating Income"],
            lambda: is_data["Total Revenue"],
        ),
        (
            "net_margin",
            lambda: is_data["Net Income"],
            lambda: is_data["Total Revenue"],
        ),
        (
            "roe",
            lambda: is_data["Net Income"],
            lambda: bs_data["Total Shareholders Equity"],
        ),
        (
            "roa",
            lambda: is_data["Net Income"],
            lambda: bs_data["Total Assets"],
        ),
        # Efficiency ratios
        (
            "asset_turnover",
            lambda: is_data["Total Revenue"],
            lambda: bs_data["Total Assets"],
        ),
        (
            "inventory_turnover",
            lambda: is_data["Cost of Revenue"],
            lambda: bs_data["Inventory"],
        ),
        (
            "receivables_turnover",
            lambda: is_data["Total Revenue"],
            lambda: bs_data["Accounts Receivable"],
        ),
    )

    ratios: dict[str, float] = {}
    for name, numerator, denominator in ratio_specs:
        try:
            divisor = denominator()
            # Non-positive denominators are skipped, as before.
            if divisor > 0:
                ratios[name] = numerator() / divisor
        except KeyError:
            # A required line item is missing; this ratio is not applicable.
            continue
        except Exception as e:
            # Keep the method's never-raise contract, but only for this ratio.
            logger.warning(f"Error calculating financial ratio {name}: {e}")

    return ratios
|
|
|
|
def _handle_partial_statements(
    self,
    symbol: str,
    start_date: str,
    end_date: str,
    frequency: str,
    balance_sheet: FinancialStatement | None,
    income_statement: FinancialStatement | None,
    cash_flow: FinancialStatement | None,
    key_ratios: dict[str, float],
    data_quality: DataQuality,
    data_source: str,
    force_refresh: bool,
    error_info: dict[str, Any],
) -> FundamentalContext:
    """Assemble the context, tolerating missing statements.

    - No statement at all and the source is not ``"local_cache"``: raise.
    - Otherwise: return a context whose metadata records which statements
      are present, whether the data is partial, and any error details.

    Raises:
        ValueError: If no statement is available and ``data_source`` is not
            ``"local_cache"``.
    """
    availability = {
        "has_balance_sheet": balance_sheet is not None,
        "has_income_statement": income_statement is not None,
        "has_cash_flow": cash_flow is not None,
    }
    statement_count = sum(availability.values())

    # An empty result is only tolerated when it came from the local cache.
    if statement_count == 0 and data_source != "local_cache":
        detail = (
            f": {error_info.get('error', 'Unknown error')}" if error_info else ""
        )
        raise ValueError(
            f"Failed to fetch any financial statements for {symbol}" + detail
        )

    # error_info is merged last, so its keys can override the entries above.
    metadata = {
        "data_quality": data_quality,
        "service": "fundamental_data",
        "online_mode": self.is_online(),
        "frequency": frequency,
        "data_source": data_source,
        "force_refresh": force_refresh,
        **availability,
        "partial_data": statement_count < 3,
        "statement_count": statement_count,
        **error_info,
    }

    return FundamentalContext(
        symbol=symbol,
        period={"start": start_date, "end": end_date},
        balance_sheet=balance_sheet,
        income_statement=income_statement,
        cash_flow=cash_flow,
        key_ratios=key_ratios,
        metadata=metadata,
    )
|
|
|
|
def detect_fundamental_gaps(
    self, symbol: str, start_date: str, end_date: str, frequency: str
) -> list[str]:
    """Return the report dates in the requested window that need fetching.

    Expected report dates are calendar period ends inside
    ``[start_date, end_date]``:

    - quarterly: Q1 (Mar 31), Q2 (Jun 30), Q3 (Sep 30), Q4 (Dec 31)
    - annual: Dec 31

    A date counts as cached when any cached statement (balance sheet, income
    statement or cash flow) carries it as its ``report_date``.

    Example: requesting quarterly from 2024-01-01 to 2024-12-31 while the
    cache has Q1 and Q3 returns ``["2024-06-30", "2024-12-31"]``.

    Args:
        symbol: Stock ticker symbol.
        start_date: Start date in YYYY-MM-DD format.
        end_date: End date in YYYY-MM-DD format.
        frequency: Reporting frequency ('quarterly' or 'annual').

    Returns:
        ISO-formatted report dates that are expected but not cached, in
        chronological order. Empty when a date is malformed or the frequency
        is unsupported (both are logged).
    """
    try:
        start_dt = date.fromisoformat(start_date)
        end_dt = date.fromisoformat(end_date)
    except ValueError:
        logger.error(
            f"Invalid date format in gap detection: {start_date}, {end_date}"
        )
        return []

    # Collect report dates already present in the repository.
    existing_dates: set[str] = set()
    try:
        # Keyword arguments, matching the other repository calls in this class.
        cached_data = self.repository.get_data(
            symbol=symbol,
            start_date=start_date,
            end_date=end_date,
            frequency=frequency,
        )
        statements = (cached_data or {}).get("financial_statements") or {}
        for statement_type in ("balance_sheet", "income_statement", "cash_flow"):
            stmt = statements.get(statement_type) or {}
            report_date = stmt.get("report_date")
            if not report_date:
                continue
            # Normalize date objects so they compare equal to the ISO strings
            # produced below (datetime is checked first: it subclasses date).
            if isinstance(report_date, datetime):
                report_date = report_date.date()
            if isinstance(report_date, date):
                report_date = report_date.isoformat()
            existing_dates.add(report_date)
    except Exception as e:
        # Best-effort: dates gathered before the failure are still honoured.
        logger.warning(f"Error checking cached data for gap detection: {e}")

    if frequency == "quarterly":
        period_ends = ((3, 31), (6, 30), (9, 30), (12, 31))
    elif frequency == "annual":
        period_ends = ((12, 31),)
    else:
        logger.warning(
            f"Unsupported frequency '{frequency}' in gap detection; "
            "no report dates expected"
        )
        return []

    expected_dates = []
    for year in range(start_dt.year, end_dt.year + 1):
        for month, day in period_ends:
            period_end = date(year, month, day)
            if start_dt <= period_end <= end_dt:
                expected_dates.append(period_end.isoformat())

    missing_dates = [d for d in expected_dates if d not in existing_dates]

    if missing_dates:
        logger.info(
            f"Gap detection for {symbol}: missing {len(missing_dates)} report periods"
        )

    return missing_dates
|
|
|
|
def _determine_data_quality(
    self, data_source: str, statement_count: int, has_errors: bool = False
) -> DataQuality:
    """Rate the data from its source, the number of statements, and errors.

    - Errors or no statements at all: LOW.
    - Live API sources: HIGH with three statements, MEDIUM with two,
      LOW otherwise.
    - Local cache and error sources: LOW.
    - Any other source: MEDIUM.
    """
    if has_errors or statement_count == 0:
        return DataQuality.LOW

    if data_source in ("live_api", "live_api_refresh"):
        by_count = {3: DataQuality.HIGH, 2: DataQuality.MEDIUM}
        return by_count.get(statement_count, DataQuality.LOW)

    if data_source in ("local_cache", "error", "refresh_error"):
        return DataQuality.LOW

    return DataQuality.MEDIUM
|