71 lines
3.1 KiB
Python
71 lines
3.1 KiB
Python
"""SimFin data provider for fundamental financial statements."""
|
|
|
|
import os
|
|
|
|
import pandas as pd
|
|
|
|
|
|
def _setup():
    """Prepare the SimFin client module and hand it back.

    The import is done lazily inside the function. The API key is read from
    the ``SIMFIN_API_KEY`` environment variable (an empty string is used when
    it is unset) and the SimFin data directory is set to ``/tmp/simfin_data/``.

    Returns:
        The configured ``simfin`` module.
    """
    import simfin

    api_key = os.environ.get("SIMFIN_API_KEY", "")
    simfin.set_api_key(api_key)
    simfin.set_data_dir("/tmp/simfin_data/")
    return simfin
|
|
|
|
|
|
def _filter_and_format(df: pd.DataFrame, ticker: str, curr_date: str, freq: str, label: str, description: str) -> str:
    """Format the most recently published statement row for one ticker.

    Only rows whose publish date is on or before ``curr_date`` are considered,
    so the result never contains data that was not yet public on that date.
    The caller's DataFrame is never modified.

    Args:
        df: Statement data with "Ticker", "Report Date" and "Publish Date"
            available either as columns or as index levels.
        ticker: Ticker symbol to select (exact match against "Ticker").
        curr_date: Cut-off date; anything ``pd.to_datetime`` can parse.
        freq: Frequency label used in the heading of the result.
        label: Human-readable statement name used in the heading and in the
            "no data" message.
        description: Explanatory text appended after the data.

    Returns:
        A heading, the string form of the selected row (without "SimFinId"),
        and the description; or a "No ... available" message when no row
        matches.
    """
    date_columns = ("Report Date", "Publish Date")
    required = ("Ticker", *date_columns)

    # The required fields may arrive as index levels rather than columns
    # (NOTE(review): SimFin's fundamental loaders appear to index by
    # Ticker/Report Date by default - confirm). reset_index returns a new
    # frame, so the caller's object stays untouched.
    if any(col not in df.columns and col in df.index.names for col in required):
        df = df.reset_index()

    cutoff = pd.to_datetime(curr_date, utc=True).normalize()
    no_data = f"No {label} available for {ticker} before {curr_date}."

    # Copy the ticker's rows before normalising dates: writing the converted
    # columns back into `df` would silently mutate the caller's DataFrame.
    rows = df[df["Ticker"] == ticker].copy()
    if rows.empty:
        return no_data

    for col in date_columns:
        rows[col] = pd.to_datetime(rows[col], utc=True).dt.normalize()

    published = rows[rows["Publish Date"] <= cutoff]
    if published.empty:
        return no_data

    # Select by position: a label lookup would return several rows when the
    # index contains duplicate labels. Ties resolve to the first occurrence.
    latest = published.iloc[published["Publish Date"].argmax()]
    latest = latest.drop("SimFinId", errors="ignore")

    publish_date = str(latest["Publish Date"])[:10]
    return (
        f"## {freq} {label} for {ticker} released on {publish_date}:\n"
        + str(latest)
        + f"\n\n{description}"
    )
|
|
|
|
|
|
def get_balance_sheet(ticker: str, freq: str, curr_date: str) -> str:
    """Return the latest balance sheet for ``ticker`` as formatted text.

    Loads the SimFin balance-sheet dataset for the US market, with ``freq``
    passed as the dataset variant, and formats the most recent row published
    on or before ``curr_date``.

    Args:
        ticker: Ticker symbol to look up.
        freq: SimFin dataset variant; also shown in the heading.
        curr_date: Cut-off date for the publish-date filter.

    Returns:
        The formatted balance sheet, or a "no data" message.
    """
    explanation = (
        "This includes metadata like reporting dates and currency, share details, and a breakdown of assets, "
        "liabilities, and equity. Assets are grouped as current (liquid items like cash and receivables) and "
        "noncurrent (long-term investments and property). Liabilities are split between short-term obligations "
        "and long-term debts, while equity reflects shareholder funds such as paid-in capital and retained earnings."
    )
    simfin_module = _setup()
    statements = simfin_module.load_balance(variant=freq, market="us")
    return _filter_and_format(statements, ticker, curr_date, freq, "balance sheet", explanation)
|
|
|
|
|
|
def get_cashflow(ticker: str, freq: str, curr_date: str) -> str:
    """Return the latest cash flow statement for ``ticker`` as formatted text.

    Loads the SimFin cash-flow dataset for the US market, with ``freq`` passed
    as the dataset variant, and formats the most recent row published on or
    before ``curr_date``.

    Args:
        ticker: Ticker symbol to look up.
        freq: SimFin dataset variant; also shown in the heading.
        curr_date: Cut-off date for the publish-date filter.

    Returns:
        The formatted cash flow statement, or a "no data" message.
    """
    explanation = (
        "Operating activities show cash generated from core business operations. Investing activities cover asset "
        "acquisitions/disposals. Financing activities include debt transactions and dividend payments. The net change "
        "in cash represents the overall increase or decrease in the company's cash position."
    )
    simfin_module = _setup()
    statements = simfin_module.load_cashflow(variant=freq, market="us")
    return _filter_and_format(statements, ticker, curr_date, freq, "cash flow statement", explanation)
|
|
|
|
|
|
def get_income_statement(ticker: str, freq: str, curr_date: str) -> str:
    """Return the latest income statement for ``ticker`` as formatted text.

    Loads the SimFin income dataset for the US market, with ``freq`` passed as
    the dataset variant, and formats the most recent row published on or
    before ``curr_date``.

    Args:
        ticker: Ticker symbol to look up.
        freq: SimFin dataset variant; also shown in the heading.
        curr_date: Cut-off date for the publish-date filter.

    Returns:
        The formatted income statement, or a "no data" message.
    """
    explanation = (
        "Starting with Revenue, it shows Cost of Revenue and resulting Gross Profit. Operating Expenses are detailed, "
        "including SG&A, R&D, and Depreciation. The statement shows Operating Income, followed by non-operating items "
        "leading to Pretax Income. After accounting for Income Tax, it concludes with Net Income."
    )
    simfin_module = _setup()
    statements = simfin_module.load_income(variant=freq, market="us")
    return _filter_and_format(statements, ticker, curr_date, freq, "income statement", explanation)
|