feat: integrate Finnhub and SimFin data providers for news and financial statements

This commit is contained in:
Kane Lin 2026-04-12 13:54:19 +08:00
parent 10c136f49c
commit dac4581d10
5 changed files with 183 additions and 2 deletions

View File

@ -26,6 +26,9 @@ dependencies = [
"rich>=14.0.0",
"typer>=0.21.0",
"setuptools>=80.9.0",
"finnhub-python>=2.4.20",
"python-dateutil>=2.9.0",
"simfin>=1.0.3",
"stockstats>=0.6.5",
"tqdm>=4.67.1",
"typing-extensions>=4.14.0",

View File

@ -0,0 +1,90 @@
"""Finnhub data provider for news and insider data."""
import os
from datetime import datetime
from dateutil.relativedelta import relativedelta
def _get_client():
    """Build a Finnhub API client on demand.

    The ``finnhub`` package is imported inside the function, so it is only
    loaded when a client is actually requested. The API key is read from the
    ``FINNHUB_API_KEY`` environment variable and falls back to an empty
    string when the variable is not set.

    Returns:
        A ``finnhub.Client`` constructed with that key.
    """
    import finnhub

    api_key = os.environ.get("FINNHUB_API_KEY", "")
    return finnhub.Client(api_key=api_key)
def get_news(
    ticker: str,
    start_date: str,
    end_date: str,
) -> str:
    """Fetch Finnhub company news for a ticker and render it as Markdown.

    Args:
        ticker: Symbol passed to the client's ``company_news`` call.
        start_date: Start of the range, forwarded as ``_from``.
        end_date: End of the range, forwarded as ``to``.

    Returns:
        A Markdown string with a ``##`` title line followed by one ``###``
        section per article (headline, date, summary), or an empty string
        when the client returns nothing.
    """
    articles = _get_client().company_news(ticker, _from=start_date, to=end_date)
    if not articles:
        return ""

    def _render(article) -> str:
        """Format a single article as a Markdown section."""
        published = article.get("datetime", 0)
        if published:
            # fromtimestamp() without a tz argument yields local time, so the
            # printed day follows the host's timezone.
            day = datetime.fromtimestamp(published).strftime("%Y-%m-%d")
        else:
            day = "Unknown Date"
        title = article.get("headline", "No headline")
        body = article.get("summary", "No summary")
        return f"### {title} ({day})\n{body}"

    sections = "\n\n".join(_render(article) for article in articles)
    return f"## {ticker} News, from {start_date} to {end_date}:\n" + sections
def get_global_news(
    curr_date: str,
    look_back_days: int = 7,
    limit: int = 5,
) -> str:
    """Retrieve general market news from Finnhub for a look-back window.

    The client's ``general_news`` call takes no date range, so the window is
    enforced here: only articles dated between ``curr_date - look_back_days``
    and ``curr_date`` (both inclusive) are kept. Without this filter a
    historical ``curr_date`` would still receive whatever headlines the API
    serves today.

    Args:
        curr_date: Reference date as an ISO string (e.g. ``"2024-05-01"``).
        look_back_days: Number of days before ``curr_date`` to include.
        limit: Maximum number of articles to return, applied after the
            date filter.

    Returns:
        A Markdown string with a ``##`` title line and one ``###`` section
        per article, or an empty string when no article is available for
        the window.

    Raises:
        ValueError: If ``curr_date`` is not a valid ISO date string.
    """
    # Function-scope import: only ``datetime`` is imported at module level.
    from datetime import timedelta

    articles = _get_client().general_news("general", min_id=0)
    if not articles:
        return ""

    window_end = datetime.fromisoformat(curr_date).date()
    window_start = window_end - timedelta(days=look_back_days)

    sections = []
    for article in articles:
        published = article.get("datetime", 0)
        if published:
            # Filter on the same local-time date that is displayed, so the
            # printed day never falls outside the requested window.
            published_on = datetime.fromtimestamp(published)
            if not window_start <= published_on.date() <= window_end:
                continue
            day = published_on.strftime("%Y-%m-%d")
        else:
            # Articles without a timestamp cannot be placed in the window;
            # keep them, labelled as undated, as before.
            day = "Unknown Date"
        title = article.get("headline", "No headline")
        body = article.get("summary", "No summary")
        sections.append(f"### {title} ({day})\n{body}")

    selected = sections[:limit]
    if not selected:
        return ""
    return "## General Market News:\n" + "\n\n".join(selected)
def get_insider_transactions(
    symbol: str,
) -> str:
    """Fetch the last 90 days of insider transactions for a symbol.

    The window ends at the current local date and starts 90 days earlier.
    Records that are exact duplicates of an earlier record are listed once.

    Args:
        symbol: Symbol passed to the client's ``stock_insider_transactions``.

    Returns:
        A Markdown report with one ``###`` section per distinct transaction
        followed by a short legend, or an empty string when the response is
        empty or carries no ``"data"`` entries.
    """
    now = datetime.now()
    window_end = now.strftime("%Y-%m-%d")
    window_start = (now - relativedelta(days=90)).strftime("%Y-%m-%d")

    payload = _get_client().stock_insider_transactions(symbol, window_start, window_end)
    if not (payload and "data" in payload and payload["data"]):
        return ""

    # Records are dicts (unhashable), so duplicates are detected by equality
    # against the rows already kept; first-seen order is preserved.
    unique_rows = []
    for row in payload["data"]:
        if row in unique_rows:
            continue
        unique_rows.append(row)

    sections = [
        f"### Filing Date: {row['filingDate']}, {row['name']}:\n"
        f"Change: {row['change']}\n"
        f"Shares: {row['share']}\n"
        f"Transaction Price: {row['transactionPrice']}\n"
        f"Transaction Code: {row['transactionCode']}\n\n"
        for row in unique_rows
    ]
    header = f"## {symbol} insider transactions from {window_start} to {window_end}:\n"
    legend = (
        "The change field reflects the variation in share count—a negative number indicates a reduction in holdings. "
        "The transactionCode (e.g., S for sale) clarifies the nature of the transaction."
    )
    return header + "".join(sections) + legend

View File

@ -11,6 +11,16 @@ from .y_finance import (
get_insider_transactions as get_yfinance_insider_transactions,
)
from .yfinance_news import get_news_yfinance, get_global_news_yfinance
from .finnhub_news import (
get_news as get_finnhub_news,
get_global_news as get_finnhub_global_news,
get_insider_transactions as get_finnhub_insider_transactions,
)
from .simfin import (
get_balance_sheet as get_simfin_balance_sheet,
get_cashflow as get_simfin_cashflow,
get_income_statement as get_simfin_income_statement,
)
from .alpha_vantage import (
get_stock as get_alpha_vantage_stock,
get_indicator as get_alpha_vantage_indicator,
@ -63,6 +73,8 @@ TOOLS_CATEGORIES = {
VENDOR_LIST = [
"yfinance",
"alpha_vantage",
"finnhub",
"simfin",
]
# Mapping of methods to their vendor-specific implementations
@ -85,27 +97,33 @@ VENDOR_METHODS = {
"get_balance_sheet": {
"alpha_vantage": get_alpha_vantage_balance_sheet,
"yfinance": get_yfinance_balance_sheet,
"simfin": get_simfin_balance_sheet,
},
"get_cashflow": {
"alpha_vantage": get_alpha_vantage_cashflow,
"yfinance": get_yfinance_cashflow,
"simfin": get_simfin_cashflow,
},
"get_income_statement": {
"alpha_vantage": get_alpha_vantage_income_statement,
"yfinance": get_yfinance_income_statement,
"simfin": get_simfin_income_statement,
},
# news_data
"get_news": {
"alpha_vantage": get_alpha_vantage_news,
"yfinance": get_news_yfinance,
"finnhub": get_finnhub_news,
},
"get_global_news": {
"yfinance": get_global_news_yfinance,
"alpha_vantage": get_alpha_vantage_global_news,
"finnhub": get_finnhub_global_news,
},
"get_insider_transactions": {
"alpha_vantage": get_alpha_vantage_insider_transactions,
"yfinance": get_yfinance_insider_transactions,
"finnhub": get_finnhub_insider_transactions,
},
}

View File

@ -0,0 +1,70 @@
"""SimFin data provider for fundamental financial statements."""
import os
import pandas as pd
def _setup():
    """Configure the SimFin library and hand back the module.

    The ``simfin`` package is imported inside the function. Its API key is
    taken from the ``SIMFIN_API_KEY`` environment variable (empty string when
    unset) and its data directory is set to ``/tmp/simfin_data/``.

    Returns:
        The configured ``simfin`` module.
    """
    import simfin as sf

    api_key = os.environ.get("SIMFIN_API_KEY", "")
    sf.set_api_key(api_key)
    sf.set_data_dir("/tmp/simfin_data/")
    return sf
def _filter_and_format(df: pd.DataFrame, ticker: str, curr_date: str, freq: str, label: str, description: str) -> str:
    """Pick the latest statement for a ticker published on or before a date.

    The frame may carry ``Ticker``, ``Report Date`` and ``Publish Date``
    either as columns or as index levels. SimFin's ``load_*`` helpers index
    by ``(Ticker, Report Date)`` by default, so plain column access would
    raise ``KeyError`` on their output. The caller's frame is never modified.

    Args:
        df: Statement data with one row per ticker and report.
        ticker: Ticker to select (exact match).
        curr_date: Cut-off date; only rows whose publish date is on or
            before this day are considered.
        freq: Reporting frequency, used only in the heading.
        label: Statement name used in the heading and the not-found message.
        description: Explanatory text appended after the data.

    Returns:
        A Markdown heading, the string form of the selected row (without
        ``SimFinId``) and ``description``; or a "No ... available" message
        when no row matches.
    """
    cutoff = pd.to_datetime(curr_date, utc=True).normalize()
    not_found = f"No {label} available for {ticker} before {curr_date}."

    # Promote required fields that live in the index to ordinary columns.
    # reset_index returns a new frame, so the input stays untouched.
    required = ("Ticker", "Report Date", "Publish Date")
    index_levels = [
        name for name in df.index.names
        if name in required and name not in df.columns
    ]
    if index_levels:
        df = df.reset_index(level=index_levels)

    # Narrow to the ticker first and work on a copy: date parsing then only
    # touches this ticker's rows and cannot leak into the caller's data.
    rows = df[df["Ticker"] == ticker].copy()
    if rows.empty:
        return not_found

    for column in ("Report Date", "Publish Date"):
        rows[column] = pd.to_datetime(rows[column], utc=True).dt.normalize()

    published = rows[rows["Publish Date"] <= cutoff]
    if published.empty:
        return not_found

    latest = published.loc[published["Publish Date"].idxmax()]
    if "SimFinId" in latest.index:
        latest = latest.drop("SimFinId")
    # The timestamp's string form starts with YYYY-MM-DD.
    publish_date = str(latest["Publish Date"])[:10]
    return (
        f"## {freq} {label} for {ticker} released on {publish_date}:\n"
        + str(latest)
        + f"\n\n{description}"
    )
def get_balance_sheet(ticker: str, freq: str, curr_date: str) -> str:
    """Return the latest SimFin balance sheet for a ticker as text.

    Args:
        ticker: Ticker whose balance sheet is wanted.
        freq: Forwarded to SimFin as the dataset ``variant`` and shown in
            the heading (presumably "annual" or "quarterly"; confirm with
            callers).
        curr_date: Cut-off date; only data published on or before it is used.

    Returns:
        The formatted report produced by ``_filter_and_format``.
    """
    description = (
        "This includes metadata like reporting dates and currency, share details, and a breakdown of assets, "
        "liabilities, and equity. Assets are grouped as current (liquid items like cash and receivables) and "
        "noncurrent (long-term investments and property). Liabilities are split between short-term obligations "
        "and long-term debts, while equity reflects shareholder funds such as paid-in capital and retained earnings."
    )
    sf = _setup()
    statements = sf.load_balance(variant=freq, market="us")
    return _filter_and_format(statements, ticker, curr_date, freq, "balance sheet", description)
def get_cashflow(ticker: str, freq: str, curr_date: str) -> str:
    """Return the latest SimFin cash flow statement for a ticker as text.

    Args:
        ticker: Ticker whose cash flow statement is wanted.
        freq: Forwarded to SimFin as the dataset ``variant`` and shown in
            the heading (presumably "annual" or "quarterly"; confirm with
            callers).
        curr_date: Cut-off date; only data published on or before it is used.

    Returns:
        The formatted report produced by ``_filter_and_format``.
    """
    description = (
        "Operating activities show cash generated from core business operations. Investing activities cover asset "
        "acquisitions/disposals. Financing activities include debt transactions and dividend payments. The net change "
        "in cash represents the overall increase or decrease in the company's cash position."
    )
    sf = _setup()
    statements = sf.load_cashflow(variant=freq, market="us")
    return _filter_and_format(statements, ticker, curr_date, freq, "cash flow statement", description)
def get_income_statement(ticker: str, freq: str, curr_date: str) -> str:
    """Return the latest SimFin income statement for a ticker as text.

    Args:
        ticker: Ticker whose income statement is wanted.
        freq: Forwarded to SimFin as the dataset ``variant`` and shown in
            the heading (presumably "annual" or "quarterly"; confirm with
            callers).
        curr_date: Cut-off date; only data published on or before it is used.

    Returns:
        The formatted report produced by ``_filter_and_format``.
    """
    description = (
        "Starting with Revenue, it shows Cost of Revenue and resulting Gross Profit. Operating Expenses are detailed, "
        "including SG&A, R&D, and Depreciation. The statement shows Operating Income, followed by non-operating items "
        "leading to Pretax Income. After accounting for Income Tax, it concludes with Net Income."
    )
    sf = _setup()
    statements = sf.load_income(variant=freq, market="us")
    return _filter_and_format(statements, ticker, curr_date, freq, "income statement", description)

View File

@ -28,8 +28,8 @@ DEFAULT_CONFIG = {
"data_vendors": {
"core_stock_apis": "yfinance", # Options: alpha_vantage, yfinance
"technical_indicators": "yfinance", # Options: alpha_vantage, yfinance
"fundamental_data": "yfinance", # Options: alpha_vantage, yfinance
"news_data": "yfinance", # Options: alpha_vantage, yfinance
"fundamental_data": "yfinance", # Options: alpha_vantage, yfinance, simfin
"news_data": "yfinance", # Options: alpha_vantage, yfinance, finnhub
},
# Tool-level configuration (takes precedence over category-level)
"tool_vendors": {