feat: integrate Finnhub and SimFin data providers for news and financial statements
This commit is contained in:
parent
10c136f49c
commit
dac4581d10
|
|
@ -26,6 +26,9 @@ dependencies = [
|
|||
"rich>=14.0.0",
|
||||
"typer>=0.21.0",
|
||||
"setuptools>=80.9.0",
|
||||
"finnhub-python>=2.4.20",
|
||||
"python-dateutil>=2.9.0",
|
||||
"simfin>=1.0.3",
|
||||
"stockstats>=0.6.5",
|
||||
"tqdm>=4.67.1",
|
||||
"typing-extensions>=4.14.0",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,90 @@
|
|||
"""Finnhub data provider for news and insider data."""
|
||||
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
|
||||
def _get_client():
    """Build a Finnhub API client on demand.

    The ``finnhub`` package is imported inside the function, so it is only
    loaded when a client is actually requested. The API key is read from the
    ``FINNHUB_API_KEY`` environment variable; an empty string is used when the
    variable is not set.
    """
    import finnhub

    api_key = os.environ.get("FINNHUB_API_KEY", "")
    return finnhub.Client(api_key=api_key)
|
||||
|
||||
|
||||
def get_news(
    ticker: str,
    start_date: str,
    end_date: str,
) -> str:
    """Fetch company news for ``ticker`` from Finnhub and render it as Markdown.

    ``start_date`` and ``end_date`` are forwarded unchanged to Finnhub's
    ``company_news`` call as the ``_from`` / ``to`` bounds.

    Returns:
        An empty string when Finnhub returns nothing; otherwise a report with
        a ``##`` header followed by one ``###`` section per article.
    """
    articles = _get_client().company_news(ticker, _from=start_date, to=end_date)
    if not articles:
        return ""

    def _render(article) -> str:
        # Articles carry an epoch timestamp; it is rendered as a local-time day.
        epoch = article.get("datetime", 0)
        if epoch:
            day = datetime.fromtimestamp(epoch).strftime("%Y-%m-%d")
        else:
            day = "Unknown Date"
        title = article.get("headline", "No headline")
        body = article.get("summary", "No summary")
        return f"### {title} ({day})\n{body}"

    sections = "\n\n".join(_render(article) for article in articles)
    return f"## {ticker} News, from {start_date} to {end_date}:\n" + sections
|
||||
|
||||
|
||||
def get_global_news(
    curr_date: str,
    look_back_days: int = 7,
    limit: int = 5,
) -> str:
    """Retrieve general market news from Finnhub for a look-back window.

    Args:
        curr_date: Last day of the window, formatted as ``YYYY-MM-DD``.
        look_back_days: Number of days before ``curr_date`` to include.
        limit: Maximum number of articles to render.

    Returns:
        A Markdown report with one ``###`` section per article, or an empty
        string when Finnhub returns nothing or no article falls inside the
        requested window.

    Raises:
        ValueError: If ``curr_date`` is not in ``YYYY-MM-DD`` format.
    """
    result = _get_client().general_news("general", min_id=0)

    if not result:
        return ""

    # The request itself carries no date bounds, so the window described by
    # curr_date / look_back_days has to be enforced on the returned articles.
    curr_dt = datetime.strptime(curr_date, "%Y-%m-%d")
    window_end = curr_dt.date()
    window_start = (curr_dt - relativedelta(days=look_back_days)).date()

    in_window = []
    for entry in result:
        timestamp = entry.get("datetime", 0)
        if not timestamp:
            # Undated articles cannot be placed in the window; leave them out
            # rather than risk reporting news from after curr_date.
            continue
        published = datetime.fromtimestamp(timestamp).date()
        if window_start <= published <= window_end:
            in_window.append((published, entry))

    news_entries = []
    for published, entry in in_window[:limit]:
        date_str = published.strftime("%Y-%m-%d")
        headline = entry.get("headline", "No headline")
        summary = entry.get("summary", "No summary")
        news_entries.append(f"### {headline} ({date_str})\n{summary}")

    if not news_entries:
        return ""

    return "## General Market News:\n" + "\n\n".join(news_entries)
|
||||
|
||||
|
||||
def get_insider_transactions(
    symbol: str,
) -> str:
    """Retrieve insider transactions from Finnhub (last 90 days).

    The window runs from 90 days before the current local date up to today.

    Args:
        symbol: Ticker symbol passed to Finnhub.

    Returns:
        A Markdown report with one ``###`` section per distinct transaction
        followed by a short explanation of the fields, or an empty string
        when Finnhub returns no transactions.
    """
    curr_dt = datetime.now()
    before_str = (curr_dt - relativedelta(days=90)).strftime("%Y-%m-%d")
    curr_str = curr_dt.strftime("%Y-%m-%d")

    data = _get_client().stock_insider_transactions(symbol, before_str, curr_str)

    if not data or "data" not in data or not data["data"]:
        return ""

    sections = []
    # Entries are dicts (unhashable), so duplicates are tracked in a list.
    seen = []
    for entry in data["data"]:
        if entry in seen:
            continue
        seen.append(entry)
        # Use .get so one record with a missing field does not abort the
        # whole report with a KeyError.
        sections.append(
            f"### Filing Date: {entry.get('filingDate', 'N/A')}, {entry.get('name', 'N/A')}:\n"
            f"Change: {entry.get('change', 'N/A')}\n"
            f"Shares: {entry.get('share', 'N/A')}\n"
            f"Transaction Price: {entry.get('transactionPrice', 'N/A')}\n"
            f"Transaction Code: {entry.get('transactionCode', 'N/A')}\n\n"
        )

    return (
        f"## {symbol} insider transactions from {before_str} to {curr_str}:\n"
        + "".join(sections)
        + "The change field reflects the variation in share count—a negative number indicates a reduction in holdings. "
        "The transactionCode (e.g., S for sale) clarifies the nature of the transaction."
    )
|
||||
|
|
@ -11,6 +11,16 @@ from .y_finance import (
|
|||
get_insider_transactions as get_yfinance_insider_transactions,
|
||||
)
|
||||
from .yfinance_news import get_news_yfinance, get_global_news_yfinance
|
||||
from .finnhub_news import (
|
||||
get_news as get_finnhub_news,
|
||||
get_global_news as get_finnhub_global_news,
|
||||
get_insider_transactions as get_finnhub_insider_transactions,
|
||||
)
|
||||
from .simfin import (
|
||||
get_balance_sheet as get_simfin_balance_sheet,
|
||||
get_cashflow as get_simfin_cashflow,
|
||||
get_income_statement as get_simfin_income_statement,
|
||||
)
|
||||
from .alpha_vantage import (
|
||||
get_stock as get_alpha_vantage_stock,
|
||||
get_indicator as get_alpha_vantage_indicator,
|
||||
|
|
@ -63,6 +73,8 @@ TOOLS_CATEGORIES = {
|
|||
# Vendor identifiers; these match the per-vendor keys used in VENDOR_METHODS.
VENDOR_LIST = [
    "yfinance",
    "alpha_vantage",
    "finnhub",
    "simfin",
]
|
||||
|
||||
# Mapping of methods to their vendor-specific implementations
|
||||
|
|
@ -85,27 +97,33 @@ VENDOR_METHODS = {
|
|||
"get_balance_sheet": {
|
||||
"alpha_vantage": get_alpha_vantage_balance_sheet,
|
||||
"yfinance": get_yfinance_balance_sheet,
|
||||
"simfin": get_simfin_balance_sheet,
|
||||
},
|
||||
"get_cashflow": {
|
||||
"alpha_vantage": get_alpha_vantage_cashflow,
|
||||
"yfinance": get_yfinance_cashflow,
|
||||
"simfin": get_simfin_cashflow,
|
||||
},
|
||||
"get_income_statement": {
|
||||
"alpha_vantage": get_alpha_vantage_income_statement,
|
||||
"yfinance": get_yfinance_income_statement,
|
||||
"simfin": get_simfin_income_statement,
|
||||
},
|
||||
# news_data
|
||||
"get_news": {
|
||||
"alpha_vantage": get_alpha_vantage_news,
|
||||
"yfinance": get_news_yfinance,
|
||||
"finnhub": get_finnhub_news,
|
||||
},
|
||||
"get_global_news": {
|
||||
"yfinance": get_global_news_yfinance,
|
||||
"alpha_vantage": get_alpha_vantage_global_news,
|
||||
"finnhub": get_finnhub_global_news,
|
||||
},
|
||||
"get_insider_transactions": {
|
||||
"alpha_vantage": get_alpha_vantage_insider_transactions,
|
||||
"yfinance": get_yfinance_insider_transactions,
|
||||
"finnhub": get_finnhub_insider_transactions,
|
||||
},
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,70 @@
|
|||
"""SimFin data provider for fundamental financial statements."""
|
||||
|
||||
import os
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def _setup():
    """Prepare the SimFin library for use and hand back the module.

    The package is imported lazily, given the API key found in the
    ``SIMFIN_API_KEY`` environment variable (empty string when unset), and
    pointed at ``/tmp/simfin_data/`` as its data directory.
    """
    import simfin

    api_key = os.environ.get("SIMFIN_API_KEY", "")
    simfin.set_api_key(api_key)
    simfin.set_data_dir("/tmp/simfin_data/")
    return simfin
|
||||
|
||||
|
||||
def _filter_and_format(df: pd.DataFrame, ticker: str, curr_date: str, freq: str, label: str, description: str) -> str:
|
||||
"""Filter a SimFin DataFrame by ticker and publish date, return formatted string."""
|
||||
df["Report Date"] = pd.to_datetime(df["Report Date"], utc=True).dt.normalize()
|
||||
df["Publish Date"] = pd.to_datetime(df["Publish Date"], utc=True).dt.normalize()
|
||||
curr_date_dt = pd.to_datetime(curr_date, utc=True).normalize()
|
||||
|
||||
filtered = df[(df["Ticker"] == ticker) & (df["Publish Date"] <= curr_date_dt)]
|
||||
|
||||
if filtered.empty:
|
||||
return f"No {label} available for {ticker} before {curr_date}."
|
||||
|
||||
latest = filtered.loc[filtered["Publish Date"].idxmax()]
|
||||
if "SimFinId" in latest.index:
|
||||
latest = latest.drop("SimFinId")
|
||||
|
||||
publish_date = str(latest["Publish Date"])[:10]
|
||||
return (
|
||||
f"## {freq} {label} for {ticker} released on {publish_date}:\n"
|
||||
+ str(latest)
|
||||
+ f"\n\n{description}"
|
||||
)
|
||||
|
||||
|
||||
def get_balance_sheet(ticker: str, freq: str, curr_date: str) -> str:
    """Load US balance-sheet data from SimFin and report the latest entry.

    ``freq`` is passed to SimFin as the dataset variant. The report covers the
    most recent statement for ``ticker`` published on or before ``curr_date``.
    """
    simfin = _setup()
    statements = simfin.load_balance(variant=freq, market="us")
    description = (
        "This includes metadata like reporting dates and currency, share details, and a breakdown of assets, "
        "liabilities, and equity. Assets are grouped as current (liquid items like cash and receivables) and "
        "noncurrent (long-term investments and property). Liabilities are split between short-term obligations "
        "and long-term debts, while equity reflects shareholder funds such as paid-in capital and retained earnings."
    )
    return _filter_and_format(statements, ticker, curr_date, freq, "balance sheet", description)
|
||||
|
||||
|
||||
def get_cashflow(ticker: str, freq: str, curr_date: str) -> str:
    """Load US cash-flow data from SimFin and report the latest entry.

    ``freq`` is passed to SimFin as the dataset variant. The report covers the
    most recent statement for ``ticker`` published on or before ``curr_date``.
    """
    simfin = _setup()
    statements = simfin.load_cashflow(variant=freq, market="us")
    description = (
        "Operating activities show cash generated from core business operations. Investing activities cover asset "
        "acquisitions/disposals. Financing activities include debt transactions and dividend payments. The net change "
        "in cash represents the overall increase or decrease in the company's cash position."
    )
    return _filter_and_format(statements, ticker, curr_date, freq, "cash flow statement", description)
|
||||
|
||||
|
||||
def get_income_statement(ticker: str, freq: str, curr_date: str) -> str:
    """Load US income-statement data from SimFin and report the latest entry.

    ``freq`` is passed to SimFin as the dataset variant. The report covers the
    most recent statement for ``ticker`` published on or before ``curr_date``.
    """
    simfin = _setup()
    statements = simfin.load_income(variant=freq, market="us")
    description = (
        "Starting with Revenue, it shows Cost of Revenue and resulting Gross Profit. Operating Expenses are detailed, "
        "including SG&A, R&D, and Depreciation. The statement shows Operating Income, followed by non-operating items "
        "leading to Pretax Income. After accounting for Income Tax, it concludes with Net Income."
    )
    return _filter_and_format(statements, ticker, curr_date, freq, "income statement", description)
|
||||
|
|
@ -28,8 +28,8 @@ DEFAULT_CONFIG = {
|
|||
"data_vendors": {
|
||||
"core_stock_apis": "yfinance", # Options: alpha_vantage, yfinance
|
||||
"technical_indicators": "yfinance", # Options: alpha_vantage, yfinance
|
||||
"fundamental_data": "yfinance", # Options: alpha_vantage, yfinance
|
||||
"news_data": "yfinance", # Options: alpha_vantage, yfinance
|
||||
"fundamental_data": "yfinance", # Options: alpha_vantage, yfinance, simfin
|
||||
"news_data": "yfinance", # Options: alpha_vantage, yfinance, finnhub
|
||||
},
|
||||
# Tool-level configuration (takes precedence over category-level)
|
||||
"tool_vendors": {
|
||||
|
|
|
|||
Loading…
Reference in New Issue