# TradingAgents/tradingagents/dataflows/yfinance_news.py
"""yfinance-based news, macro, and sentiment helpers."""
from __future__ import annotations
from datetime import datetime, timezone
from dateutil.relativedelta import relativedelta
import yfinance as yf
from .news_models import (
NewsItem,
dedupe_news_items,
filter_news_items_by_date,
format_news_items_report,
normalize_datetime,
)
from .stockstats_utils import yf_retry

_TICKER_NEWS_FETCH_COUNTS = (20, 50, 100)
_MAX_FILTERED_TICKER_ARTICLES = 25

_GLOBAL_QUERY_PRESETS = {
    "US": [
        "stock market economy",
        "Federal Reserve interest rates",
        "inflation economic outlook",
        "global markets trading",
    ],
    "KR": [
        "한국 증시",  # Korean stock market
        "한국은행 기준금리",  # Bank of Korea base rate
        "원달러 환율",  # won-dollar exchange rate
        "반도체 수출",  # semiconductor exports
    ],
    "GLOBAL": [
        "stock market economy",
        "global markets trading",
        "economy monetary policy",
        "inflation growth outlook",
    ],
}


def _extract_article_fields(article: dict) -> dict:
    """Extract article data from yfinance news format."""
    if "content" in article:
        # Newer yfinance payloads nest the article under a "content" key.
        content = article["content"]
        provider = content.get("provider", {})
        url_obj = content.get("canonicalUrl") or content.get("clickThroughUrl") or {}
        return {
            "title": content.get("title", "No title"),
            "summary": content.get("summary", ""),
            "publisher": provider.get("displayName", "Unknown"),
            "link": url_obj.get("url", ""),
            "pub_date": normalize_datetime(content.get("pubDate")),
            "raw_symbols": content.get("relatedTickers") or [],
        }
    # Legacy flat payload shape.
    return {
        "title": article.get("title", "No title"),
        "summary": article.get("summary", ""),
        "publisher": article.get("publisher", "Unknown"),
        "link": article.get("link", ""),
        "pub_date": normalize_datetime(article.get("providerPublishTime")),
        "raw_symbols": article.get("relatedTickers") or [],
    }
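
# Illustrative payload shapes handled above. Key names reflect yfinance news
# feeds as observed at the time of writing; treat the exact fields as an
# assumption, not a stable contract.
#
#   {"content": {"title": "...", "summary": "...",
#                "provider": {"displayName": "Reuters"},
#                "canonicalUrl": {"url": "https://..."},
#                "pubDate": "2024-05-01T12:00:00Z",
#                "relatedTickers": ["AAPL"]}}          # newer nested shape
#
#   {"title": "...", "summary": "...", "publisher": "Reuters",
#    "link": "https://...", "providerPublishTime": 1714561200,
#    "relatedTickers": ["AAPL"]}                       # legacy flat shape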


def normalize_yfinance_article(
    article: dict,
    *,
    fallback_symbol: str | None = None,
    country: str | None = None,
) -> NewsItem:
    data = _extract_article_fields(article)
    symbols = [str(symbol).upper() for symbol in data["raw_symbols"] if str(symbol).strip()]
    if fallback_symbol and fallback_symbol.upper() not in symbols:
        symbols.append(fallback_symbol.upper())
    return NewsItem(
        title=data["title"],
        source=data["publisher"],
        published_at=data["pub_date"],
        language=None,
        country=country,
        symbols=symbols,
        topic_tags=[],
        sentiment=None,
        relevance=None,
        reliability=None,
        url=data["link"],
        summary=data["summary"],
        raw_vendor="yfinance",
    )


def _collect_ticker_news(
    ticker: str,
    start_dt: datetime,
) -> tuple[list[NewsItem], datetime | None, datetime | None]:
    """Fetch increasingly larger ticker feeds until the requested window is covered."""
    collected: list[NewsItem] = []
    oldest_pub_date = None
    newest_pub_date = None
    for count in _TICKER_NEWS_FETCH_COUNTS:
        news = yf_retry(lambda batch_size=count: yf.Ticker(ticker).get_news(count=batch_size))
        if not news:
            continue
        batch = dedupe_news_items(
            [normalize_yfinance_article(article, fallback_symbol=ticker) for article in news]
        )
        for item in batch:
            collected.append(item)
            pub_date = item.published_at
            if pub_date:
                if newest_pub_date is None or pub_date > newest_pub_date:
                    newest_pub_date = pub_date
                if oldest_pub_date is None or pub_date < oldest_pub_date:
                    oldest_pub_date = pub_date
        # Stop once the feed reaches back past the start of the requested window.
        if oldest_pub_date and oldest_pub_date.replace(tzinfo=None) <= start_dt:
            break
        # Fewer articles than requested means the feed is exhausted; a larger
        # request will not help.
        if len(news) < count:
            break
    collected = dedupe_news_items(collected)
    collected.sort(
        key=lambda article: article.published_at.timestamp() if article.published_at else float("-inf"),
        reverse=True,
    )
    return collected, oldest_pub_date, newest_pub_date
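
# For example (illustrative): a request for 20 articles may only reach back a
# few days; if the requested window starts earlier, the loop retries with 50
# and then 100 before giving up and reporting the coverage it actually got.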


def _format_coverage_note(
    oldest_pub_date: datetime | None,
    newest_pub_date: datetime | None,
) -> str:
    if oldest_pub_date and newest_pub_date:
        return (
            "; the current yfinance ticker feed only covered "
            f"{oldest_pub_date.strftime('%Y-%m-%d')} to {newest_pub_date.strftime('%Y-%m-%d')} at query time"
        )
    if oldest_pub_date:
        return f"; the current yfinance ticker feed only reached back to {oldest_pub_date.strftime('%Y-%m-%d')}"
    if newest_pub_date:
        return f"; the current yfinance ticker feed only returned articles up to {newest_pub_date.strftime('%Y-%m-%d')}"
    return ""


def fetch_company_news_yfinance(
    ticker: str,
    start_date: str,
    end_date: str,
) -> tuple[list[NewsItem], datetime | None, datetime | None]:
    start_dt = datetime.strptime(start_date, "%Y-%m-%d")
    # Extend by one day so end_date is inclusive.
    end_dt = datetime.strptime(end_date, "%Y-%m-%d") + relativedelta(days=1)
    articles, oldest_pub_date, newest_pub_date = _collect_ticker_news(ticker, start_dt)
    filtered = filter_news_items_by_date(articles, start_date=start_dt, end_date=end_dt)
    return filtered[:_MAX_FILTERED_TICKER_ARTICLES], oldest_pub_date, newest_pub_date


def get_company_news_yfinance(
    ticker: str,
    start_date: str,
    end_date: str,
) -> str:
    try:
        filtered, oldest_pub_date, newest_pub_date = fetch_company_news_yfinance(ticker, start_date, end_date)
        if not filtered:
            coverage_note = _format_coverage_note(oldest_pub_date, newest_pub_date)
            return f"No news found for {ticker} between {start_date} and {end_date}{coverage_note}"
        return format_news_items_report(
            f"{ticker} Company News, from {start_date} to {end_date}",
            filtered,
            max_items=_MAX_FILTERED_TICKER_ARTICLES,
        )
    except Exception as exc:
        return f"Error fetching news for {ticker}: {exc}"


def _get_query_preset(region: str | None) -> list[str]:
    if not region:
        return _GLOBAL_QUERY_PRESETS["GLOBAL"]
    return _GLOBAL_QUERY_PRESETS.get(region.upper(), _GLOBAL_QUERY_PRESETS["GLOBAL"])
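
# For example (illustrative): _get_query_preset("kr") resolves to the "KR"
# preset via the .upper() lookup, while an unlisted region such as "EU"
# falls back to the "GLOBAL" queries.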


def fetch_macro_news_yfinance(
    curr_date: str,
    look_back_days: int = 7,
    limit: int = 10,
    region: str | None = None,
    language: str | None = None,
) -> list[NewsItem]:
    curr_dt = datetime.strptime(curr_date, "%Y-%m-%d")
    start_dt = curr_dt - relativedelta(days=look_back_days)
    country = (region or "GLOBAL").upper()
    queries = _get_query_preset(region)
    all_news: list[NewsItem] = []
    for query in queries:
        search = yf_retry(
            lambda q=query: yf.Search(
                query=q if not language else f"{q} {language}",
                news_count=limit,
                enable_fuzzy_query=True,
            )
        )
        search_news = getattr(search, "news", None) or []
        batch = [normalize_yfinance_article(article, country=country) for article in search_news]
        all_news.extend(batch)
        if len(all_news) >= limit * len(queries):
            break
    # Drop items outside the look-back window; undated items are kept and
    # sorted last below.
    filtered = []
    for item in dedupe_news_items(all_news):
        if item.published_at:
            published = item.published_at.replace(tzinfo=None)
            if published < start_dt or published > curr_dt + relativedelta(days=1):
                continue
        filtered.append(item)
    filtered.sort(
        key=lambda article: article.published_at.timestamp() if article.published_at else float("-inf"),
        reverse=True,
    )
    return filtered[:limit]


def get_macro_news_yfinance(
    curr_date: str,
    look_back_days: int = 7,
    limit: int = 10,
    region: str | None = None,
    language: str | None = None,
) -> str:
    try:
        items = fetch_macro_news_yfinance(
            curr_date,
            look_back_days=look_back_days,
            limit=limit,
            region=region,
            language=language,
        )
        if not items:
            return f"No global news found for {curr_date}"
        start_date = (datetime.strptime(curr_date, "%Y-%m-%d") - relativedelta(days=look_back_days)).strftime("%Y-%m-%d")
        region_label = (region or "GLOBAL").upper()
        return format_news_items_report(
            f"{region_label} Macro News, from {start_date} to {curr_date}",
            items,
            max_items=limit,
        )
    except Exception as exc:
        return f"Error fetching global news: {exc}"


def get_social_sentiment_yfinance(
    symbol: str,
    start_date: str,
    end_date: str,
) -> str:
    articles, _, _ = fetch_company_news_yfinance(symbol, start_date, end_date)
    if not articles:
        return (
            f"Dedicated social provider unavailable; no news-derived sentiment was found for {symbol} "
            f"between {start_date} and {end_date}."
        )
    report_lines = [
        f"Dedicated social provider unavailable; using news-derived sentiment for {symbol} from {start_date} to {end_date}.",
        "Use this as public-narrative context rather than a literal social-media feed.",
        "",
    ]
    for item in articles[:10]:
        date_prefix = item.published_at.strftime("%Y-%m-%d") if item.published_at else "undated"
        summary = item.summary or "No summary available."
        report_lines.append(f"- {date_prefix}: {item.title} ({item.source})")
        report_lines.append(f"  Narrative: {summary}")
    return "\n".join(report_lines)


# Backward-compatible aliases
get_news_yfinance = get_company_news_yfinance
get_global_news_yfinance = get_macro_news_yfinance
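

if __name__ == "__main__":
    # A minimal smoke-test sketch, not part of the original module. The ticker
    # and dates are hypothetical; live network access to Yahoo Finance is
    # assumed, and output depends on whatever the feeds return that day.
    print(get_company_news_yfinance("AAPL", "2024-05-01", "2024-05-07"))
    print(get_macro_news_yfinance("2024-05-07", look_back_days=7, limit=5, region="US"))
    print(get_social_sentiment_yfinance("AAPL", "2024-05-01", "2024-05-07"))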