278 lines
9.5 KiB
Python
278 lines
9.5 KiB
Python
"""yfinance-based news, macro, and sentiment helpers."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from datetime import datetime, timezone
|
|
|
|
from dateutil.relativedelta import relativedelta
|
|
import yfinance as yf
|
|
|
|
from .news_models import (
|
|
NewsItem,
|
|
dedupe_news_items,
|
|
filter_news_items_by_date,
|
|
format_news_items_report,
|
|
normalize_datetime,
|
|
)
|
|
from .stockstats_utils import yf_retry
|
|
|
|
|
|
# Batch sizes requested from the yfinance ticker feed, tried in this order.
_TICKER_NEWS_FETCH_COUNTS = (20, 50, 100)

# Cap on the number of date-filtered ticker articles returned and reported.
_MAX_FILTERED_TICKER_ARTICLES = 25

# Search queries issued for macro news, keyed by upper-case region code.
# "GLOBAL" doubles as the fallback when a region is missing or unknown.
_GLOBAL_QUERY_PRESETS = {
    "US": [
        "stock market economy",
        "Federal Reserve interest rates",
        "inflation economic outlook",
        "global markets trading",
    ],
    "KR": [
        "한국 증시",
        "한국은행 기준금리",
        "원달러 환율",
        "반도체 수출",
    ],
    "GLOBAL": [
        "stock market economy",
        "global markets trading",
        "economy monetary policy",
        "inflation growth outlook",
    ],
}
|
|
|
|
|
|
def _extract_article_fields(article: dict) -> dict:
    """Extract a flat field mapping from a raw yfinance news article.

    Two payload layouts are handled: the nested one, where the fields live
    under ``article["content"]``, and the flat one, where they sit directly on
    the article.

    Args:
        article: Raw article dictionary from yfinance.

    Returns:
        A dict with the keys ``title``, ``summary``, ``publisher``, ``link``,
        ``pub_date`` (whatever ``normalize_datetime`` returns for the raw
        timestamp) and ``raw_symbols`` (the article's ``relatedTickers`` or an
        empty list).
    """
    content = article.get("content")
    # Only treat "content" as the nested layout when it really is a mapping;
    # a null/non-dict value falls through to the flat layout instead of
    # raising AttributeError on ``.get``.
    if isinstance(content, dict):
        # ``or {}`` (rather than a ``.get`` default) also covers keys that are
        # present but explicitly null.
        provider = content.get("provider") or {}
        url_obj = content.get("canonicalUrl") or content.get("clickThroughUrl") or {}
        return {
            "title": content.get("title", "No title"),
            "summary": content.get("summary", ""),
            "publisher": provider.get("displayName", "Unknown"),
            "link": url_obj.get("url", ""),
            "pub_date": normalize_datetime(content.get("pubDate")),
            "raw_symbols": content.get("relatedTickers") or [],
        }

    return {
        "title": article.get("title", "No title"),
        "summary": article.get("summary", ""),
        "publisher": article.get("publisher", "Unknown"),
        "link": article.get("link", ""),
        "pub_date": normalize_datetime(article.get("providerPublishTime")),
        "raw_symbols": article.get("relatedTickers") or [],
    }
|
|
|
|
|
|
def normalize_yfinance_article(article: dict, *, fallback_symbol: str | None = None, country: str | None = None) -> NewsItem:
    """Convert a raw yfinance article into a ``NewsItem``.

    Args:
        article: Raw article dictionary from yfinance.
        fallback_symbol: Symbol to attach when it is not already among the
            article's related tickers (compared case-insensitively).
        country: Value stored in the item's ``country`` field.

    Returns:
        A ``NewsItem`` tagged with ``raw_vendor="yfinance"``. ``language``,
        ``sentiment``, ``relevance`` and ``reliability`` are left as ``None``
        and ``topic_tags`` is empty.
    """
    data = _extract_article_fields(article)

    # Normalize symbols to stripped upper-case and keep each one only once, so
    # padded or repeated tickers do not yield near-duplicate entries.
    symbols: list[str] = []
    for raw_symbol in data["raw_symbols"]:
        if raw_symbol is None:
            continue
        symbol = str(raw_symbol).strip().upper()
        if symbol and symbol not in symbols:
            symbols.append(symbol)

    if fallback_symbol:
        fallback = fallback_symbol.strip().upper()
        if fallback and fallback not in symbols:
            symbols.append(fallback)

    return NewsItem(
        title=data["title"],
        source=data["publisher"],
        published_at=data["pub_date"],
        language=None,
        country=country,
        symbols=symbols,
        topic_tags=[],
        sentiment=None,
        relevance=None,
        reliability=None,
        url=data["link"],
        summary=data["summary"],
        raw_vendor="yfinance",
    )
|
|
|
|
|
|
def _collect_ticker_news(
    ticker: str,
    start_dt: datetime,
) -> tuple[list[NewsItem], datetime | None, datetime | None]:
    """Request progressively larger ticker feeds until the window start is reached.

    The batch sizes in ``_TICKER_NEWS_FETCH_COUNTS`` are tried in order. The
    loop ends early once the oldest publication date seen is at or before
    ``start_dt``, or once a feed returns fewer articles than were requested.

    Args:
        ticker: Symbol whose news feed is queried; also used as the fallback
            symbol on every normalized article.
        start_dt: Start of the requested window, compared against publication
            dates with their tzinfo dropped.

    Returns:
        A tuple ``(items, oldest, newest)``: the de-duplicated items sorted
        newest first (undated items last), and the oldest and newest
        publication dates seen across all fetched batches (``None`` when no
        dated article was seen).
    """

    def _recency(entry: NewsItem) -> float:
        # Undated entries get -inf so the descending sort places them last.
        return entry.published_at.timestamp() if entry.published_at else float("-inf")

    gathered: list[NewsItem] = []
    earliest = None
    latest = None

    for batch_size in _TICKER_NEWS_FETCH_COUNTS:
        # The size is bound as a default argument so the callable handed to
        # yf_retry keeps this iteration's value.
        raw_feed = yf_retry(lambda size=batch_size: yf.Ticker(ticker).get_news(count=size))
        if not raw_feed:
            continue

        normalized = dedupe_news_items(
            [normalize_yfinance_article(entry, fallback_symbol=ticker) for entry in raw_feed]
        )

        for entry in normalized:
            gathered.append(entry)
            stamp = entry.published_at
            if not stamp:
                continue
            if latest is None or stamp > latest:
                latest = stamp
            if earliest is None or stamp < earliest:
                earliest = stamp

        window_reached = bool(earliest) and earliest.replace(tzinfo=None) <= start_dt
        # A short feed means asking for a bigger batch would not add anything.
        if window_reached or len(raw_feed) < batch_size:
            break

    # Successive batches overlap, so de-duplicate across them before sorting.
    gathered = dedupe_news_items(gathered)
    gathered.sort(key=_recency, reverse=True)
    return gathered, earliest, latest
|
|
|
|
|
|
def _format_coverage_note(oldest_pub_date: datetime | None, newest_pub_date: datetime | None) -> str:
    """Build the suffix describing which dates the ticker feed actually covered.

    Args:
        oldest_pub_date: Oldest publication date seen, if any.
        newest_pub_date: Newest publication date seen, if any.

    Returns:
        A string starting with ``"; "`` that mentions the available date(s)
        formatted as ``YYYY-MM-DD``, or an empty string when neither date is
        available.
    """
    oldest_day = oldest_pub_date.strftime("%Y-%m-%d") if oldest_pub_date else None
    newest_day = newest_pub_date.strftime("%Y-%m-%d") if newest_pub_date else None

    if oldest_day and newest_day:
        return f"; the current yfinance ticker feed only covered {oldest_day} to {newest_day} at query time"
    if oldest_day:
        return f"; the current yfinance ticker feed only reached back to {oldest_day}"
    if newest_day:
        return f"; the current yfinance ticker feed only returned articles up to {newest_day}"
    return ""
|
|
|
|
|
|
def fetch_company_news_yfinance(
    ticker: str,
    start_date: str,
    end_date: str,
) -> tuple[list[NewsItem], datetime | None, datetime | None]:
    """Fetch ticker news and keep the items that fall inside a date window.

    Args:
        ticker: Symbol whose news feed is queried.
        start_date: Window start as ``YYYY-MM-DD``.
        end_date: Window end as ``YYYY-MM-DD``.

    Returns:
        A tuple ``(items, oldest, newest)``: at most
        ``_MAX_FILTERED_TICKER_ARTICLES`` filtered items, plus the oldest and
        newest publication dates reported by ``_collect_ticker_news`` for the
        unfiltered feed.

    Raises:
        ValueError: If either date string does not match ``%Y-%m-%d``.
    """
    window_start = datetime.strptime(start_date, "%Y-%m-%d")
    # The filter's upper bound is one day past end_date.
    window_end = datetime.strptime(end_date, "%Y-%m-%d") + relativedelta(days=1)

    feed_items, oldest_seen, newest_seen = _collect_ticker_news(ticker, window_start)
    in_window = filter_news_items_by_date(feed_items, start_date=window_start, end_date=window_end)
    capped = in_window[:_MAX_FILTERED_TICKER_ARTICLES]
    return capped, oldest_seen, newest_seen
|
|
|
|
|
|
def get_company_news_yfinance(
    ticker: str,
    start_date: str,
    end_date: str,
) -> str:
    """Return a text report of company news for a ticker and date range.

    Args:
        ticker: Symbol whose news is reported.
        start_date: Window start as ``YYYY-MM-DD``.
        end_date: Window end as ``YYYY-MM-DD``.

    Returns:
        The formatted report, a "No news found" message (with a note about
        the feed's date coverage when known), or an "Error fetching news"
        message. Exceptions are converted to that message rather than raised.
    """
    try:
        items, oldest_seen, newest_seen = fetch_company_news_yfinance(ticker, start_date, end_date)
        if items:
            heading = f"{ticker} Company News, from {start_date} to {end_date}"
            return format_news_items_report(
                heading,
                items,
                max_items=_MAX_FILTERED_TICKER_ARTICLES,
            )
        # Nothing in the window: say which dates the feed did cover.
        note = _format_coverage_note(oldest_seen, newest_seen)
        return f"No news found for {ticker} between {start_date} and {end_date}{note}"
    except Exception as exc:
        return f"Error fetching news for {ticker}: {exc}"
|
|
|
|
|
|
def _get_query_preset(region: str | None) -> list[str]:
    """Return the macro-news search queries for a region.

    Args:
        region: Region code, matched case-insensitively against the keys of
            ``_GLOBAL_QUERY_PRESETS``.

    Returns:
        The preset list for the region, or the ``"GLOBAL"`` preset when the
        region is empty, ``None`` or unknown.
    """
    fallback = _GLOBAL_QUERY_PRESETS["GLOBAL"]
    if region:
        return _GLOBAL_QUERY_PRESETS.get(region.upper(), fallback)
    return fallback
|
|
|
|
|
|
def fetch_macro_news_yfinance(
    curr_date: str,
    look_back_days: int = 7,
    limit: int = 10,
    region: str | None = None,
    language: str | None = None,
) -> list[NewsItem]:
    """Search yfinance for macro news inside a look-back window.

    Args:
        curr_date: End of the window as ``YYYY-MM-DD``.
        look_back_days: Number of days before ``curr_date`` the window starts.
        limit: Articles requested per query and maximum number of items
            returned. Non-positive values yield an empty list without any
            search being issued.
        region: Region code selecting the query preset; also stored, in upper
            case, as each item's ``country`` (``"GLOBAL"`` when omitted).
        language: Optional text appended to every search query.

    Returns:
        Up to ``limit`` de-duplicated items sorted newest first. Dated items
        outside ``[curr_date - look_back_days, curr_date + 1 day]`` are
        dropped; undated items are kept and sorted last.

    Raises:
        ValueError: If ``curr_date`` does not match ``%Y-%m-%d``.
    """
    curr_dt = datetime.strptime(curr_date, "%Y-%m-%d")
    # A non-positive limit can never yield items. Returning here avoids
    # pointless searches and the negative-slice surprise of ``filtered[:limit]``.
    if limit <= 0:
        return []

    window_start = curr_dt - relativedelta(days=look_back_days)
    window_end = curr_dt + relativedelta(days=1)
    country = (region or "GLOBAL").upper()
    queries = _get_query_preset(region)
    max_collected = limit * len(queries)

    all_news: list[NewsItem] = []
    for query in queries:
        # The query is bound as a default argument so the callable handed to
        # yf_retry keeps this iteration's value.
        search = yf_retry(
            lambda q=query: yf.Search(
                query=q if not language else f"{q} {language}",
                news_count=limit,
                enable_fuzzy_query=True,
            )
        )
        search_news = getattr(search, "news", None) or []
        all_news.extend(normalize_yfinance_article(article, country=country) for article in search_news)
        if len(all_news) >= max_collected:
            break

    filtered = []
    for item in dedupe_news_items(all_news):
        if item.published_at:
            # Compare naive values: tzinfo is dropped, not converted.
            published = item.published_at.replace(tzinfo=None)
            if published < window_start or published > window_end:
                continue
        filtered.append(item)

    filtered.sort(
        key=lambda article: article.published_at.timestamp() if article.published_at else float("-inf"),
        reverse=True,
    )
    return filtered[:limit]
|
|
|
|
|
|
def get_macro_news_yfinance(
    curr_date: str,
    look_back_days: int = 7,
    limit: int = 10,
    region: str | None = None,
    language: str | None = None,
) -> str:
    """Return a text report of macro news for the look-back window.

    Args:
        curr_date: End of the window as ``YYYY-MM-DD``.
        look_back_days: Number of days before ``curr_date`` the window starts.
        limit: Maximum number of items fetched and reported.
        region: Region code; shown in upper case in the report title
            (``"GLOBAL"`` when omitted).
        language: Passed through to ``fetch_macro_news_yfinance``.

    Returns:
        The formatted report, a "No global news found" message, or an
        "Error fetching global news" message. Exceptions are converted to
        that message rather than raised.
    """
    try:
        items = fetch_macro_news_yfinance(
            curr_date,
            look_back_days=look_back_days,
            limit=limit,
            region=region,
            language=language,
        )
        if not items:
            return f"No global news found for {curr_date}"

        window_start = datetime.strptime(curr_date, "%Y-%m-%d") - relativedelta(days=look_back_days)
        start_date = window_start.strftime("%Y-%m-%d")
        region_label = (region or "GLOBAL").upper()
        heading = f"{region_label} Macro News, from {start_date} to {curr_date}"
        return format_news_items_report(heading, items, max_items=limit)
    except Exception as exc:
        return f"Error fetching global news: {exc}"
|
|
|
|
|
|
def get_social_sentiment_yfinance(
    symbol: str,
    start_date: str,
    end_date: str,
) -> str:
    """Build a news-derived stand-in for social sentiment on a symbol.

    Args:
        symbol: Ticker whose company news is used.
        start_date: Window start as ``YYYY-MM-DD``.
        end_date: Window end as ``YYYY-MM-DD``.

    Returns:
        A multi-line text listing up to ten articles (date, title, source and
        summary), or a single message when no articles were found. Unlike the
        news report helpers, exceptions from the fetch are not caught here.
    """
    articles, _oldest, _newest = fetch_company_news_yfinance(symbol, start_date, end_date)
    if not articles:
        return (
            f"Dedicated social provider unavailable; no news-derived sentiment was found for {symbol} "
            f"between {start_date} and {end_date}."
        )

    lines = [
        f"Dedicated social provider unavailable; using news-derived sentiment for {symbol} from {start_date} to {end_date}.",
        "Use this as public-narrative context rather than a literal social-media feed.",
        "",
    ]
    for entry in articles[:10]:
        if entry.published_at:
            when = entry.published_at.strftime("%Y-%m-%d")
        else:
            when = "undated"
        narrative = entry.summary or "No summary available."
        lines.extend(
            [
                f"- {when}: {entry.title} ({entry.source})",
                f" Narrative: {narrative}",
            ]
        )
    return "\n".join(lines)
|
|
|
|
|
|
# Backward-compatible aliases: the older function names are bound to the
# current company-news and macro-news report functions.
get_news_yfinance = get_company_news_yfinance
get_global_news_yfinance = get_macro_news_yfinance
|