TradingAgents/tradingagents/dataflows/naver_news.py

115 lines
4.2 KiB
Python

from __future__ import annotations
import html
import re
from datetime import datetime, timedelta
from email.utils import parsedate_to_datetime
import requests
from tradingagents.agents.utils.instrument_resolver import resolve_instrument
from .api_keys import get_api_key
from .config import get_config
from .news_models import NewsItem, dedupe_news_items, filter_news_items_by_date, format_news_items_report
from .vendor_exceptions import VendorConfigurationError, VendorMalformedResponseError, VendorTransientError
# URL of the Naver Open API news-search endpoint (JSON variant); this is the
# address fetch_company_news_naver sends its GET request to.
_NAVER_NEWS_ENDPOINT = "https://openapi.naver.com/v1/search/news.json"
def _strip_html(text: str) -> str:
return re.sub(r"<[^>]+>", "", html.unescape(text or "")).strip()
def _get_headers() -> dict[str, str]:
    """Build the authentication headers sent with each Naver request.

    Returns:
        A mapping holding the ``X-Naver-Client-Id`` and
        ``X-Naver-Client-Secret`` headers.

    Raises:
        VendorConfigurationError: If either credential looked up through
            ``get_api_key`` is missing or empty.
    """
    credentials = {
        "X-Naver-Client-Id": get_api_key("NAVER_CLIENT_ID"),
        "X-Naver-Client-Secret": get_api_key("NAVER_CLIENT_SECRET"),
    }
    # Both values are required; a single missing one makes the pair unusable.
    if not all(credentials.values()):
        raise VendorConfigurationError("Naver News credentials are not configured.")
    return credentials
def normalize_naver_article(article: dict, *, fallback_symbol: str) -> NewsItem:
    """Convert one Naver search-result entry into a ``NewsItem``.

    Args:
        article: A single entry from the response's ``items`` list. The keys
            read here are ``pubDate``, ``title``, ``originallink``, ``link``
            and ``description``; all of them are optional.
        fallback_symbol: Symbol attached to the item (upper-cased), since the
            entry itself carries no ticker information.

    Returns:
        A ``NewsItem`` with tag-free title and summary, tagged as Korean
        ("ko"/"KR") content from the "naver" vendor. ``published_at`` is
        ``None`` when ``pubDate`` is absent or cannot be parsed.
    """
    raw_pub_date = article.get("pubDate")
    timestamp = None
    if raw_pub_date:
        try:
            timestamp = parsedate_to_datetime(raw_pub_date)
        except (TypeError, ValueError, IndexError):
            # An unparseable date is not fatal; the item is kept undated.
            timestamp = None

    headline = _strip_html(article.get("title", "No title"))
    ticker = fallback_symbol.upper()
    # Prefer the publisher's own URL, then the Naver-hosted link.
    link = article.get("originallink") or article.get("link") or ""
    abstract = _strip_html(article.get("description", ""))

    return NewsItem(
        title=headline,
        source="Naver News",
        published_at=timestamp,
        language="ko",
        country="KR",
        symbols=[ticker],
        topic_tags=[],
        sentiment=None,
        relevance=None,
        reliability=None,
        url=link,
        summary=abstract,
        raw_vendor="naver",
    )
def fetch_company_news_naver(
    symbol: str,
    start_date: str,
    end_date: str,
    display: int = 20,
) -> list[NewsItem]:
    """Fetch Naver News search results for *symbol* and keep those in a date window.

    Args:
        symbol: Instrument identifier, resolved through ``resolve_instrument``.
            For instruments whose profile country is "KR" the profile's
            display name is used as the search query; otherwise *symbol*
            itself is searched.
        start_date: Window start, formatted ``YYYY-MM-DD``.
        end_date: Window end, formatted ``YYYY-MM-DD``.
        display: Number of results requested from the API.

    Returns:
        De-duplicated ``NewsItem`` objects that pass
        ``filter_news_items_by_date`` for the requested window.

    Raises:
        ValueError: If *start_date* or *end_date* is not ``YYYY-MM-DD``.
        VendorConfigurationError: If the Naver credentials are not configured.
        VendorTransientError: If the HTTP request fails or returns an error
            status.
        VendorMalformedResponseError: If the body is not JSON, or the JSON
            does not contain an ``items`` list.
    """
    profile = resolve_instrument(symbol)
    search_query = profile.display_name if profile.country == "KR" else symbol

    # Validate the window before spending an API call on a request whose
    # results could never be filtered.
    start_dt = datetime.strptime(start_date, "%Y-%m-%d")
    # The upper bound is pushed one day past end_date, presumably so the whole
    # of end_date is covered; the exact boundary semantics live in
    # filter_news_items_by_date.
    # NOTE(review): these bounds are naive datetimes, while
    # normalize_naver_article can produce timezone-aware ones -- confirm that
    # filter_news_items_by_date copes with the mix.
    end_dt = datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)

    try:
        response = requests.get(
            _NAVER_NEWS_ENDPOINT,
            headers=_get_headers(),
            params={"query": search_query, "display": display, "sort": "date"},
            timeout=float(get_config().get("vendor_timeout", 15)),
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        raise VendorTransientError(f"Naver News request failed: {exc}") from exc

    # A 2xx response can still carry a non-JSON body (proxy or error page);
    # report that as a malformed payload rather than leaking a decode error.
    try:
        payload = response.json()
    except ValueError as exc:
        raise VendorMalformedResponseError("Naver News response was not valid JSON.") from exc

    # Guard against a JSON document that is valid but not an object.
    items = payload.get("items") if isinstance(payload, dict) else None
    if not isinstance(items, list):
        raise VendorMalformedResponseError("Naver News payload did not include an items list.")

    # Skip entries that are not objects so one bad entry cannot abort the batch.
    normalized = dedupe_news_items(
        [
            normalize_naver_article(article, fallback_symbol=profile.primary_symbol)
            for article in items
            if isinstance(article, dict)
        ]
    )
    return filter_news_items_by_date(normalized, start_date=start_dt, end_date=end_dt)
def get_company_news_naver(symbol: str, start_date: str, end_date: str) -> str:
    """Return a formatted company-news report for *symbol* from Naver News.

    Args:
        symbol: Instrument identifier passed to ``fetch_company_news_naver``.
        start_date: Window start, formatted ``YYYY-MM-DD``.
        end_date: Window end, formatted ``YYYY-MM-DD``.

    Returns:
        The text produced by ``format_news_items_report`` (limited to 15
        items), or a one-line "No news found" message when nothing matched.
    """
    articles = fetch_company_news_naver(symbol, start_date, end_date)
    if articles:
        heading = f"{symbol} Company News, from {start_date} to {end_date}"
        return format_news_items_report(heading, articles, max_items=15)
    return f"No news found for {symbol} between {start_date} and {end_date}"
def get_social_sentiment_naver(symbol: str, start_date: str, end_date: str) -> str:
    """Build a news-derived narrative digest used in place of social data.

    Up to ten company-news items are fetched from Naver and rendered as a
    bullet list, one line per headline plus an optional summary line.

    Args:
        symbol: Instrument identifier passed to ``fetch_company_news_naver``.
        start_date: Window start, formatted ``YYYY-MM-DD``.
        end_date: Window end, formatted ``YYYY-MM-DD``.

    Returns:
        A newline-joined text report, or a single sentence stating that no
        news-based sentiment was available for the window.
    """
    articles = fetch_company_news_naver(symbol, start_date, end_date, display=10)
    if not articles:
        return (
            f"Dedicated social provider unavailable; Naver company-news sentiment was unavailable for {symbol} "
            f"between {start_date} and {end_date}."
        )

    report = [
        f"Dedicated social provider unavailable; using Korean news-derived public narrative for {symbol} from {start_date} to {end_date}.",
        "",
    ]
    for article in articles[:10]:
        # Items without a parsed publication date are labelled explicitly.
        if article.published_at:
            day = article.published_at.strftime("%Y-%m-%d")
        else:
            day = "undated"
        report.append(f"- {day}: {article.title}")
        if article.summary:
            report.append(f" Narrative: {article.summary}")
    return "\n".join(report)