115 lines
4.2 KiB
Python
115 lines
4.2 KiB
Python
from __future__ import annotations
|
|
|
|
import html
|
|
import re
|
|
from datetime import datetime, timedelta
|
|
from email.utils import parsedate_to_datetime
|
|
|
|
import requests
|
|
|
|
from tradingagents.agents.utils.instrument_resolver import resolve_instrument
|
|
|
|
from .api_keys import get_api_key
|
|
from .config import get_config
|
|
from .news_models import NewsItem, dedupe_news_items, filter_news_items_by_date, format_news_items_report
|
|
from .vendor_exceptions import VendorConfigurationError, VendorMalformedResponseError, VendorTransientError
|
|
|
|
|
|
_NAVER_NEWS_ENDPOINT = "https://openapi.naver.com/v1/search/news.json"
|
|
|
|
|
|
def _strip_html(text: str) -> str:
|
|
return re.sub(r"<[^>]+>", "", html.unescape(text or "")).strip()
|
|
|
|
|
|
def _get_headers() -> dict[str, str]:
|
|
client_id = get_api_key("NAVER_CLIENT_ID")
|
|
client_secret = get_api_key("NAVER_CLIENT_SECRET")
|
|
if not client_id or not client_secret:
|
|
raise VendorConfigurationError("Naver News credentials are not configured.")
|
|
return {
|
|
"X-Naver-Client-Id": client_id,
|
|
"X-Naver-Client-Secret": client_secret,
|
|
}
|
|
|
|
|
|
def normalize_naver_article(article: dict, *, fallback_symbol: str) -> NewsItem:
|
|
published_at = None
|
|
if article.get("pubDate"):
|
|
try:
|
|
published_at = parsedate_to_datetime(article["pubDate"])
|
|
except (TypeError, ValueError, IndexError):
|
|
published_at = None
|
|
return NewsItem(
|
|
title=_strip_html(article.get("title", "No title")),
|
|
source="Naver News",
|
|
published_at=published_at,
|
|
language="ko",
|
|
country="KR",
|
|
symbols=[fallback_symbol.upper()],
|
|
topic_tags=[],
|
|
sentiment=None,
|
|
relevance=None,
|
|
reliability=None,
|
|
url=article.get("originallink") or article.get("link") or "",
|
|
summary=_strip_html(article.get("description", "")),
|
|
raw_vendor="naver",
|
|
)
|
|
|
|
|
|
def fetch_company_news_naver(symbol: str, start_date: str, end_date: str, display: int = 20) -> list[NewsItem]:
|
|
profile = resolve_instrument(symbol)
|
|
search_query = profile.display_name if profile.country == "KR" else symbol
|
|
try:
|
|
response = requests.get(
|
|
_NAVER_NEWS_ENDPOINT,
|
|
headers=_get_headers(),
|
|
params={"query": search_query, "display": display, "sort": "date"},
|
|
timeout=float(get_config().get("vendor_timeout", 15)),
|
|
)
|
|
response.raise_for_status()
|
|
except requests.RequestException as exc:
|
|
raise VendorTransientError(f"Naver News request failed: {exc}") from exc
|
|
|
|
payload = response.json()
|
|
items = payload.get("items")
|
|
if not isinstance(items, list):
|
|
raise VendorMalformedResponseError("Naver News payload did not include an items list.")
|
|
|
|
start_dt = datetime.strptime(start_date, "%Y-%m-%d")
|
|
end_dt = datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)
|
|
normalized = dedupe_news_items(
|
|
[normalize_naver_article(article, fallback_symbol=profile.primary_symbol) for article in items]
|
|
)
|
|
return filter_news_items_by_date(normalized, start_date=start_dt, end_date=end_dt)
|
|
|
|
|
|
def get_company_news_naver(symbol: str, start_date: str, end_date: str) -> str:
|
|
items = fetch_company_news_naver(symbol, start_date, end_date)
|
|
if not items:
|
|
return f"No news found for {symbol} between {start_date} and {end_date}"
|
|
return format_news_items_report(
|
|
f"{symbol} Company News, from {start_date} to {end_date}",
|
|
items,
|
|
max_items=15,
|
|
)
|
|
|
|
|
|
def get_social_sentiment_naver(symbol: str, start_date: str, end_date: str) -> str:
|
|
items = fetch_company_news_naver(symbol, start_date, end_date, display=10)
|
|
if not items:
|
|
return (
|
|
f"Dedicated social provider unavailable; Naver company-news sentiment was unavailable for {symbol} "
|
|
f"between {start_date} and {end_date}."
|
|
)
|
|
lines = [
|
|
f"Dedicated social provider unavailable; using Korean news-derived public narrative for {symbol} from {start_date} to {end_date}.",
|
|
"",
|
|
]
|
|
for item in items[:10]:
|
|
stamp = item.published_at.strftime("%Y-%m-%d") if item.published_at else "undated"
|
|
lines.append(f"- {stamp}: {item.title}")
|
|
if item.summary:
|
|
lines.append(f" Narrative: {item.summary}")
|
|
return "\n".join(lines)
|