TradingAgents/tradingagents/dataflows/local_news.py

94 lines
3.5 KiB
Python

from datetime import date, timedelta, datetime
import json
import logging
import os
from .config import DATA_DIR
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
def get_local_news(ticker, start_date: str, end_date: str) -> dict[str, str] | str:
"""Returns live and historical market news & sentiment data from premier news outlets worldwide.
Covers stocks, cryptocurrencies, forex, and topics like fiscal policy, mergers & acquisitions, IPOs.
Args:
ticker: Stock symbol for news articles.
start_date: Start date for news search.
end_date: End date for news search.
Returns:
Dictionary containing news sentiment data or JSON string.
"""
template = lambda feed: f"""{{
"items": {len(feed)},
"sentiment_score_definition": "x <= -0.65: Bearish; -0.65 < x <= -0.25: Somewhat-Bearish; -0.25 < x < 0.25: Neutral; 0.25 <= x < 0.65: Somewhat_Bullish; x >= 0.65: Bullish",
"relevance_score_definition": "0 < x <= 1, with a higher score indicating higher relevance.",
"feed": {feed}
}}"""
start_date_date = date.fromisoformat(start_date)
end_date_date = date.fromisoformat(end_date)
total_days = (end_date_date - start_date_date).days
dates_to_fetch = [start_date_date + timedelta(days=i) for i in range(total_days)]
feed = {}
for date_ in dates_to_fetch:
feed[str(date_)] = filter_irrelevant_news(load_news(ticker, date_))
return template(feed)
def load_news(ticker: str, date: date, save_dir: str = 'news/daily_news_processed') -> list:
    """Load the stored news articles for one ticker and one day.

    The file is looked up at ``<DATA_DIR>/<save_dir>/<ticker>/<date>.json``.

    Args:
        ticker (str): The stock ticker symbol.
        date (date): The day for which to load news articles; its string form
            is used as the file name.
        save_dir (str): Directory, relative to ``DATA_DIR``, that contains the
            per-ticker folders.

    Returns:
        list: The articles decoded from the file, or an empty list when the
        file is missing, cannot be read, is not valid JSON, or does not hold a
        JSON array.
    """
    filename = os.path.join(DATA_DIR, save_dir, str(ticker), f"{date}.json")
    try:
        with open(filename, encoding="utf-8") as f:
            news = json.load(f)
    except FileNotFoundError:
        # A day without a news file is reported at info level, separately
        # from real read/parse failures.
        log.info("No news file found at %s", filename)
        return []
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        log.error("Error loading news from %s: %s", filename, e)
        return []

    if not isinstance(news, list):
        log.warning(
            "Expected a list of articles in %s, got %s", filename, type(news).__name__
        )
        return []
    return news
def filter_irrelevant_news(news_list: list, threshold: float = 0.6) -> list:
    """Keep only the news articles whose relevancy score reaches a threshold.

    Each kept article is reduced to its ``summary``, ``relevancy_score`` and
    ``sentiment_score`` fields.

    Args:
        news_list (list): List of news articles (dicts) with relevancy scores.
        threshold (float): Minimum relevancy score to include a news article
            (default: 0.6).

    Returns:
        list: Reduced articles with ``relevancy_score >= threshold``. Items
        that are not dicts, or whose score is missing or not a number, are
        skipped with a warning. An empty or ``None`` input gives ``[]``. If
        filtering fails unexpectedly, the input is returned unfiltered.
    """
    if not news_list:
        log.info("No news articles provided for filtering.")
        return []

    filtered_news = []
    try:
        for news in news_list:
            score = news.get("relevancy_score") if isinstance(news, dict) else None
            # bool is a subclass of int, so it has to be rejected explicitly.
            if isinstance(score, bool) or not isinstance(score, (int, float)):
                log.warning("News item missing valid 'relevancy_score': %s", news)
                continue
            if score >= threshold:
                filtered_news.append(
                    {
                        "summary": news.get("summary", ""),
                        "relevancy_score": score,
                        "sentiment_score": news.get("sentiment_score", 0),
                    }
                )
    except Exception as e:
        # Best-effort fallback kept from the original contract: hand the data
        # back unfiltered rather than failing the caller.
        log.error("Error filtering news: %s", e)
        return news_list

    log.info(
        "Filtered %d out of %d news articles with relevancy_score >= %s.",
        len(filtered_news),
        len(news_list),
        threshold,
    )
    return filtered_news