# TradingAgents/tradingagents/dataflows/local_news.py

from datetime import date, timedelta, datetime
import json
import logging
import os

from .config import DATA_DIR

log = logging.getLogger(__name__)

def get_local_news(ticker, start_date: str, end_date: str) -> dict[str, str] | str:
    """Returns live and historical market news & sentiment data from premier news outlets worldwide.

    Covers stocks, cryptocurrencies, forex, and topics like fiscal policy, mergers & acquisitions, IPOs.

    Args:
        ticker: Stock symbol for news articles.
        start_date: Start date for news search.
        end_date: End date for news search.

    Returns:
        Dictionary containing news sentiment data or JSON string.
    """

    template = lambda feed: f"""{{
        "items": {len(feed)},
        "sentiment_score_definition": "x <= -0.65: Bearish; -0.65 < x <= -0.25: Somewhat-Bearish; -0.25 < x < 0.25: Neutral; 0.25 <= x < 0.65: Somewhat_Bullish; x >= 0.65: Bullish",
        "relevance_score_definition": "0 < x <= 1, with a higher score indicating higher relevance.",
        "feed": {feed}
    }}"""

    start_date_date = date.fromisoformat(start_date)
    end_date_date = date.fromisoformat(end_date)

    total_days = (end_date_date - start_date_date).days
    dates_to_fetch = [start_date_date + timedelta(days=i) for i in range(total_days)]

    feed = {}
    for date_ in dates_to_fetch:
        feed[str(date_)] = filter_irrelevant_news(load_news(ticker, date_))
    return template(feed)

def load_news(ticker: str, date: date, save_dir: str = 'news/daily_news_processed') -> list:
    """
    Load one day's news articles for a ticker from a local JSON file.

    The file is read from ``<DATA_DIR>/<save_dir>/<ticker>/<date>.json``.

    Args:
        ticker (str): The stock ticker symbol.
        date (date): The day for which to load news articles; its ``str()``
            form is used as the file name.
        save_dir (str): Directory, joined onto ``DATA_DIR``, that holds the
            per-ticker news folders.
    Returns:
        list: The parsed JSON content of the file (presumably a list of
            article dicts; the shape is not validated here), or an empty
            list if the file is missing, unreadable or not valid JSON.
    """
    news_dir = os.path.join(DATA_DIR, save_dir)
    filename = f"{news_dir}/{ticker}/{date}.json"
    try:
        # JSON files are UTF-8 by specification; do not depend on the
        # platform's default locale encoding.
        with open(filename, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        # A day without a news file is treated as a day without news.
        log.info("No news file found at %s", filename)
        return []
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        log.warning("Error loading news from %s: %s", filename, e)
        return []

def filter_irrelevant_news(news_list: list, threshold: float = 0.6) -> list:
    """
    Filter news articles based on their relevancy score.

    Each kept article is reduced to its ``summary``, ``relevancy_score`` and
    ``sentiment_score`` fields. Entries that are not dicts, or whose
    ``relevancy_score`` is missing or not a number, are skipped with a
    warning; they do not stop the remaining entries from being filtered.

    Args:
        news_list (list): List of news articles with relevancy scores.
        threshold (float): Minimum relevancy score to include a news article (default: 0.6).

    Returns:
        list: Filtered list of news articles with relevancy scores >= threshold.
            An empty list is returned for ``None`` or empty input. If an
            unexpected error occurs, the input is returned unfiltered.
    """
    try:
        if not news_list:
            log.info("No news articles provided for filtering.")
            return []
        filtered_news = []
        for news in news_list:
            score = news.get("relevancy_score") if isinstance(news, dict) else None
            if not isinstance(score, (float, int)):
                # Covers non-dict entries, a missing key and non-numeric scores.
                log.warning("News item missing valid 'relevancy_score': %s", news)
                continue
            if score >= threshold:
                filtered_news.append(
                    {
                        "summary": news.get("summary", ""),
                        "relevancy_score": score,
                        "sentiment_score": news.get("sentiment_score", 0),
                    }
                )
        log.info(
            "Filtered %d out of %d news articles with relevancy_score >= %s.",
            len(filtered_news),
            len(news_list),
            threshold,
        )
    except Exception as e:
        # Deliberate fail-open: on an unforeseen error hand back the input
        # rather than losing the news entirely.
        log.error("Error filtering news: %s", e)
        filtered_news = news_list
    return filtered_news