From e7584b8d838905ef9b1935e53fb113680e3ea2cf Mon Sep 17 00:00:00 2001 From: nornen0202 <75664002+nornen0202@users.noreply.github.com> Date: Tue, 7 Apr 2026 02:13:38 +0900 Subject: [PATCH] Fix scheduled news coverage and run date reporting (#10) --- tests/test_report_localization.py | 4 + tests/test_scheduled_analysis.py | 6 +- tests/test_yfinance_news.py | 68 +++++++++ tradingagents/agents/analysts/news_analyst.py | 2 +- .../agents/analysts/social_media_analyst.py | 2 +- tradingagents/agents/utils/agent_states.py | 1 + tradingagents/dataflows/yfinance_news.py | 143 +++++++++++++++--- tradingagents/graph/propagation.py | 6 +- tradingagents/graph/trading_graph.py | 5 +- tradingagents/reporting.py | 17 ++- tradingagents/scheduled/runner.py | 12 +- tradingagents/scheduled/site.py | 5 +- 12 files changed, 237 insertions(+), 34 deletions(-) create mode 100644 tests/test_yfinance_news.py diff --git a/tests/test_report_localization.py b/tests/test_report_localization.py index 6782e31b..7f0274e1 100644 --- a/tests/test_report_localization.py +++ b/tests/test_report_localization.py @@ -10,6 +10,8 @@ from tradingagents.reporting import save_report_bundle class ReportLocalizationTests(unittest.TestCase): def test_save_report_bundle_uses_korean_labels(self): final_state = { + "analysis_date": "2026-04-06", + "trade_date": "2026-04-02", "market_report": "시장 보고서 본문", "sentiment_report": "소셜 보고서 본문", "news_report": "뉴스 보고서 본문", @@ -39,6 +41,8 @@ class ReportLocalizationTests(unittest.TestCase): self.assertIn("트레이딩 분석 리포트", report_text) self.assertIn("생성 시각", report_text) + self.assertIn("분석 기준일: 2026-04-06", report_text) + self.assertIn("시장 데이터 기준일: 2026-04-02", report_text) self.assertIn("애널리스트 팀 리포트", report_text) self.assertIn("포트폴리오 매니저 최종 판단", report_text) self.assertIn("시장 애널리스트", report_text) diff --git a/tests/test_scheduled_analysis.py b/tests/test_scheduled_analysis.py index 6adadcc5..ff854fca 100644 --- a/tests/test_scheduled_analysis.py +++ b/tests/test_scheduled_analysis.py @@ -24,13 +24,14 @@ class _FakeTradingAgentsGraph: self.config = config or {} self.callbacks = callbacks or [] - def propagate(self, ticker, trade_date): + def propagate(self, ticker, trade_date, analysis_date=None): if ticker == "FAIL": raise RuntimeError("synthetic failure") final_state = { "company_of_interest": ticker, "trade_date": trade_date, + "analysis_date": analysis_date or trade_date, "market_report": f"## Market\n{ticker} market analysis", "sentiment_report": f"## Sentiment\n{ticker} sentiment analysis", "news_report": f"## News\n{ticker} news analysis", @@ -104,6 +105,7 @@ subtitle = "Automated" self.assertEqual(manifest["settings"]["provider"], "codex") self.assertEqual(manifest["settings"]["deep_model"], "gpt-5.4") self.assertEqual(manifest["settings"]["quick_model"], "gpt-5.4") + self.assertEqual(manifest["tickers"][0]["analysis_date"], manifest["started_at"][:10]) run_dir = archive_dir / "runs" / manifest["started_at"][:4] / manifest["run_id"] self.assertTrue((run_dir / "run.json").exists()) @@ -118,6 +120,7 @@ subtitle = "Automated" self.assertIn("partial failure", index_html) self.assertIn("NVDA", run_html) self.assertIn("Rendered report", ticker_html) + self.assertIn("Analysis date", ticker_html) self.assertTrue((site_dir / "downloads" / manifest["run_id"] / "NVDA" / "complete_report.md").exists()) def test_main_site_only_rebuilds_from_existing_archive(self): @@ -162,6 +165,7 @@ subtitle = "Automated" { "ticker": "NVDA", "status": "success", + "analysis_date": "2026-04-05", "trade_date": "2026-04-04", "decision": "BUY", "started_at": "2026-04-05T09:13:00+09:00", diff --git a/tests/test_yfinance_news.py b/tests/test_yfinance_news.py new file mode 100644 index 00000000..9275b294 --- /dev/null +++ b/tests/test_yfinance_news.py @@ -0,0 +1,68 @@ +import unittest +from unittest.mock import patch + +from tradingagents.dataflows.yfinance_news import get_news_yfinance + + +def _article(date_value: str, title: str, link: str) -> dict: + return { + "content": { + "title": title, + "summary": f"Summary for {title}", + "provider": {"displayName": "Unit Test"}, + "canonicalUrl": {"url": link}, + "pubDate": f"{date_value}T12:00:00Z", + } + } + + +class _FakeTicker: + def __init__(self, full_news: list[dict]): + self.full_news = list(full_news) + + def get_news(self, count=20): + return self.full_news[:count] + + +class YFinanceNewsTests(unittest.TestCase): + def test_get_news_yfinance_expands_feed_depth_to_cover_requested_window(self): + recent_articles = [ + _article(f"2026-04-{day:02d}", f"Recent article {day}", f"https://example.com/recent-{day}") + for day in range(6, 2, -1) + for _ in range(15) + ] + older_articles = [ + _article("2026-04-02", "Alphabet April 2 article", "https://example.com/apr2"), + _article("2026-04-01", "Alphabet April 1 article", "https://example.com/apr1"), + ] + fake_ticker = _FakeTicker(recent_articles + older_articles) + + with ( + patch("tradingagents.dataflows.yfinance_news.yf.Ticker", return_value=fake_ticker), + patch("tradingagents.dataflows.yfinance_news.yf_retry", side_effect=lambda fn: fn()), + ): + result = get_news_yfinance("GOOGL", "2026-03-26", "2026-04-02") + + self.assertIn("Alphabet April 2 article", result) + self.assertIn("[2026-04-02]", result) + + def test_get_news_yfinance_reports_feed_coverage_when_window_is_unavailable(self): + fake_ticker = _FakeTicker( + [ + _article("2026-04-06", "Fresh article", "https://example.com/fresh"), + _article("2026-04-05", "Fresh article 2", "https://example.com/fresh-2"), + ] + ) + + with ( + patch("tradingagents.dataflows.yfinance_news.yf.Ticker", return_value=fake_ticker), + patch("tradingagents.dataflows.yfinance_news.yf_retry", side_effect=lambda fn: fn()), + ): + result = get_news_yfinance("GOOGL", "2026-03-26", "2026-04-02") + + self.assertIn("No news found for GOOGL between 2026-03-26 and 2026-04-02", result) + self.assertIn("2026-04-05 to 2026-04-06", result) + + +if __name__ == "__main__": + unittest.main() diff --git a/tradingagents/agents/analysts/news_analyst.py b/tradingagents/agents/analysts/news_analyst.py index e0fe93c5..275a498a 100644 --- a/tradingagents/agents/analysts/news_analyst.py +++ b/tradingagents/agents/analysts/news_analyst.py @@ -10,7 +10,7 @@ from tradingagents.dataflows.config import get_config def create_news_analyst(llm): def news_analyst_node(state): - current_date = state["trade_date"] + current_date = state.get("analysis_date") or state["trade_date"] instrument_context = build_instrument_context(state["company_of_interest"]) tools = [ diff --git a/tradingagents/agents/analysts/social_media_analyst.py b/tradingagents/agents/analysts/social_media_analyst.py index 34a53c46..2f97d174 100644 --- a/tradingagents/agents/analysts/social_media_analyst.py +++ b/tradingagents/agents/analysts/social_media_analyst.py @@ -5,7 +5,7 @@ from tradingagents.dataflows.config import get_config def create_social_media_analyst(llm): def social_media_analyst_node(state): - current_date = state["trade_date"] + current_date = state.get("analysis_date") or state["trade_date"] instrument_context = build_instrument_context(state["company_of_interest"]) tools = [ diff --git a/tradingagents/agents/utils/agent_states.py b/tradingagents/agents/utils/agent_states.py index 6423b936..41fcb037 100644 --- a/tradingagents/agents/utils/agent_states.py +++ b/tradingagents/agents/utils/agent_states.py @@ -46,6 +46,7 @@ class RiskDebateState(TypedDict): class AgentState(MessagesState): company_of_interest: Annotated[str, "Company that we are interested in trading"] trade_date: Annotated[str, "What date we are trading at"] + analysis_date: Annotated[str, "What date the full analysis is being generated on"] sender: Annotated[str, "Agent that sent this message"] diff --git a/tradingagents/dataflows/yfinance_news.py b/tradingagents/dataflows/yfinance_news.py index dd1046f5..a20c4034 100644 --- a/tradingagents/dataflows/yfinance_news.py +++ b/tradingagents/dataflows/yfinance_news.py @@ -1,12 +1,46 @@ """yfinance-based news data fetching functions.""" -import yfinance as yf -from datetime import datetime +from datetime import datetime, timezone + from dateutil.relativedelta import relativedelta +import yfinance as yf from .stockstats_utils import yf_retry +_TICKER_NEWS_FETCH_COUNTS = (20, 50, 100) +_MAX_FILTERED_TICKER_ARTICLES = 25 + + +def _parse_pub_date(raw_value) -> datetime | None: + """Normalize yfinance pub date values into a timezone-aware datetime.""" + if raw_value in (None, ""): + return None + + if isinstance(raw_value, datetime): + return raw_value + + if isinstance(raw_value, (int, float)): + try: + return datetime.fromtimestamp(raw_value, tz=timezone.utc) + except (OverflowError, OSError, ValueError): + return None + + if isinstance(raw_value, str): + normalized = raw_value.strip() + if not normalized: + return None + try: + return datetime.fromisoformat(normalized.replace("Z", "+00:00")) + except ValueError: + try: + return datetime.fromtimestamp(float(normalized), tz=timezone.utc) + except (OverflowError, OSError, ValueError): + return None + + return None + + def _extract_article_data(article: dict) -> dict: """Extract article data from yfinance news format (handles nested 'content' structure).""" # Handle nested content structure @@ -22,13 +56,7 @@ def _extract_article_data(article: dict) -> dict: link = url_obj.get("url", "") # Get publish date - pub_date_str = content.get("pubDate", "") - pub_date = None - if pub_date_str: - try: - pub_date = datetime.fromisoformat(pub_date_str.replace("Z", "+00:00")) - except (ValueError, AttributeError): - pass + pub_date = _parse_pub_date(content.get("pubDate", "")) return { "title": title, @@ -44,10 +72,79 @@ def _extract_article_data(article: dict) -> dict: "summary": article.get("summary", ""), "publisher": article.get("publisher", "Unknown"), "link": article.get("link", ""), - "pub_date": None, + "pub_date": _parse_pub_date(article.get("providerPublishTime")), } +def _article_identity(article: dict) -> str: + """Return a stable identity key for deduplicating news articles.""" + link = article.get("link", "").strip() + if link: + return link + + title = article.get("title", "").strip() + publisher = article.get("publisher", "").strip() + pub_date = article.get("pub_date") + stamp = pub_date.isoformat() if isinstance(pub_date, datetime) else "" + return f"{publisher}::{title}::{stamp}" + + +def _collect_ticker_news( + ticker: str, + start_dt: datetime, +) -> tuple[list[dict], datetime | None, datetime | None]: + """Fetch increasingly larger ticker feeds until the requested window is covered.""" + collected: list[dict] = [] + seen: set[str] = set() + oldest_pub_date = None + newest_pub_date = None + + for count in _TICKER_NEWS_FETCH_COUNTS: + news = yf_retry(lambda batch_size=count: yf.Ticker(ticker).get_news(count=batch_size)) + if not news: + continue + + for article in news: + data = _extract_article_data(article) + identity = _article_identity(data) + if identity in seen: + continue + seen.add(identity) + collected.append(data) + + pub_date = data.get("pub_date") + if pub_date: + if newest_pub_date is None or pub_date > newest_pub_date: + newest_pub_date = pub_date + if oldest_pub_date is None or pub_date < oldest_pub_date: + oldest_pub_date = pub_date + + if oldest_pub_date and oldest_pub_date.replace(tzinfo=None) <= start_dt: + break + if len(news) < count: + break + + collected.sort( + key=lambda article: article["pub_date"].timestamp() if article.get("pub_date") else float("-inf"), + reverse=True, + ) + return collected, oldest_pub_date, newest_pub_date + + +def _format_coverage_note(oldest_pub_date: datetime | None, newest_pub_date: datetime | None) -> str: + """Describe the yfinance coverage window when no article matches the requested range.""" + if oldest_pub_date and newest_pub_date: + return ( + "; the current yfinance ticker feed only covered " + f"{oldest_pub_date.strftime('%Y-%m-%d')} to {newest_pub_date.strftime('%Y-%m-%d')} at query time" + ) + if oldest_pub_date: + return f"; the current yfinance ticker feed only reached back to {oldest_pub_date.strftime('%Y-%m-%d')}" + if newest_pub_date: + return f"; the current yfinance ticker feed only returned articles up to {newest_pub_date.strftime('%Y-%m-%d')}" + return "" + + def get_news_yfinance( ticker: str, start_date: str, @@ -65,38 +162,40 @@ def get_news_yfinance( Formatted string containing news articles """ try: - stock = yf.Ticker(ticker) - news = yf_retry(lambda: stock.get_news(count=20)) - - if not news: - return f"No news found for {ticker}" - - # Parse date range for filtering start_dt = datetime.strptime(start_date, "%Y-%m-%d") end_dt = datetime.strptime(end_date, "%Y-%m-%d") + articles, oldest_pub_date, newest_pub_date = _collect_ticker_news(ticker, start_dt) + + if not articles: + return f"No news found for {ticker}" news_str = "" filtered_count = 0 - for article in news: - data = _extract_article_data(article) - + for data in articles: # Filter by date if publish time is available if data["pub_date"]: pub_date_naive = data["pub_date"].replace(tzinfo=None) if not (start_dt <= pub_date_naive <= end_dt + relativedelta(days=1)): continue - news_str += f"### {data['title']} (source: {data['publisher']})\n" + date_prefix = "" + if data["pub_date"]: + date_prefix = f"[{data['pub_date'].strftime('%Y-%m-%d')}] " + + news_str += f"### {date_prefix}{data['title']} (source: {data['publisher']})\n" if data["summary"]: news_str += f"{data['summary']}\n" if data["link"]: news_str += f"Link: {data['link']}\n" news_str += "\n" filtered_count += 1 + if filtered_count >= _MAX_FILTERED_TICKER_ARTICLES: + break if filtered_count == 0: - return f"No news found for {ticker} between {start_date} and {end_date}" + coverage_note = _format_coverage_note(oldest_pub_date, newest_pub_date) + return f"No news found for {ticker} between {start_date} and {end_date}{coverage_note}" return f"## {ticker} News, from {start_date} to {end_date}:\n\n{news_str}" diff --git a/tradingagents/graph/propagation.py b/tradingagents/graph/propagation.py index 0fd10c0c..a7281718 100644 --- a/tradingagents/graph/propagation.py +++ b/tradingagents/graph/propagation.py @@ -16,13 +16,17 @@ class Propagator: self.max_recur_limit = max_recur_limit def create_initial_state( - self, company_name: str, trade_date: str + self, + company_name: str, + trade_date: str, + analysis_date: str | None = None, ) -> Dict[str, Any]: """Create the initial state for the agent graph.""" return { "messages": [("human", company_name)], "company_of_interest": company_name, "trade_date": str(trade_date), + "analysis_date": str(analysis_date or trade_date), "investment_debate_state": InvestDebateState( { "bull_history": "", diff --git a/tradingagents/graph/trading_graph.py b/tradingagents/graph/trading_graph.py index 0723ebf5..0484616b 100644 --- a/tradingagents/graph/trading_graph.py +++ b/tradingagents/graph/trading_graph.py @@ -202,14 +202,14 @@ class TradingAgentsGraph: ), } - def propagate(self, company_name, trade_date): + def propagate(self, company_name, trade_date, analysis_date=None): """Run the trading agents graph for a company on a specific date.""" self.ticker = company_name # Initialize state init_agent_state = self.propagator.create_initial_state( - company_name, trade_date + company_name, trade_date, analysis_date=analysis_date ) args = self.propagator.get_graph_args() @@ -245,6 +245,7 @@ class TradingAgentsGraph: self.log_states_dict[str(trade_date)] = { "company_of_interest": final_state["company_of_interest"], "trade_date": final_state["trade_date"], + "analysis_date": final_state.get("analysis_date", final_state["trade_date"]), "market_report": final_state["market_report"], "sentiment_report": final_state["sentiment_report"], "news_report": final_state["news_report"], diff --git a/tradingagents/reporting.py b/tradingagents/reporting.py index f0b829e7..4537cc92 100644 --- a/tradingagents/reporting.py +++ b/tradingagents/reporting.py @@ -19,6 +19,8 @@ def save_report_bundle( save_path = Path(save_path) save_path.mkdir(parents=True, exist_ok=True) labels = _labels_for(language) + analysis_date = _coerce_text(final_state.get("analysis_date")) + trade_date = _coerce_text(final_state.get("trade_date")) sections: list[str] = [] @@ -104,10 +106,13 @@ def save_report_bundle( f"### {labels['portfolio_manager']}\n{portfolio_decision}" ) - header = ( - f"# {labels['report_title']}: {ticker}\n\n" - f"{labels['generated_at']}: {generated_at.strftime('%Y-%m-%d %H:%M:%S')}\n\n" - ) + metadata_lines = [f"{labels['generated_at']}: {generated_at.strftime('%Y-%m-%d %H:%M:%S')}"] + if analysis_date: + metadata_lines.append(f"{labels['analysis_date']}: {analysis_date}") + if trade_date: + metadata_lines.append(f"{labels['trade_date']}: {trade_date}") + + header = f"# {labels['report_title']}: {ticker}\n\n" + "\n".join(metadata_lines) + "\n\n" complete_report = save_path / "complete_report.md" _write_text(complete_report, header + "\n\n".join(sections)) return complete_report @@ -132,6 +137,8 @@ def _labels_for(language: str) -> dict[str, str]: return { "report_title": "트레이딩 분석 리포트", "generated_at": "생성 시각", + "analysis_date": "분석 기준일", + "trade_date": "시장 데이터 기준일", "section_analysts": "I. 애널리스트 팀 리포트", "section_research": "II. 리서치 팀 판단", "section_trading": "III. 트레이딩 팀 계획", @@ -154,6 +161,8 @@ def _labels_for(language: str) -> dict[str, str]: return { "report_title": "Trading Analysis Report", "generated_at": "Generated", + "analysis_date": "Analysis date", + "trade_date": "Market data date", "section_analysts": "I. Analyst Team Reports", "section_research": "II. Research Team Decision", "section_trading": "III. Trading Team Plan", diff --git a/tradingagents/scheduled/runner.py b/tradingagents/scheduled/runner.py index 7ccb3f5f..7ed27e65 100644 --- a/tradingagents/scheduled/runner.py +++ b/tradingagents/scheduled/runner.py @@ -159,6 +159,7 @@ def _run_single_ticker( ticker_started = datetime.now(ZoneInfo(config.run.timezone)) timer_start = perf_counter() + analysis_date = ticker_started.date().isoformat() try: trade_date = resolve_trade_date(ticker, config) @@ -169,7 +170,11 @@ def _run_single_ticker( config=_graph_config(config, engine_results_dir), callbacks=[stats_handler], ) - final_state, decision = graph.propagate(ticker, trade_date) + final_state, decision = graph.propagate( + ticker, + trade_date, + analysis_date=analysis_date, + ) report_dir = ticker_dir / "report" report_file = save_report_bundle( @@ -198,6 +203,7 @@ def _run_single_ticker( "ticker": ticker, "status": "success", "trade_date": trade_date, + "analysis_date": analysis_date, "decision": str(decision), "started_at": ticker_started.isoformat(), "finished_at": datetime.now(ZoneInfo(config.run.timezone)).isoformat(), @@ -216,6 +222,7 @@ def _run_single_ticker( "ticker": ticker, "status": "success", "trade_date": trade_date, + "analysis_date": analysis_date, "decision": str(decision), "started_at": ticker_started.isoformat(), "finished_at": analysis_payload["finished_at"], @@ -232,6 +239,7 @@ def _run_single_ticker( error_payload = { "ticker": ticker, "status": "failed", + "analysis_date": analysis_date, "error": str(exc), "traceback": traceback.format_exc(), "started_at": ticker_started.isoformat(), @@ -244,6 +252,7 @@ def _run_single_ticker( return { "ticker": ticker, "status": "failed", + "analysis_date": analysis_date, "trade_date": None, "decision": None, "error": str(exc), @@ -285,6 +294,7 @@ def _serialize_final_state(final_state: dict[str, Any]) -> dict[str, Any]: return { "company_of_interest": final_state.get("company_of_interest"), "trade_date": final_state.get("trade_date"), + "analysis_date": final_state.get("analysis_date"), "market_report": final_state.get("market_report"), "sentiment_report": final_state.get("sentiment_report"), "news_report": final_state.get("news_report"), diff --git a/tradingagents/scheduled/site.py b/tradingagents/scheduled/site.py index 48261c2d..ed17ab28 100644 --- a/tradingagents/scheduled/site.py +++ b/tradingagents/scheduled/site.py @@ -163,6 +163,7 @@ def _render_run_page(manifest: dict[str, Any], settings: SiteSettings) -> str: {_escape(ticker_summary['ticker'])} {_escape(ticker_summary['status'])} +
Analysis date{_escape(ticker_summary.get('analysis_date') or '-')}
Trade date{_escape(ticker_summary.get('trade_date') or '-')}
Duration{ticker_summary.get('duration_seconds', 0):.1f}s
Decision{_escape(ticker_summary.get('decision') or ticker_summary.get('error') or '-')}
@@ -239,10 +240,12 @@ def _render_ticker_page(Ticker report
{_escape(ticker_summary.get('trade_date') or '-')} / {_escape(ticker_summary['status'])}
+Analysis {_escape(ticker_summary.get('analysis_date') or '-')} / Market {_escape(ticker_summary.get('trade_date') or '-')} / {_escape(ticker_summary['status'])}
Analysis date{_escape(ticker_summary.get('analysis_date') or '-')}
+Trade date{_escape(ticker_summary.get('trade_date') or '-')}
Decision{_escape(ticker_summary.get('decision') or '-')}
Duration{ticker_summary.get('duration_seconds', 0):.1f}s
LLM calls{ticker_summary.get('metrics', {}).get('llm_calls', 0)}