def _parse_google_news_date(date_str: str) -> datetime:
    """Convert a Google News date label into a naive local ``datetime``.

    Recognised forms, checked in this order:

    1. Relative labels such as ``"5 mins ago"``, ``"2 hours ago"``,
       ``"an hour ago"``, ``"3 weeks ago"`` or ``"1 year ago"``.
    2. Any label containing the word ``"yesterday"``.
    3. Anything else is handed to ``dateutil``'s fuzzy parser
       (e.g. ``"Jan 5, 2024"``).

    This is a best-effort helper: it never raises. Whenever the input is
    missing, is not a string, or cannot be interpreted, the current local
    time is returned instead.

    Args:
        date_str: Raw date label taken from a news result.

    Returns:
        A timezone-naive ``datetime`` so the result is comparable with the
        ``datetime.now()`` values produced by every other branch.
    """
    now = datetime.now()

    if not isinstance(date_str, str):
        return now
    raw = date_str.strip()
    if not raw:
        return now
    text = raw.lower()

    # One pattern for every "<count> <unit> ago" label. Seconds and years
    # are included on purpose: without them such labels reach the fuzzy
    # parser below, which reads the bare number as a day-of-month.
    relative = re.search(
        r"(\d+|\ban?\b)\s*"
        r"(second|sec|minute|min|hour|hr|day|week|month|year)s?\s*ago",
        text,
    )
    if relative:
        count_text, unit_word = relative.groups()
        # "a"/"an" stand for a count of one ("an hour ago").
        count = 1 if count_text in ("a", "an") else int(count_text)
        unit = {
            "second": "seconds",
            "sec": "seconds",
            "minute": "minutes",
            "min": "minutes",
            "hour": "hours",
            "hr": "hours",
            "day": "days",
            "week": "weeks",
            "month": "months",
            "year": "years",
        }[unit_word]
        try:
            if unit in ("months", "years"):
                # Calendar-aware arithmetic; timedelta has no month/year unit.
                return now - relativedelta(**{unit: count})
            return now - timedelta(**{unit: count})
        except (OverflowError, ValueError):
            # Absurdly large counts cannot be represented; stay best-effort.
            return now

    if "yesterday" in text:
        return now - timedelta(days=1)

    try:
        # Parse the original-case text: lower-casing it first would hide
        # upper-case timezone abbreviations from dateutil.
        parsed = dateutil_parser.parse(raw, fuzzy=True)
        if parsed.tzinfo is not None:
            # Normalise aware results to naive local time so callers never
            # end up mixing naive and aware datetimes.
            parsed = parsed.astimezone().replace(tzinfo=None)
    except (ValueError, TypeError, OverflowError):
        return now
    return parsed
title,