"""Helpers for fetching Google News results and normalising their dates."""

import logging
import re
from datetime import datetime, timedelta
from typing import Annotated, Any, Dict, List, Optional

from dateutil import parser as dateutil_parser
from dateutil.relativedelta import relativedelta

from .googlenews_utils import getNewsData

logger = logging.getLogger(__name__)

# Maximum number of snippet characters kept per article in bulk results.
_SNIPPET_MAX_CHARS = 500

# Search phrases issued by get_bulk_news_google, in order.
_BULK_QUERIES = ("stock market", "trading news", "earnings report")

# (compiled pattern, count -> offset) pairs for relative timestamps such as
# "3 hours ago". Compiled once at import time; tried in order, first match
# wins. Months and years use relativedelta because they are calendar-based.
_RELATIVE_AGE_PATTERNS = (
    (re.compile(r"(\d+)\s*(?:hour|hr)s?\s*ago"), lambda n: timedelta(hours=n)),
    (re.compile(r"(\d+)\s*(?:minute|min)s?\s*ago"), lambda n: timedelta(minutes=n)),
    (re.compile(r"(\d+)\s*(?:day)s?\s*ago"), lambda n: timedelta(days=n)),
    (re.compile(r"(\d+)\s*(?:week)s?\s*ago"), lambda n: timedelta(weeks=n)),
    (re.compile(r"(\d+)\s*(?:month)s?\s*ago"), lambda n: relativedelta(months=n)),
    (re.compile(r"(\d+)\s*(?:second|sec)s?\s*ago"), lambda n: timedelta(seconds=n)),
    (re.compile(r"(\d+)\s*(?:year|yr)s?\s*ago"), lambda n: relativedelta(years=n)),
)


def _parse_google_news_date(date_str: str) -> datetime:
    """Convert a Google News date label into a ``datetime``.

    Handles relative labels ("5 min ago", "2 days ago", "yesterday") and
    falls back to ``dateutil``'s fuzzy parser for anything else.

    Args:
        date_str: The raw date label. Empty, ``None`` or non-string values
            are treated as "unknown".

    Returns:
        The parsed timestamp. When the label is unknown or cannot be
        parsed, the current local time (``datetime.now()``) is returned, so
        this function never raises for bad input. Relative labels yield
        naive datetimes; NOTE(review): the dateutil fallback may return an
        aware datetime if the label carries a timezone - confirm callers
        tolerate that.
    """
    now = datetime.now()
    if not date_str or not isinstance(date_str, str):
        return now

    text = date_str.strip().lower()

    for pattern, to_offset in _RELATIVE_AGE_PATTERNS:
        match = pattern.search(text)
        if match is None:
            continue
        try:
            return now - to_offset(int(match.group(1)))
        except (OverflowError, ValueError):
            # The count is too large to represent as a datetime offset.
            return now

    if "yesterday" in text:
        return now - timedelta(days=1)

    try:
        return dateutil_parser.parse(text, fuzzy=True)
    except (ValueError, TypeError, OverflowError):
        # dateutil raises ParserError (a ValueError) for unknown formats and
        # OverflowError for out-of-range values; both mean "unknown date".
        return now


def get_google_news(
    query: Annotated[str, "Query to search with"],
    curr_date: Annotated[str, "Curr date in yyyy-mm-dd format"],
    look_back_days: Annotated[int, "how many days to look back"],
) -> str:
    """Return a Markdown report of Google News results for ``query``.

    Args:
        query: Search phrase; spaces are replaced with ``+`` before it is
            passed to ``getNewsData``.
        curr_date: End of the search window, formatted ``YYYY-MM-DD``.
        look_back_days: Number of days before ``curr_date`` at which the
            search window starts.

    Returns:
        A Markdown string with one ``###`` section per article, or an empty
        string when no results were returned.

    Raises:
        ValueError: If ``curr_date`` is not in ``YYYY-MM-DD`` format.
    """
    # NOTE: the "+"-joined form is also what appears in the report heading;
    # kept as-is so the emitted text is unchanged.
    query = query.replace(" ", "+")

    window_end = datetime.strptime(curr_date, "%Y-%m-%d")
    window_start = window_end - relativedelta(days=look_back_days)
    before = window_start.strftime("%Y-%m-%d")

    news_results = getNewsData(query, before, curr_date)
    if not news_results:
        return ""

    news_str = "".join(
        f"### {news['title']} (source: {news['source']}) \n\n{news['snippet']}\n\n"
        for news in news_results
    )
    return f"## {query} Google News, from {before} to {curr_date}:\n\n{news_str}"


def _build_article(news: Dict[str, Any], title: Any) -> Dict[str, Any]:
    """Map one raw ``getNewsData`` entry onto the bulk-article schema."""
    published_at = _parse_google_news_date(news.get("date", ""))
    # A missing or None snippet becomes "" instead of failing on slicing.
    snippet = news.get("snippet") or ""
    return {
        "title": title,
        "source": news.get("source", "Google News"),
        "url": news.get("link", ""),
        "published_at": published_at.isoformat(),
        "content_snippet": str(snippet)[:_SNIPPET_MAX_CHARS],
    }


def get_bulk_news_google(lookback_hours: int) -> List[Dict[str, Any]]:
    """Collect de-duplicated articles for a fixed set of market queries.

    The search window passed to ``getNewsData`` is expressed as
    ``YYYY-MM-DD`` strings, so it is day-granular; no further filtering by
    hour is applied here.

    This is a best-effort fetch: a query that fails, or an individual entry
    that is malformed, is logged and skipped rather than aborting the call.

    Args:
        lookback_hours: How far back from now the search window starts.

    Returns:
        A list of dicts with keys ``title``, ``source``, ``url``,
        ``published_at`` (ISO 8601 string) and ``content_snippet`` (at most
        500 characters). Articles are de-duplicated by title across all
        queries, keeping the first occurrence.
    """
    end_date = datetime.now()
    start_date = end_date - timedelta(hours=lookback_hours)
    start_str = start_date.strftime("%Y-%m-%d")
    end_str = end_date.strftime("%Y-%m-%d")

    all_articles: List[Dict[str, Any]] = []
    seen_titles = set()

    for query in _BULK_QUERIES:
        try:
            news_results = getNewsData(query.replace(" ", "+"), start_str, end_str)
        except Exception:
            # Boundary with the external fetcher: keep going with the other
            # queries, but leave a trace of what went wrong.
            logger.warning(
                "Google News query %r failed; skipping", query, exc_info=True
            )
            continue

        for news in news_results or ():
            # Guard each entry separately so one bad record does not discard
            # the remaining results of the same query.
            try:
                title = news.get("title", "")
                if not title or title in seen_titles:
                    continue
                seen_titles.add(title)
                article = _build_article(news, title)
            except Exception:
                logger.warning(
                    "Skipping malformed Google News entry for query %r",
                    query,
                    exc_info=True,
                )
                continue
            all_articles.append(article)

    return all_articles