Update google.py

This commit is contained in:
Joseph O'Brien 2025-12-02 20:40:44 -05:00
parent 7c4e8e1322
commit bd4bb009a9
1 changed file with 42 additions and 7 deletions

View File

@ -1,9 +1,50 @@
import re
from typing import Annotated, List, Dict, Any
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from dateutil import parser as dateutil_parser
from .googlenews_utils import getNewsData
def _parse_google_news_date(date_str: str) -> datetime:
    """Convert a Google News date label into a ``datetime``.

    Recognized forms, checked in this order:

    1. Relative labels such as ``"3 hours ago"``, ``"45 mins ago"``,
       ``"an hour ago"``, ``"2 weeks ago"`` or ``"1 year ago"``.
    2. The word ``"yesterday"`` anywhere in the label.
    3. Anything else is handed to ``dateutil``'s fuzzy parser as an
       absolute date.

    The function is deliberately best-effort: it never raises. Empty,
    non-string, unparseable or out-of-range input yields the current local
    time instead.

    Args:
        date_str: Raw date label taken from a news result. Matching is
            case-insensitive and ignores surrounding whitespace.

    Returns:
        The parsed timestamp. Values computed from relative labels are
        naive local times derived from ``datetime.now()``.
    """
    now = datetime.now()

    # Guard against missing labels and non-string values (which would fail
    # on ``.strip()`` below).
    if not date_str or not isinstance(date_str, str):
        return now

    text = date_str.strip().lower()

    # Maps every accepted unit spelling to the keyword argument understood
    # by ``timedelta`` / ``relativedelta``.
    unit_kwargs = {
        "second": "seconds",
        "sec": "seconds",
        "minute": "minutes",
        "min": "minutes",
        "hour": "hours",
        "hr": "hours",
        "day": "days",
        "week": "weeks",
        "month": "months",
        "year": "years",
        "yr": "years",
    }

    # A single pattern covers "<N> <unit>(s) ago" as well as "a/an <unit> ago".
    # Seconds and years must be handled here: left to the fuzzy absolute
    # parser, the leading number would be misread as part of a calendar date.
    match = re.search(
        r"(?:(\d+)|\ban?\b)\s*"
        r"(second|sec|minute|min|hour|hr|day|week|month|year|yr)s?\s*ago",
        text,
    )
    if match:
        # Group 1 is empty when the label used "a"/"an" instead of a number.
        count = int(match.group(1)) if match.group(1) else 1
        unit = unit_kwargs[match.group(2)]
        try:
            if unit in ("months", "years"):
                # Calendar-aware arithmetic; timedelta has no month/year unit.
                return now - relativedelta(**{unit: count})
            return now - timedelta(**{unit: count})
        except (OverflowError, ValueError):
            # Absurdly large counts cannot be represented; stay best-effort.
            return now

    if "yesterday" in text:
        return now - timedelta(days=1)

    try:
        return dateutil_parser.parse(text, fuzzy=True)
    except (ValueError, TypeError, OverflowError):
        # ParserError is a ValueError; OverflowError is raised for dates
        # exceeding what the platform can represent.
        return now
def get_google_news(
query: Annotated[str, "Query to search with"],
curr_date: Annotated[str, "Curr date in yyyy-mm-dd format"],
@ -56,13 +97,7 @@ def get_bulk_news_google(lookback_hours: int) -> List[Dict[str, Any]]:
seen_titles.add(title)
date_str = news.get("date", "")
try:
if date_str:
published_at = datetime.now()
else:
published_at = datetime.now()
except ValueError:
published_at = datetime.now()
published_at = _parse_google_news_date(date_str)
article = {
"title": title,