Update google.py

This commit is contained in:
Joseph O'Brien 2025-12-02 20:40:44 -05:00
parent 7c4e8e1322
commit bd4bb009a9
1 changed file with 42 additions and 7 deletions

View File

@@ -1,9 +1,50 @@
import re
from typing import Annotated, List, Dict, Any from typing import Annotated, List, Dict, Any
from datetime import datetime, timedelta from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta from dateutil.relativedelta import relativedelta
from dateutil import parser as dateutil_parser
from .googlenews_utils import getNewsData from .googlenews_utils import getNewsData
def _parse_google_news_date(date_str: str) -> datetime:
if not date_str:
return datetime.now()
date_str = date_str.strip().lower()
relative_patterns = [
(r"(\d+)\s*(?:hour|hr)s?\s*ago", "hours"),
(r"(\d+)\s*(?:minute|min)s?\s*ago", "minutes"),
(r"(\d+)\s*(?:day)s?\s*ago", "days"),
(r"(\d+)\s*(?:week)s?\s*ago", "weeks"),
(r"(\d+)\s*(?:month)s?\s*ago", "months"),
]
for pattern, unit in relative_patterns:
match = re.search(pattern, date_str)
if match:
value = int(match.group(1))
now = datetime.now()
if unit == "hours":
return now - timedelta(hours=value)
elif unit == "minutes":
return now - timedelta(minutes=value)
elif unit == "days":
return now - timedelta(days=value)
elif unit == "weeks":
return now - timedelta(weeks=value)
elif unit == "months":
return now - relativedelta(months=value)
if "yesterday" in date_str:
return datetime.now() - timedelta(days=1)
try:
return dateutil_parser.parse(date_str, fuzzy=True)
except (ValueError, TypeError):
return datetime.now()
def get_google_news( def get_google_news(
query: Annotated[str, "Query to search with"], query: Annotated[str, "Query to search with"],
curr_date: Annotated[str, "Curr date in yyyy-mm-dd format"], curr_date: Annotated[str, "Curr date in yyyy-mm-dd format"],
@@ -56,13 +97,7 @@ def get_bulk_news_google(lookback_hours: int) -> List[Dict[str, Any]]:
seen_titles.add(title) seen_titles.add(title)
date_str = news.get("date", "") date_str = news.get("date", "")
try: published_at = _parse_google_news_date(date_str)
if date_str:
published_at = datetime.now()
else:
published_at = datetime.now()
except ValueError:
published_at = datetime.now()
article = { article = {
"title": title, "title": title,