Update google.py
This commit is contained in:
parent
7c4e8e1322
commit
bd4bb009a9
|
|
@ -1,9 +1,50 @@
|
|||
import re
|
||||
from typing import Annotated, List, Dict, Any
|
||||
from datetime import datetime, timedelta
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from dateutil import parser as dateutil_parser
|
||||
from .googlenews_utils import getNewsData
|
||||
|
||||
|
||||
def _parse_google_news_date(date_str: str) -> datetime:
    """Convert a Google News date label into a naive local ``datetime``.

    Recognised inputs, checked in this order:

    1. Relative labels such as ``"3 hours ago"``, ``"15 mins ago"``,
       ``"2 days ago"``, ``"1 week ago"``, ``"4 months ago"``,
       ``"30 seconds ago"`` or ``"2 years ago"``.
    2. Any label containing the word ``"yesterday"``.
    3. Anything ``dateutil`` can fuzzy-parse (e.g. ``"Jan 5, 2024"``).

    This is a best-effort parser: it never raises. Empty, non-string,
    unparseable or out-of-range input yields the current local time.

    Args:
        date_str: Raw date label taken from a news result.

    Returns:
        A timezone-naive ``datetime`` in local time. Timezone-aware values
        produced by the fuzzy parser are converted to local time and
        stripped of ``tzinfo`` so every return path is comparable with
        ``datetime.now()``.
    """
    # Sample the clock once so every branch measures from the same instant.
    now = datetime.now()

    if not date_str or not isinstance(date_str, str):
        return now

    text = date_str.strip().lower()

    # (regex, unit) pairs; the captured integer is how many ``unit`` to
    # subtract from ``now``. The original five patterns keep their order so
    # labels containing several matches resolve as before; the new units
    # are appended after them.
    relative_patterns = [
        (r"(\d+)\s*(?:hour|hr)s?\s*ago", "hours"),
        (r"(\d+)\s*(?:minute|min)s?\s*ago", "minutes"),
        (r"(\d+)\s*(?:day)s?\s*ago", "days"),
        (r"(\d+)\s*(?:week)s?\s*ago", "weeks"),
        (r"(\d+)\s*(?:month)s?\s*ago", "months"),
        (r"(\d+)\s*(?:second|sec)s?\s*ago", "seconds"),
        (r"(\d+)\s*(?:year|yr)s?\s*ago", "years"),
    ]
    # timedelta has no month/year concept; those need calendar arithmetic.
    calendar_units = {"months", "years"}

    for pattern, unit in relative_patterns:
        match = re.search(pattern, text)
        if not match:
            continue
        try:
            amount = int(match.group(1))
            if unit in calendar_units:
                return now - relativedelta(**{unit: amount})
            return now - timedelta(**{unit: amount})
        except (OverflowError, ValueError):
            # Absurdly large amounts cannot be represented; treat the
            # label as unparseable instead of crashing the caller.
            return now

    if "yesterday" in text:
        return now - timedelta(days=1)

    try:
        parsed = dateutil_parser.parse(text, fuzzy=True)
        if parsed.tzinfo is not None:
            # Normalise to naive local time to match the other branches.
            parsed = parsed.astimezone().replace(tzinfo=None)
        return parsed
    except (ValueError, TypeError, OverflowError, OSError):
        return now
|
||||
|
||||
|
||||
def get_google_news(
|
||||
query: Annotated[str, "Query to search with"],
|
||||
curr_date: Annotated[str, "Curr date in yyyy-mm-dd format"],
|
||||
|
|
@ -56,13 +97,7 @@ def get_bulk_news_google(lookback_hours: int) -> List[Dict[str, Any]]:
|
|||
seen_titles.add(title)
|
||||
|
||||
date_str = news.get("date", "")
|
||||
try:
|
||||
if date_str:
|
||||
published_at = datetime.now()
|
||||
else:
|
||||
published_at = datetime.now()
|
||||
except ValueError:
|
||||
published_at = datetime.now()
|
||||
published_at = _parse_google_news_date(date_str)
|
||||
|
||||
article = {
|
||||
"title": title,
|
||||
|
|
|
|||
Loading…
Reference in New Issue