Update google.py
This commit is contained in:
parent
7c4e8e1322
commit
bd4bb009a9
|
|
@ -1,9 +1,50 @@
|
||||||
|
import re
|
||||||
from typing import Annotated, List, Dict, Any
|
from typing import Annotated, List, Dict, Any
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from dateutil.relativedelta import relativedelta
|
from dateutil.relativedelta import relativedelta
|
||||||
|
from dateutil import parser as dateutil_parser
|
||||||
from .googlenews_utils import getNewsData
|
from .googlenews_utils import getNewsData
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_google_news_date(date_str: str) -> datetime:
|
||||||
|
if not date_str:
|
||||||
|
return datetime.now()
|
||||||
|
|
||||||
|
date_str = date_str.strip().lower()
|
||||||
|
|
||||||
|
relative_patterns = [
|
||||||
|
(r"(\d+)\s*(?:hour|hr)s?\s*ago", "hours"),
|
||||||
|
(r"(\d+)\s*(?:minute|min)s?\s*ago", "minutes"),
|
||||||
|
(r"(\d+)\s*(?:day)s?\s*ago", "days"),
|
||||||
|
(r"(\d+)\s*(?:week)s?\s*ago", "weeks"),
|
||||||
|
(r"(\d+)\s*(?:month)s?\s*ago", "months"),
|
||||||
|
]
|
||||||
|
|
||||||
|
for pattern, unit in relative_patterns:
|
||||||
|
match = re.search(pattern, date_str)
|
||||||
|
if match:
|
||||||
|
value = int(match.group(1))
|
||||||
|
now = datetime.now()
|
||||||
|
if unit == "hours":
|
||||||
|
return now - timedelta(hours=value)
|
||||||
|
elif unit == "minutes":
|
||||||
|
return now - timedelta(minutes=value)
|
||||||
|
elif unit == "days":
|
||||||
|
return now - timedelta(days=value)
|
||||||
|
elif unit == "weeks":
|
||||||
|
return now - timedelta(weeks=value)
|
||||||
|
elif unit == "months":
|
||||||
|
return now - relativedelta(months=value)
|
||||||
|
|
||||||
|
if "yesterday" in date_str:
|
||||||
|
return datetime.now() - timedelta(days=1)
|
||||||
|
|
||||||
|
try:
|
||||||
|
return dateutil_parser.parse(date_str, fuzzy=True)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return datetime.now()
|
||||||
|
|
||||||
|
|
||||||
def get_google_news(
|
def get_google_news(
|
||||||
query: Annotated[str, "Query to search with"],
|
query: Annotated[str, "Query to search with"],
|
||||||
curr_date: Annotated[str, "Curr date in yyyy-mm-dd format"],
|
curr_date: Annotated[str, "Curr date in yyyy-mm-dd format"],
|
||||||
|
|
@ -56,13 +97,7 @@ def get_bulk_news_google(lookback_hours: int) -> List[Dict[str, Any]]:
|
||||||
seen_titles.add(title)
|
seen_titles.add(title)
|
||||||
|
|
||||||
date_str = news.get("date", "")
|
date_str = news.get("date", "")
|
||||||
try:
|
published_at = _parse_google_news_date(date_str)
|
||||||
if date_str:
|
|
||||||
published_at = datetime.now()
|
|
||||||
else:
|
|
||||||
published_at = datetime.now()
|
|
||||||
except ValueError:
|
|
||||||
published_at = datetime.now()
|
|
||||||
|
|
||||||
article = {
|
article = {
|
||||||
"title": title,
|
"title": title,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue