154 lines
5.2 KiB
Python
154 lines
5.2 KiB
Python
import logging
|
|
import os
|
|
import time
|
|
import requests
|
|
from datetime import datetime, timedelta
|
|
from typing import List, Dict, Any
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/news/search"
|
|
DEFAULT_TIMEOUT = 30
|
|
MAX_RETRIES = 3
|
|
RETRY_BACKOFF = 1.0
|
|
|
|
|
|
def get_api_key() -> str:
    """Return the Brave Search API key read from the environment.

    The key is taken from the ``BRAVE_API_KEY`` environment variable.
    Surrounding whitespace is stripped so that a padded value (for example
    one pasted with a trailing space or newline) is not passed on verbatim
    to callers that place it in an HTTP header.

    Returns:
        The non-empty API key with surrounding whitespace removed.

    Raises:
        ValueError: If ``BRAVE_API_KEY`` is unset, empty, or whitespace only.
    """
    api_key = os.getenv("BRAVE_API_KEY", "").strip()
    if not api_key:
        raise ValueError("BRAVE_API_KEY environment variable is not set.")
    return api_key
|
|
|
|
|
|
def _make_request_with_retry(url: str, headers: Dict, params: Dict, max_retries: int = MAX_RETRIES) -> requests.Response:
    """Issue a GET request, retrying on transient failures.

    Retries are attempted for HTTP 429 responses, timeouts, connection errors
    and 5xx responses. Other HTTP errors (4xx) are re-raised immediately.
    Between attempts the function waits ``RETRY_BACKOFF * attempt_number``
    seconds, or the number of seconds given by a numeric ``Retry-After``
    header on a 429 response. No wait is performed after the final attempt.

    Args:
        url: Endpoint to request.
        headers: HTTP headers to send.
        params: Query-string parameters to send.
        max_retries: Total number of attempts to make.

    Returns:
        The first response that is neither a 429 nor an HTTP error status.

    Raises:
        requests.exceptions.HTTPError: For a non-retryable 4xx response, or
            when the final attempt ended with a 429 or 5xx response.
        requests.exceptions.Timeout: When the final attempt timed out.
        requests.exceptions.ConnectionError: When the final attempt could not
            connect.
        requests.exceptions.RequestException: When no attempt was made at all
            (``max_retries`` <= 0).
    """
    last_exception = None

    for attempt in range(max_retries):
        # Linear backoff: RETRY_BACKOFF, 2 * RETRY_BACKOFF, 3 * RETRY_BACKOFF, ...
        delay = RETRY_BACKOFF * (attempt + 1)

        try:
            response = requests.get(url, headers=headers, params=params, timeout=DEFAULT_TIMEOUT)

            if response.status_code != 429:
                response.raise_for_status()
                return response

            # Record the rate limit so that exhausting the retries reports it
            # rather than a stale error from an earlier attempt.
            last_exception = requests.exceptions.HTTPError(
                "429 Client Error: Too Many Requests for url: %s" % url,
                response=response,
            )
            try:
                # Retry-After may also be an HTTP-date; int() rejects that form
                # and we then keep the linear backoff computed above.
                delay = max(0, int(response.headers["Retry-After"]))
            except (KeyError, TypeError, ValueError):
                pass
            logger.debug("Brave rate limited, waiting %ds before retry %d/%d", delay, attempt + 1, max_retries)

        except requests.exceptions.Timeout as e:
            last_exception = e
            logger.debug("Brave request timeout, retry %d/%d", attempt + 1, max_retries)

        except requests.exceptions.ConnectionError as e:
            last_exception = e
            logger.debug("Brave connection error, retry %d/%d", attempt + 1, max_retries)

        except requests.exceptions.HTTPError as e:
            # Only server-side (5xx) failures are worth retrying.
            if e.response is None or e.response.status_code < 500:
                raise
            last_exception = e
            logger.debug("Brave server error %d, retry %d/%d", e.response.status_code, attempt + 1, max_retries)

        # Sleeping after the last attempt would only delay the failure.
        if attempt + 1 < max_retries:
            time.sleep(delay)

    raise last_exception if last_exception else requests.exceptions.RequestException("Max retries exceeded")
|
|
|
|
|
|
def _brave_freshness(lookback_hours: int) -> str:
    """Return the Brave ``freshness`` code used for a lookback window in hours."""
    # NOTE(review): "pd"/"pw"/"pm" presumably mean past day/week/month in
    # Brave's API - confirm against the API documentation.
    if lookback_hours <= 24:
        return "pd"
    if lookback_hours <= 168:
        return "pw"
    return "pm"


def _brave_article_from_item(item: Dict[str, Any], url: str) -> Dict[str, Any]:
    """Convert one Brave result dict into this module's article dict."""
    meta_url = item.get("meta_url")
    source = meta_url.get("netloc") if isinstance(meta_url, dict) else None
    title = item.get("title")
    description = item.get("description")
    age = item.get("age")
    # Fields may be present but null; fall back to safe defaults instead of
    # letting one malformed item raise.
    published_at = _parse_brave_age(age if isinstance(age, str) else "")
    return {
        "title": title if isinstance(title, str) else "",
        "source": source or "Brave News",
        "url": url,
        "published_at": published_at.isoformat(),
        "content_snippet": description[:500] if isinstance(description, str) else "",
    }


def get_bulk_news_brave(lookback_hours: int) -> List[Dict[str, Any]]:
    """Fetch financial news articles from the Brave news search endpoint.

    A fixed set of finance-related queries is sent to ``BRAVE_SEARCH_URL``
    and the results are merged, de-duplicated by URL. The function is
    best-effort: a failing query is logged at debug level and skipped, and a
    malformed result is skipped without discarding the rest of its query.

    Args:
        lookback_hours: Size of the lookback window in hours. It selects the
            ``freshness`` request parameter: up to 24 -> "pd", up to 168 ->
            "pw", otherwise "pm".

    Returns:
        A list of dicts with the keys ``title``, ``source``, ``url``,
        ``published_at`` (ISO-8601 string) and ``content_snippet`` (at most
        500 characters). The list is empty when the API key is not
        configured or every query fails.
    """
    try:
        api_key = get_api_key()
    except ValueError as e:
        logger.debug("Brave API key not configured: %s", e)
        return []

    headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": api_key,
    }

    queries = [
        "stock market news",
        "earnings report",
        "merger acquisition",
        "company financial news",
        "trading stocks",
    ]

    freshness = _brave_freshness(lookback_hours)
    all_articles = []
    seen_urls = set()

    for query in queries:
        params = {
            "q": query,
            "count": 20,
            "freshness": freshness,
        }

        try:
            response = _make_request_with_retry(BRAVE_SEARCH_URL, headers, params)
            data = response.json()
        except requests.exceptions.HTTPError as e:
            logger.debug("Brave search HTTP error for '%s': %s", query, e)
            continue
        except requests.exceptions.Timeout as e:
            logger.debug("Brave search timeout for '%s': %s", query, e)
            continue
        except requests.exceptions.RequestException as e:
            logger.debug("Brave search request failed for '%s': %s", query, e)
            continue
        except Exception as e:
            # Deliberate best-effort boundary (e.g. a body that is not JSON).
            logger.debug("Brave search failed for query '%s': %s", query, e)
            continue

        results = data.get("results") if isinstance(data, dict) else None
        if not isinstance(results, list):
            results = []

        for item in results:
            # Isolate each item so that one bad result cannot drop the
            # remaining results of this query.
            try:
                if not isinstance(item, dict):
                    continue
                url = item.get("url")
                if not isinstance(url, str) or not url or url in seen_urls:
                    continue
                article = _brave_article_from_item(item, url)
            except Exception as e:
                logger.debug("Brave search failed for query '%s': %s", query, e)
                continue
            # Mark the URL as seen only once its article was actually built.
            seen_urls.add(url)
            all_articles.append(article)

    logger.debug("Brave returned %d articles", len(all_articles))
    return all_articles
|
|
|
|
|
|
def _parse_brave_age(age_str: str) -> datetime:
    """Convert a relative age string such as "2 hours ago" into a datetime.

    Every ``<number> <unit>`` pair in the string is summed, so
    "1 hour 30 minutes ago" is 90 minutes. A unit word without a number
    ("an hour ago") counts as one of that unit, and "yesterday" counts as
    one day. Units are matched as whole words, so a weekday name such as
    "Monday" is not mistaken for "day".

    Args:
        age_str: Relative age text. Recognised units are second, minute,
            hour, day, week, month and year (singular or plural).

    Returns:
        A naive ``datetime`` in local time (based on ``datetime.now()``)
        equal to the current time minus the parsed age. The current time is
        returned when the input is empty, not a string, unparseable, or
        describes an age too large to represent.
    """
    import re  # local import: `re` is not among this module's top-level imports

    now = datetime.now()
    if not age_str or not isinstance(age_str, str):
        return now

    unit_spans = {
        "second": timedelta(seconds=1),
        "minute": timedelta(minutes=1),
        "hour": timedelta(hours=1),
        "day": timedelta(days=1),
        "week": timedelta(weeks=1),
        # timedelta has no calendar units; months and years are approximated.
        "month": timedelta(days=30),
        "year": timedelta(days=365),
    }
    text = age_str.lower()

    try:
        # Pair each number with the unit that directly follows it instead of
        # gluing together every digit found anywhere in the string.
        components = re.findall(
            r"([0-9]+)\s*(second|minute|hour|day|week|month|year)s?\b", text
        )
        if components:
            elapsed = timedelta()
            for quantity, unit in components:
                elapsed += int(quantity) * unit_spans[unit]
            return now - elapsed

        if "yesterday" in text:
            return now - unit_spans["day"]

        # A bare unit word ("an hour ago") means a quantity of one.
        bare_unit = re.search(
            r"\b(second|minute|hour|day|week|month|year)s?\b", text
        )
        if bare_unit:
            return now - unit_spans[bare_unit.group(1)]
    except (ValueError, OverflowError):
        # Absurdly large quantities overflow timedelta/datetime arithmetic;
        # treat them like any other unparseable age.
        pass

    return now
|