# TradingAgents/tradingagents/dataflows/brave.py
#
# 154 lines
# 5.2 KiB
# Python

import logging
import os
import time
import requests
from datetime import datetime, timedelta
from typing import List, Dict, Any
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Brave news-search endpoint that every query issued by this module is sent to.
BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/news/search"
# Per-request timeout (seconds) passed to requests.get.
DEFAULT_TIMEOUT = 30
# Default number of attempts made for a single request before giving up.
MAX_RETRIES = 3
# Base delay in seconds; the wait before the next attempt is this value
# multiplied by the 1-based attempt number (linear backoff).
RETRY_BACKOFF = 1.0
def get_api_key() -> str:
    """Return the Brave API key read from the ``BRAVE_API_KEY`` environment variable.

    Raises:
        ValueError: If the variable is unset or set to an empty string.
    """
    token = os.environ.get("BRAVE_API_KEY")
    if token:
        return token
    raise ValueError("BRAVE_API_KEY environment variable is not set.")
def _make_request_with_retry(url: str, headers: Dict, params: Dict, max_retries: int = MAX_RETRIES) -> requests.Response:
    """Issue a GET request, retrying on transient failures.

    Retries are attempted for HTTP 429 responses, timeouts, connection
    errors and 5xx server errors. Any other HTTP error status is raised
    immediately without retrying.

    Args:
        url: Endpoint to request.
        headers: HTTP headers sent with every attempt.
        params: Query-string parameters sent with every attempt.
        max_retries: Total number of attempts to make.

    Returns:
        The first response whose status passes ``raise_for_status()``.

    Raises:
        requests.exceptions.HTTPError: On a non-retryable (non-5xx, non-429)
            error status, or the last 5xx error once attempts are exhausted.
        requests.exceptions.Timeout: If the last failed attempt timed out.
        requests.exceptions.ConnectionError: If the last failed attempt
            could not connect.
        requests.exceptions.RequestException: If attempts ran out without
            any exception having been recorded (e.g. only 429 responses).
    """
    last_exception = None
    for attempt in range(max_retries):
        # Linear backoff; a 429 response may override it via Retry-After.
        delay = RETRY_BACKOFF * (attempt + 1)
        try:
            response = requests.get(url, headers=headers, params=params, timeout=DEFAULT_TIMEOUT)
            if response.status_code != 429:
                response.raise_for_status()
                return response

            header_value = response.headers.get("Retry-After")
            if header_value is not None:
                try:
                    # Clamp at zero: time.sleep rejects negative values.
                    delay = max(0, int(header_value))
                except (TypeError, ValueError):
                    # Retry-After may also be an HTTP-date or malformed;
                    # keep the linear backoff instead of aborting.
                    pass
            logger.debug("Brave rate limited, waiting %ds before retry %d/%d", delay, attempt + 1, max_retries)
        except requests.exceptions.Timeout as e:
            last_exception = e
            logger.debug("Brave request timeout, retry %d/%d", attempt + 1, max_retries)
        except requests.exceptions.ConnectionError as e:
            last_exception = e
            logger.debug("Brave connection error, retry %d/%d", attempt + 1, max_retries)
        except requests.exceptions.HTTPError as e:
            # Only server-side (5xx) failures are worth retrying.
            if e.response is None or e.response.status_code < 500:
                raise
            last_exception = e
            logger.debug("Brave server error %d, retry %d/%d", e.response.status_code, attempt + 1, max_retries)

        # Sleeping after the final attempt would only delay the error.
        if attempt + 1 < max_retries:
            time.sleep(delay)

    raise last_exception if last_exception else requests.exceptions.RequestException("Max retries exceeded")
def get_bulk_news_brave(lookback_hours: int) -> List[Dict[str, Any]]:
    """Collect recent market news articles from the Brave news search API.

    A fixed list of finance-related queries is run against
    ``BRAVE_SEARCH_URL`` and the results are merged, de-duplicated by URL.
    This is best-effort: a query that fails is logged at debug level and
    skipped, and a missing API key yields an empty list rather than an error.

    Args:
        lookback_hours: Desired look-back window. It only selects the
            ``freshness`` value sent to Brave ("pd" up to 24h, "pw" up to
            168h, otherwise "pm"); results are not filtered to the exact
            number of hours afterwards.

    Returns:
        A list of dicts with the keys ``title``, ``source``, ``url``,
        ``published_at`` (ISO-8601 string) and ``content_snippet`` (at most
        500 characters).
    """
    try:
        api_key = get_api_key()
    except ValueError as e:
        logger.debug("Brave API key not configured: %s", e)
        return []

    headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": api_key,
    }
    queries = [
        "stock market news",
        "earnings report",
        "merger acquisition",
        "company financial news",
        "trading stocks",
    ]

    if lookback_hours <= 24:
        freshness = "pd"
    elif lookback_hours <= 168:
        freshness = "pw"
    else:
        freshness = "pm"

    all_articles: List[Dict[str, Any]] = []
    seen_urls = set()

    for query in queries:
        try:
            params = {
                "q": query,
                "count": 20,
                "freshness": freshness,
            }
            response = _make_request_with_retry(BRAVE_SEARCH_URL, headers, params)
            data = response.json()
            # `or` guards cover JSON null as well as a missing key, so one
            # malformed field does not discard the rest of this query.
            results = data.get("results") or []
            for item in results:
                if not isinstance(item, dict):
                    continue
                url = item.get("url") or ""
                if not url or url in seen_urls:
                    continue
                seen_urls.add(url)

                published_at = _parse_brave_age(item.get("age") or "")
                meta_url = item.get("meta_url")
                source = meta_url.get("netloc") if isinstance(meta_url, dict) else None
                all_articles.append(
                    {
                        "title": item.get("title") or "",
                        "source": source or "Brave News",
                        "url": url,
                        "published_at": published_at.isoformat(),
                        "content_snippet": (item.get("description") or "")[:500],
                    }
                )
        except requests.exceptions.HTTPError as e:
            logger.debug("Brave search HTTP error for '%s': %s", query, e)
        except requests.exceptions.Timeout as e:
            logger.debug("Brave search timeout for '%s': %s", query, e)
        except requests.exceptions.RequestException as e:
            logger.debug("Brave search request failed for '%s': %s", query, e)
        except Exception as e:
            # Deliberate catch-all: bad JSON or an unexpected payload shape
            # for one query must not abort the remaining queries.
            logger.debug("Brave search failed for query '%s': %s", query, e)

    logger.debug("Brave returned %d articles", len(all_articles))
    return all_articles
def _parse_brave_age(age_str: str) -> datetime:
    """Convert a relative age string such as "3 hours ago" into a datetime.

    The unit is detected by substring match and the amount is taken from
    all digits found in the string (defaulting to 1 when there are none,
    e.g. "an hour ago"). Months and years are approximated as 30 and 365
    days respectively.

    Args:
        age_str: Relative age text. Empty, ``None`` or non-string values
            are treated as "just now".

    Returns:
        A naive local ``datetime`` equal to the current time minus the
        parsed age, or the current time when the age cannot be interpreted.
    """
    now = datetime.now()
    if not age_str or not isinstance(age_str, str):
        return now

    text = age_str.lower()
    # Checked in order, first match wins. Hour/day/week/minute come first
    # to preserve the precedence this function has always used.
    units = (
        ("hour", timedelta(hours=1)),
        ("day", timedelta(days=1)),
        ("week", timedelta(weeks=1)),
        ("minute", timedelta(minutes=1)),
        ("second", timedelta(seconds=1)),
        ("month", timedelta(days=30)),
        ("year", timedelta(days=365)),
    )
    for keyword, step in units:
        if keyword not in text:
            continue
        try:
            count = int("".join(filter(str.isdigit, text)) or "1")
            return now - count * step
        except (ValueError, TypeError, OverflowError):
            # Unparseable or absurdly large amount: fall back to "now".
            return now
    return now