# TradingAgents/tradingagents/clients/google_news_client.py
"""
Google News client for live news data via web scraping.
"""
import logging
from datetime import datetime, timedelta, timezone
from typing import Any

from tradingagents.dataflows.googlenews_utils import getNewsData

from .base import BaseClient
logger = logging.getLogger(__name__)
class GoogleNewsClient(BaseClient):
    """Client for Google News data via web scraping.

    All fetching is delegated to ``getNewsData``; this class normalizes the
    scraped articles into a standard dict payload with metadata.
    """

    def __init__(self, **kwargs):
        """
        Initialize Google News client.

        Args:
            **kwargs: Configuration options, forwarded to ``BaseClient``.
                max_retries (int): Retry budget, default 3.
                delay_between_requests (float): Seconds between requests,
                    default 1.0.

        NOTE(review): ``max_retries`` and ``delay_between_requests`` are
        stored but never read within this class — presumably consumed by
        ``BaseClient`` or by callers; confirm before removing.
        """
        super().__init__(**kwargs)
        self.max_retries = kwargs.get("max_retries", 3)
        self.delay_between_requests = kwargs.get("delay_between_requests", 1.0)

    def test_connection(self) -> bool:
        """Test Google News connectivity with a one-day 'technology' query.

        Returns:
            bool: True if the scrape pipeline returned a list (even an empty
            one counts as reachable); False on any exception.
        """
        try:
            end_date = datetime.now().strftime("%Y-%m-%d")
            start_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
            test_data = getNewsData("technology", start_date, end_date)
            return isinstance(test_data, list)
        except Exception as e:
            logger.error("Google News connection test failed: %s", e)
            return False

    def get_data(
        self, query: str, start_date: str, end_date: str, **kwargs
    ) -> dict[str, Any]:
        """
        Get news data for a query and date range.

        Args:
            query: Search query.
            start_date: Start date in YYYY-MM-DD format.
            end_date: End date in YYYY-MM-DD format.
            **kwargs: Additional parameters (currently unused here).

        Returns:
            dict[str, Any]: News payload with ``query``, ``period``,
            ``articles`` and ``metadata`` keys. When no articles are found,
            ``metadata`` carries ``empty: True`` instead of counts.

        Raises:
            ValueError: If the date range fails ``validate_date_range``.
            Exception: Re-raised from the underlying scraper after logging.
        """
        if not self.validate_date_range(start_date, end_date):
            raise ValueError(f"Invalid date range: {start_date} to {end_date}")
        try:
            # Only spaces are encoded; other URL-special characters are passed
            # through unchanged. NOTE(review): consider urllib.parse.quote_plus
            # if getNewsData expects a fully URL-encoded query — confirm its
            # contract before changing, as it may encode internally.
            formatted_query = query.replace(" ", "+")
            logger.info(
                "Fetching Google News for query: %s from %s to %s",
                query,
                start_date,
                end_date,
            )
            news_results = getNewsData(formatted_query, start_date, end_date)
            if not news_results:
                logger.warning("No news found for query: %s", query)
                return {
                    "query": query,
                    "period": {"start": start_date, "end": end_date},
                    "articles": [],
                    "metadata": {
                        "source": "google_news",
                        "empty": True,
                        "reason": "no_articles_found",
                    },
                }
            # Standardize scraped articles into a uniform schema.
            processed_articles = []
            for article in news_results:
                processed_article = {
                    "headline": article.get("title", ""),
                    "summary": article.get("snippet", ""),
                    "url": article.get("link", ""),
                    "source": article.get("source", "Unknown"),
                    # Fall back to end_date when the scraper gives no date.
                    "date": article.get("date", end_date),
                    "entities": article.get("entities", []),
                }
                processed_articles.append(processed_article)
            return {
                "query": query,
                "period": {"start": start_date, "end": end_date},
                "articles": processed_articles,
                "metadata": {
                    "source": "google_news",
                    "article_count": len(processed_articles),
                    # Timezone-aware UTC timestamp; datetime.utcnow() is
                    # deprecated since Python 3.12 and returns naive values.
                    "retrieved_at": datetime.now(timezone.utc).isoformat(),
                    "search_query": formatted_query,
                },
            }
        except Exception as e:
            logger.error("Error fetching Google News for query '%s': %s", query, e)
            raise

    def get_company_news(
        self, symbol: str, start_date: str, end_date: str, **kwargs
    ) -> dict[str, Any]:
        """
        Get news data specific to a company symbol.

        Args:
            symbol: Stock ticker symbol.
            start_date: Start date in YYYY-MM-DD format.
            end_date: End date in YYYY-MM-DD format.
            **kwargs: Additional parameters forwarded to ``get_data``.

        Returns:
            dict[str, Any]: ``get_data`` payload plus a top-level ``symbol``
            key and ``metadata.query_type == "company_specific"``.
        """
        # Search for "<SYMBOL> stock" to bias results toward market news.
        company_query = f"{symbol} stock"
        result = self.get_data(company_query, start_date, end_date, **kwargs)
        result["symbol"] = symbol
        result["metadata"]["query_type"] = "company_specific"
        return result

    def get_global_news(
        self,
        start_date: str,
        end_date: str,
        categories: list[str] | None = None,
        **kwargs,
    ) -> dict[str, Any]:
        """
        Get global/macro news that might affect markets.

        Args:
            start_date: Start date in YYYY-MM-DD format.
            end_date: End date in YYYY-MM-DD format.
            categories: News categories to search; defaults to
                ["economy", "finance", "markets", "business"].
            **kwargs: Additional parameters forwarded to ``get_data``.

        Returns:
            dict[str, Any]: Merged articles across categories, each article
            tagged with its ``category``. A failing category is logged and
            skipped (best-effort), never fatal.
        """
        if categories is None:
            categories = ["economy", "finance", "markets", "business"]
        all_articles = []
        for category in categories:
            try:
                category_data = self.get_data(category, start_date, end_date, **kwargs)
                # Bind once: tag each article with its category, then merge.
                articles = category_data.get("articles", [])
                for article in articles:
                    article["category"] = category
                all_articles.extend(articles)
            except Exception as e:
                logger.warning(
                    "Failed to fetch news for category '%s': %s", category, e
                )
                continue
        return {
            "query": "global_news",
            "categories": categories,
            "period": {"start": start_date, "end": end_date},
            "articles": all_articles,
            "metadata": {
                "source": "google_news",
                "article_count": len(all_articles),
                "categories_searched": categories,
                # Timezone-aware UTC timestamp (utcnow() is deprecated).
                "retrieved_at": datetime.now(timezone.utc).isoformat(),
                "query_type": "global_news",
            },
        }

    def get_available_categories(self) -> list[str]:
        """
        Get list of commonly used news categories.

        Returns:
            list[str]: Category names accepted by ``get_global_news``.
        """
        return [
            "business",
            "economy",
            "finance",
            "markets",
            "technology",
            "politics",
            "world",
            "healthcare",
            "energy",
            "crypto",
        ]