"""Tests for Google News RSS feed client using pytest-vcr."""

from datetime import datetime, timezone
from unittest.mock import Mock, patch

import feedparser
import pytest
import requests

from tradingagents.domains.news.google_news_client import (
    GoogleNewsArticle,
    GoogleNewsClient,
)

# VCR configuration
#
# Shared marker for the tests that exercise real HTTP traffic. Applying
# ``@vcr`` to a test attaches the ``vcr`` pytest marker with the options below.
vcr = pytest.mark.vcr(
    # Directory where the recorded cassettes for the news tests are stored.
    cassette_library_dir="tests/fixtures/vcr_cassettes/news",
    record_mode="once",  # Record once, then replay
    # A recorded interaction is reused when both the URI and the HTTP method
    # of the outgoing request match.
    match_on=["uri", "method"],
    # Keep credentials out of the cassettes committed to the repository.
    filter_headers=["authorization", "cookie"],
)


class TestGoogleNewsClient:
    """Test GoogleNewsClient with VCR cassettes.

    Tests decorated with ``@vcr`` run against recorded HTTP interactions.
    The remaining tests patch ``requests.get``, ``feedparser.parse``, the
    URL decoder, or the client's own ``_get_rss_feed`` so that no network
    access is needed.
    """

    @pytest.fixture
    def client(self):
        """Create GoogleNewsClient instance."""
        return GoogleNewsClient()

    @vcr
    def test_get_company_news_real(self, client):
        """Test fetching company news with real RSS feed (recorded)."""
        articles = client.get_company_news("AAPL")

        assert isinstance(articles, list)
        # The detailed checks only run when the recording contains articles;
        # an empty result still passes the test.
        if articles:
            article = articles[0]
            assert isinstance(article, GoogleNewsArticle)
            assert article.title
            assert article.link
            assert isinstance(article.published, datetime)
            assert article.source

    @vcr
    def test_get_global_news_real(self, client):
        """Test fetching global news with real RSS feed (recorded)."""
        articles = client.get_global_news(["technology", "finance"])

        assert isinstance(articles, list)
        if articles:
            # Building the set verifies every article exposes a hashable
            # ``source``. The size check is deliberately lenient: it holds
            # for any non-empty result and does NOT require several
            # distinct sources.
            sources = {article.source for article in articles}
            assert len(sources) >= 1

    def test_get_rss_feed_network_error(self, client):
        """Test handling of network errors."""
        with patch("requests.get") as mock_get:
            mock_get.side_effect = requests.exceptions.RequestException("Network error")

            articles = client._get_rss_feed("AAPL")

            assert articles == []
            mock_get.assert_called_once()

    def test_get_rss_feed_http_error(self, client):
        """Test handling of HTTP errors."""
        with patch("requests.get") as mock_get:
            # The response object itself is returned normally; the failure
            # is raised when ``raise_for_status`` is called on it.
            mock_response = Mock()
            mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(
                "404 Not Found"
            )
            mock_get.return_value = mock_response

            articles = client._get_rss_feed("INVALID")

            assert articles == []

    def test_get_rss_feed_malformed_feed(self, client):
        """Test handling of malformed RSS feed."""
        with patch("requests.get") as mock_get:
            mock_response = Mock()
            mock_response.content = b"<html>Not an RSS feed</html>"
            mock_response.raise_for_status.return_value = None
            mock_get.return_value = mock_response

            articles = client._get_rss_feed("TEST")

            # Should handle gracefully and return empty or partial results
            assert isinstance(articles, list)

    def test_get_rss_feed_with_bozo_feed(self, client):
        """Test handling of feed with parsing issues (bozo)."""
        with patch("requests.get") as mock_get, patch("feedparser.parse") as mock_parse:
            mock_response = Mock()
            mock_response.content = b"<rss>Slightly malformed RSS</rss>"
            mock_response.raise_for_status.return_value = None
            mock_get.return_value = mock_response

            # Create mock feed with bozo flag
            mock_feed = Mock()
            mock_feed.bozo = True
            mock_feed.bozo_exception = Exception("Parsing error")
            mock_feed.entries = []
            mock_parse.return_value = mock_feed

            articles = client._get_rss_feed("TEST")

            assert articles == []

    def test_parse_feed_entry_valid_date(self, client):
        """Test parsing entry with valid date."""
        mock_entry = Mock(spec=feedparser.FeedParserDict)
        mock_entry.title = "Apple News - TechCrunch"
        mock_entry.link = "https://example.com/apple"
        mock_entry.published = "Mon, 15 Jan 2024 10:00:00 GMT"
        mock_entry.summary = "Apple announces new product"
        mock_entry.id = "guid-123"

        article = client._parse_feed_entry(mock_entry)

        # The trailing " - <source>" part of the title becomes the source.
        assert article.title == "Apple News"
        assert article.source == "TechCrunch"
        assert article.link == "https://example.com/apple"
        assert isinstance(article.published, datetime)
        assert article.summary == "Apple announces new product"
        assert article.guid == "guid-123"

    def test_parse_feed_entry_invalid_date(self, client):
        """Test parsing entry with invalid date."""
        mock_entry = Mock(spec=feedparser.FeedParserDict)
        mock_entry.title = "Breaking News"
        mock_entry.link = "https://example.com/news"
        mock_entry.published = "Invalid Date String"
        mock_entry.summary = "News summary"
        mock_entry.id = "guid-456"

        # Should use current time as fallback. The bounds are naive UTC
        # datetimes, so the comparison below expects ``published`` to be
        # naive as well.
        before = datetime.now(timezone.utc).replace(tzinfo=None)
        article = client._parse_feed_entry(mock_entry)
        after = datetime.now(timezone.utc).replace(tzinfo=None)

        assert before <= article.published <= after
        assert article.title == "Breaking News"
        assert article.source == "Unknown"  # No source in title

    def test_parse_feed_entry_no_source_separator(self, client):
        """Test parsing entry without source separator in title."""
        mock_entry = Mock(spec=feedparser.FeedParserDict)
        mock_entry.title = "Simple Title Without Source"
        mock_entry.link = "https://example.com"
        mock_entry.published = ""
        mock_entry.summary = "Summary"
        mock_entry.id = "guid-789"

        article = client._parse_feed_entry(mock_entry)

        assert article.title == "Simple Title Without Source"
        assert article.source == "Unknown"

    def test_parse_feed_entry_missing_fields(self, client):
        """Test parsing entry with missing fields."""
        # Don't set any attributes to test defaults
        mock_entry = Mock(spec=feedparser.FeedParserDict)

        article = client._parse_feed_entry(mock_entry)

        assert article.title == "Untitled"
        assert article.link == ""
        assert article.summary == ""
        assert article.source == "Unknown"
        assert article.guid == ""  # Falls back through id -> link -> ""

    @vcr
    def test_parse_feed_entry_with_google_news_url_decoding_real(self, client):
        """Test that Google News URLs are decoded to actual article URLs with real decoder."""
        mock_entry = Mock(spec=feedparser.FeedParserDict)
        mock_entry.title = "Tech News - TechCrunch"
        # Use a real Google News URL that will be recorded by VCR
        mock_entry.link = "https://news.google.com/rss/articles/CBMiWWh0dHBzOi8vdGVjaGNydW5jaC5jb20vMjAyNC8wMS8xNS90ZXN0LWFydGljbGUv0gEA"
        mock_entry.published = "Mon, 15 Jan 2024 10:00:00 GMT"
        mock_entry.summary = "Tech summary"
        mock_entry.id = "guid-decoded"

        # Let the decoder run and record HTTP requests with VCR
        article = client._parse_feed_entry(mock_entry)

        # Should have either decoded the URL or used the original
        assert article.link  # Link should not be empty
        assert article.title == "Tech News"
        assert article.source == "TechCrunch"

    def test_parse_feed_entry_with_non_google_news_url(self, client):
        """Test that non-Google News URLs are passed through unchanged."""
        mock_entry = Mock(spec=feedparser.FeedParserDict)
        mock_entry.title = "Direct News - CNN"
        # Regular URL that should not be processed by decoder
        mock_entry.link = "https://cnn.com/2024/01/15/direct-article"
        mock_entry.published = "Mon, 15 Jan 2024 10:00:00 GMT"
        mock_entry.summary = "Direct article summary"
        mock_entry.id = "guid-direct"

        article = client._parse_feed_entry(mock_entry)

        # Should use original URL unchanged
        assert article.link == "https://cnn.com/2024/01/15/direct-article"
        assert article.title == "Direct News"
        assert article.source == "CNN"

    def test_parse_feed_entry_with_decoder_failure(self, client):
        """Test graceful fallback when URL decoder fails."""
        mock_entry = Mock(spec=feedparser.FeedParserDict)
        mock_entry.title = "News Article"
        mock_entry.link = "https://news.google.com/rss/articles/CBMiEncodedURL"
        mock_entry.published = ""
        mock_entry.summary = "Summary"
        mock_entry.id = "guid-123"

        with patch("googlenewsdecoder.gnewsdecoder") as mock_decoder:
            # Mock the decoder to raise an exception
            mock_decoder.side_effect = Exception("Decoder error")

            article = client._parse_feed_entry(mock_entry)

            # Should fallback to using the original URL
            assert article.link == "https://news.google.com/rss/articles/CBMiEncodedURL"
            assert article.title == "News Article"

    def test_parse_feed_entry_with_decoder_returns_failure(self, client):
        """Test fallback when decoder returns failure status."""
        mock_entry = Mock(spec=feedparser.FeedParserDict)
        mock_entry.title = "News Article"
        mock_entry.link = "https://news.google.com/rss/articles/CBMiEncodedURL"
        mock_entry.published = ""
        mock_entry.summary = "Summary"
        mock_entry.id = "guid-456"

        with patch("googlenewsdecoder.gnewsdecoder") as mock_decoder:
            # Mock the decoder to return failure status
            mock_decoder.return_value = {
                "status": False,
                "message": "Invalid URL format",
            }

            article = client._parse_feed_entry(mock_entry)

            # Should fallback to using the original URL
            assert article.link == "https://news.google.com/rss/articles/CBMiEncodedURL"

    def test_get_global_news_category_failure(self, client):
        """Test global news when some categories fail."""
        with patch.object(client, "_get_rss_feed") as mock_get_rss:
            # First category succeeds, second fails: ``side_effect`` yields
            # the list on the first call and raises the exception on the
            # second.
            mock_get_rss.side_effect = [
                [
                    GoogleNewsArticle(
                        title="Tech News",
                        link="https://tech.com",
                        published=datetime.now(timezone.utc).replace(tzinfo=None),
                        summary="Tech summary",
                        source="TechSite",
                        guid="tech-1",
                    )
                ],
                Exception("Failed to fetch"),
            ]

            articles = client.get_global_news(["technology", "invalid_category"])

            # The failing category must not discard the successful one.
            assert len(articles) == 1
            assert articles[0].title == "Tech News"
            assert mock_get_rss.call_count == 2

    def test_convert_entry_with_malformed_entry(self, client):
        """Test handling of malformed entry during conversion."""
        with patch(
            "tradingagents.domains.news.google_news_client.logger"
        ) as mock_logger:
            mock_feed = Mock()
            mock_feed.bozo = False

            # Create entry that will cause conversion to fail. A ``None``
            # title is presumably what trips the parser (the original note
            # expected an AttributeError) -- confirm against the client.
            bad_entry = Mock(spec=feedparser.FeedParserDict)
            bad_entry.title = None

            mock_feed.entries = [bad_entry]

            with (
                patch("feedparser.parse", return_value=mock_feed),
                patch("requests.get") as mock_get,
            ):
                mock_response = Mock()
                mock_response.content = b"<rss></rss>"
                mock_response.raise_for_status.return_value = None
                mock_get.return_value = mock_response

                articles = client._get_rss_feed("TEST")

                assert articles == []
                # Should log warning about failed parsing
                mock_logger.warning.assert_called()