Add Brave and Tavily news data vendors with retry handling

- Add Brave Search API integration for bulk news fetching
- Add Tavily API integration for bulk news fetching
- Implement timeout and retry logic with exponential backoff for both vendors
- Make bulk news vendor order configurable via default_config.py
- Add tavily-python to requirements.txt
- Document BRAVE_API_KEY and TAVILY_API_KEY in .env.example and README
- Add comprehensive unit tests for both vendors (49 tests)

The news discovery system now uses fallback chain: Tavily → Brave → Alpha Vantage → OpenAI → Google

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Joseph O'Brien 2025-12-03 00:43:27 -05:00
parent 821a7e4db7
commit 24e955d29b
9 changed files with 1024 additions and 4 deletions

View File

@ -1,2 +1,4 @@
ALPHA_VANTAGE_API_KEY=alpha_vantage_api_key_placeholder
OPENAI_API_KEY=openai_api_key_placeholder
OPENAI_API_KEY=openai_api_key_placeholder
BRAVE_API_KEY=brave_api_key_placeholder
TAVILY_API_KEY=tavily_api_key_placeholder

View File

@ -73,13 +73,25 @@ source .venv/bin/activate
### Required API Keys
The framework requires an OpenAI API key for powering the agents and an Alpha Vantage API key for fundamental and news data (default configuration).
The framework requires an OpenAI API key for powering the agents and at least one news data provider API key.
**Required:**
- `OPENAI_API_KEY` - Powers the LLM agents
**News Data Providers (at least one required):**
- `TAVILY_API_KEY` - Tavily search API (preferred for news discovery)
- `BRAVE_API_KEY` - Brave Search API (fallback option)
- `ALPHA_VANTAGE_API_KEY` - Alpha Vantage API (for fundamentals and news)
The news discovery system uses a fallback chain: Tavily → Brave → Alpha Vantage → OpenAI → Google. Configure the API keys for your preferred providers.
Set environment variables:
```bash
export OPENAI_API_KEY=your_openai_api_key
export ALPHA_VANTAGE_API_KEY=your_alpha_vantage_api_key
export TAVILY_API_KEY=your_tavily_api_key
export BRAVE_API_KEY=your_brave_api_key
```
Alternatively, create a `.env` file in the project root:
@ -95,7 +107,7 @@ Then edit the `.env` file with your API keys.
Run the CLI:
```bash
uv run cli/main.py
uv run python -m cli.main
```
The CLI provides two main modes:
@ -105,6 +117,7 @@ The CLI provides two main modes:
The trending stock discovery feature uses LLM-powered entity extraction to identify stocks making news. This is the primary enhancement in this fork, enabling proactive discovery of trading opportunities.
Configuration options:
- **Lookback period:** 1h, 6h, 24h, or 7d
- **Sector filter:** Technology, Healthcare, Finance, Energy, Consumer Goods, Industrials
- **Event type filter:** Earnings, Merger/Acquisition, Regulatory, Product Launch, Executive Change

View File

@ -24,3 +24,4 @@ rich
questionary
langchain_anthropic
langchain-google-genai
tavily-python

View File

@ -0,0 +1,348 @@
import pytest
from unittest.mock import Mock, patch, MagicMock
from datetime import datetime, timedelta
import requests
from tradingagents.dataflows.brave import (
get_api_key,
get_bulk_news_brave,
_parse_brave_age,
_make_request_with_retry,
BRAVE_SEARCH_URL,
DEFAULT_TIMEOUT,
MAX_RETRIES,
)
class TestGetApiKey:
    """get_api_key reads BRAVE_API_KEY from the environment."""

    def test_get_api_key_success(self):
        # Key present in the environment -> returned verbatim.
        with patch.dict('os.environ', {'BRAVE_API_KEY': 'test_key_123'}):
            result = get_api_key()
            assert result == 'test_key_123'

    def test_get_api_key_missing(self):
        # clear=True guarantees the variable is absent -> ValueError raised.
        with patch.dict('os.environ', {}, clear=True):
            with pytest.raises(ValueError, match="BRAVE_API_KEY environment variable is not set"):
                get_api_key()
class TestParseBraveAge:
    """_parse_brave_age converts Brave's relative age text to a datetime.

    Each test allows a 2-second tolerance because the helper and the test
    both call datetime.now() at slightly different moments.
    """

    def test_parse_hours_ago(self):
        result = _parse_brave_age("2 hours ago")
        expected = datetime.now() - timedelta(hours=2)
        assert abs((result - expected).total_seconds()) < 2

    def test_parse_single_hour(self):
        result = _parse_brave_age("1 hour ago")
        expected = datetime.now() - timedelta(hours=1)
        assert abs((result - expected).total_seconds()) < 2

    def test_parse_days_ago(self):
        result = _parse_brave_age("3 days ago")
        expected = datetime.now() - timedelta(days=3)
        assert abs((result - expected).total_seconds()) < 2

    def test_parse_weeks_ago(self):
        result = _parse_brave_age("2 weeks ago")
        expected = datetime.now() - timedelta(weeks=2)
        assert abs((result - expected).total_seconds()) < 2

    def test_parse_minutes_ago(self):
        result = _parse_brave_age("30 minutes ago")
        expected = datetime.now() - timedelta(minutes=30)
        assert abs((result - expected).total_seconds()) < 2

    def test_parse_empty_string(self):
        # Empty input falls back to "now".
        result = _parse_brave_age("")
        expected = datetime.now()
        assert abs((result - expected).total_seconds()) < 2

    def test_parse_invalid_format(self):
        # Unrecognised text also falls back to "now" rather than raising.
        result = _parse_brave_age("invalid format")
        expected = datetime.now()
        assert abs((result - expected).total_seconds()) < 2

    def test_parse_uppercase(self):
        # Parsing is case-insensitive.
        result = _parse_brave_age("5 HOURS AGO")
        expected = datetime.now() - timedelta(hours=5)
        assert abs((result - expected).total_seconds()) < 2
class TestMakeRequestWithRetry:
    """Retry behaviour of the low-level Brave HTTP helper.

    requests.get and time.sleep are patched at the module under test so no
    network traffic occurs and no real backoff delay is incurred.
    """

    @patch('tradingagents.dataflows.brave.requests.get')
    def test_successful_request(self, mock_get):
        # Happy path: a single successful GET, no retries.
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.raise_for_status = Mock()
        mock_get.return_value = mock_response
        result = _make_request_with_retry("http://test.com", {}, {})
        assert result == mock_response
        mock_get.assert_called_once()

    @patch('tradingagents.dataflows.brave.requests.get')
    @patch('tradingagents.dataflows.brave.time.sleep')
    def test_retry_on_timeout(self, mock_sleep, mock_get):
        # Two timeouts then success: three calls, two backoff sleeps.
        mock_get.side_effect = [
            requests.exceptions.Timeout(),
            requests.exceptions.Timeout(),
            Mock(status_code=200, raise_for_status=Mock()),
        ]
        result = _make_request_with_retry("http://test.com", {}, {})
        assert mock_get.call_count == 3
        assert mock_sleep.call_count == 2

    @patch('tradingagents.dataflows.brave.requests.get')
    @patch('tradingagents.dataflows.brave.time.sleep')
    def test_retry_on_connection_error(self, mock_sleep, mock_get):
        # Connection errors are also retryable.
        mock_get.side_effect = [
            requests.exceptions.ConnectionError(),
            Mock(status_code=200, raise_for_status=Mock()),
        ]
        result = _make_request_with_retry("http://test.com", {}, {})
        assert mock_get.call_count == 2
        assert mock_sleep.call_count == 1

    @patch('tradingagents.dataflows.brave.requests.get')
    @patch('tradingagents.dataflows.brave.time.sleep')
    def test_retry_on_rate_limit(self, mock_sleep, mock_get):
        # HTTP 429 triggers a sleep honouring the Retry-After header.
        rate_limited_response = Mock()
        rate_limited_response.status_code = 429
        rate_limited_response.headers = {"Retry-After": "1"}
        success_response = Mock()
        success_response.status_code = 200
        success_response.raise_for_status = Mock()
        mock_get.side_effect = [rate_limited_response, success_response]
        result = _make_request_with_retry("http://test.com", {}, {})
        assert mock_get.call_count == 2
        assert mock_sleep.call_count == 1

    @patch('tradingagents.dataflows.brave.requests.get')
    @patch('tradingagents.dataflows.brave.time.sleep')
    def test_max_retries_exceeded(self, mock_sleep, mock_get):
        # Persistent timeouts: last exception is re-raised after max_retries.
        mock_get.side_effect = requests.exceptions.Timeout()
        with pytest.raises(requests.exceptions.Timeout):
            _make_request_with_retry("http://test.com", {}, {}, max_retries=3)
        assert mock_get.call_count == 3

    @patch('tradingagents.dataflows.brave.requests.get')
    def test_non_retryable_http_error(self, mock_get):
        # 4xx (other than 429) fails fast: exactly one attempt.
        mock_response = Mock()
        mock_response.status_code = 400
        mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_response)
        mock_get.return_value = mock_response
        with pytest.raises(requests.exceptions.HTTPError):
            _make_request_with_retry("http://test.com", {}, {})
        assert mock_get.call_count == 1
class TestGetBulkNewsBrave:
    """End-to-end behaviour of get_bulk_news_brave with the HTTP layer mocked."""

    @patch('tradingagents.dataflows.brave.get_api_key')
    def test_returns_empty_when_no_api_key(self, mock_get_api_key):
        # Missing key degrades gracefully to an empty result, not an error.
        mock_get_api_key.side_effect = ValueError("BRAVE_API_KEY not set")
        result = get_bulk_news_brave(24)
        assert result == []

    @patch('tradingagents.dataflows.brave._make_request_with_retry')
    @patch('tradingagents.dataflows.brave.get_api_key')
    def test_basic_call(self, mock_get_api_key, mock_request):
        # One request is issued per hard-coded query (5 queries total).
        mock_get_api_key.return_value = "test_key"
        mock_response = Mock()
        mock_response.json.return_value = {"results": []}
        mock_request.return_value = mock_response
        result = get_bulk_news_brave(24)
        assert isinstance(result, list)
        assert mock_request.call_count == 5

    @patch('tradingagents.dataflows.brave._make_request_with_retry')
    @patch('tradingagents.dataflows.brave.get_api_key')
    def test_parses_articles(self, mock_get_api_key, mock_request):
        # Raw Brave items are normalised into the common article dict shape.
        mock_get_api_key.return_value = "test_key"
        mock_article = {
            "title": "Test Stock News",
            "meta_url": {"netloc": "reuters.com"},
            "url": "https://reuters.com/article1",
            "age": "2 hours ago",
            "description": "This is a test article about stocks.",
        }
        mock_response = Mock()
        mock_response.json.return_value = {"results": [mock_article]}
        mock_request.return_value = mock_response
        result = get_bulk_news_brave(24)
        assert len(result) >= 1
        article = result[0]
        assert article["title"] == "Test Stock News"
        assert article["source"] == "reuters.com"
        assert article["url"] == "https://reuters.com/article1"
        assert "published_at" in article
        assert article["content_snippet"] == "This is a test article about stocks."

    @patch('tradingagents.dataflows.brave._make_request_with_retry')
    @patch('tradingagents.dataflows.brave.get_api_key')
    def test_deduplicates_by_url(self, mock_get_api_key, mock_request):
        # The same URL returned by multiple queries appears only once.
        mock_get_api_key.return_value = "test_key"
        duplicate_article = {
            "title": "Duplicate Article",
            "meta_url": {"netloc": "news.com"},
            "url": "https://news.com/same-url",
            "age": "1 hour ago",
            "description": "Duplicate content.",
        }
        mock_response = Mock()
        mock_response.json.return_value = {"results": [duplicate_article, duplicate_article]}
        mock_request.return_value = mock_response
        result = get_bulk_news_brave(24)
        urls = [a["url"] for a in result]
        assert len(urls) == len(set(urls))

    @patch('tradingagents.dataflows.brave._make_request_with_retry')
    @patch('tradingagents.dataflows.brave.get_api_key')
    def test_truncates_long_descriptions(self, mock_get_api_key, mock_request):
        # Snippets are capped at 500 characters.
        mock_get_api_key.return_value = "test_key"
        long_description = "A" * 1000
        mock_article = {
            "title": "Long Article",
            "meta_url": {"netloc": "news.com"},
            "url": "https://news.com/article",
            "age": "1 hour ago",
            "description": long_description,
        }
        mock_response = Mock()
        mock_response.json.return_value = {"results": [mock_article]}
        mock_request.return_value = mock_response
        result = get_bulk_news_brave(24)
        assert len(result[0]["content_snippet"]) == 500

    @patch('tradingagents.dataflows.brave._make_request_with_retry')
    @patch('tradingagents.dataflows.brave.get_api_key')
    def test_freshness_parameter_24h(self, mock_get_api_key, mock_request):
        # <=24h lookback maps to Brave freshness "pd" (past day).
        mock_get_api_key.return_value = "test_key"
        mock_response = Mock()
        mock_response.json.return_value = {"results": []}
        mock_request.return_value = mock_response
        get_bulk_news_brave(24)
        call_args = mock_request.call_args_list[0]
        # params is the third positional argument of _make_request_with_retry.
        params = call_args[0][2]
        assert params["freshness"] == "pd"

    @patch('tradingagents.dataflows.brave._make_request_with_retry')
    @patch('tradingagents.dataflows.brave.get_api_key')
    def test_freshness_parameter_7d(self, mock_get_api_key, mock_request):
        # <=168h lookback maps to "pw" (past week).
        mock_get_api_key.return_value = "test_key"
        mock_response = Mock()
        mock_response.json.return_value = {"results": []}
        mock_request.return_value = mock_response
        get_bulk_news_brave(168)
        call_args = mock_request.call_args_list[0]
        params = call_args[0][2]
        assert params["freshness"] == "pw"

    @patch('tradingagents.dataflows.brave._make_request_with_retry')
    @patch('tradingagents.dataflows.brave.get_api_key')
    def test_freshness_parameter_month(self, mock_get_api_key, mock_request):
        # Anything longer maps to "pm" (past month).
        mock_get_api_key.return_value = "test_key"
        mock_response = Mock()
        mock_response.json.return_value = {"results": []}
        mock_request.return_value = mock_response
        get_bulk_news_brave(720)
        call_args = mock_request.call_args_list[0]
        params = call_args[0][2]
        assert params["freshness"] == "pm"

    @patch('tradingagents.dataflows.brave._make_request_with_retry')
    @patch('tradingagents.dataflows.brave.get_api_key')
    def test_handles_missing_meta_url(self, mock_get_api_key, mock_request):
        # Absent publisher metadata falls back to the "Brave News" source label.
        mock_get_api_key.return_value = "test_key"
        mock_article = {
            "title": "Article Without Meta URL",
            "url": "https://news.com/article",
            "age": "1 hour ago",
            "description": "Content",
        }
        mock_response = Mock()
        mock_response.json.return_value = {"results": [mock_article]}
        mock_request.return_value = mock_response
        result = get_bulk_news_brave(24)
        assert result[0]["source"] == "Brave News"

    @patch('tradingagents.dataflows.brave._make_request_with_retry')
    @patch('tradingagents.dataflows.brave.get_api_key')
    def test_continues_on_query_failure(self, mock_get_api_key, mock_request):
        # A failure on one query must not abort the remaining queries.
        mock_get_api_key.return_value = "test_key"
        mock_response = Mock()
        mock_response.json.return_value = {"results": [{"title": "Article", "url": "https://test.com", "age": "1h", "description": "test"}]}
        mock_request.side_effect = [
            requests.exceptions.HTTPError("Error"),
            mock_response,
            mock_response,
            mock_response,
            mock_response,
        ]
        result = get_bulk_news_brave(24)
        assert len(result) > 0

    @patch('tradingagents.dataflows.brave._make_request_with_retry')
    @patch('tradingagents.dataflows.brave.get_api_key')
    def test_skips_articles_without_url(self, mock_get_api_key, mock_request):
        # Items without a URL are dropped (URL is the dedup key).
        mock_get_api_key.return_value = "test_key"
        mock_articles = [
            {"title": "No URL Article", "age": "1h", "description": "test"},
            {"title": "Has URL", "url": "https://test.com", "age": "1h", "description": "test"},
        ]
        mock_response = Mock()
        mock_response.json.return_value = {"results": mock_articles}
        mock_request.return_value = mock_response
        result = get_bulk_news_brave(24)
        urls = [a["url"] for a in result if a.get("url")]
        assert all(url for url in urls)

View File

@ -0,0 +1,370 @@
import pytest
from unittest.mock import Mock, patch, MagicMock
from datetime import datetime, timedelta
from tradingagents.dataflows.tavily import (
get_api_key,
get_bulk_news_tavily,
_search_with_retry,
DEFAULT_TIMEOUT,
MAX_RETRIES,
)
class TestGetApiKey:
    """get_api_key reads TAVILY_API_KEY from the environment."""

    def test_get_api_key_success(self):
        # Key present in the environment -> returned verbatim.
        with patch.dict('os.environ', {'TAVILY_API_KEY': 'test_key_123'}):
            result = get_api_key()
            assert result == 'test_key_123'

    def test_get_api_key_missing(self):
        # clear=True guarantees the variable is absent -> ValueError raised.
        with patch.dict('os.environ', {}, clear=True):
            with pytest.raises(ValueError, match="TAVILY_API_KEY environment variable is not set"):
                get_api_key()
class TestSearchWithRetry:
    """Retry behaviour of the Tavily search helper.

    The Tavily client raises plain Exceptions, so retryability is decided by
    keywords in the error message; these tests exercise each keyword class.
    """

    def test_successful_search(self):
        # Happy path: one search call, no retries.
        mock_client = Mock()
        mock_client.search.return_value = {"results": []}
        result = _search_with_retry(
            client=mock_client,
            query="test query",
            search_depth="advanced",
            topic="news",
            time_range="day",
            max_results=10,
        )
        assert result == {"results": []}
        mock_client.search.assert_called_once()

    @patch('tradingagents.dataflows.tavily.time.sleep')
    def test_retry_on_rate_limit(self, mock_sleep):
        # "Rate limit" in the message marks the error as retryable.
        mock_client = Mock()
        mock_client.search.side_effect = [
            Exception("Rate limit exceeded"),
            {"results": []},
        ]
        result = _search_with_retry(
            client=mock_client,
            query="test query",
            search_depth="advanced",
            topic="news",
            time_range="day",
            max_results=10,
        )
        assert result == {"results": []}
        assert mock_client.search.call_count == 2
        assert mock_sleep.call_count == 1

    @patch('tradingagents.dataflows.tavily.time.sleep')
    def test_retry_on_timeout(self, mock_sleep):
        # Timeout-flavoured messages are retried.
        mock_client = Mock()
        mock_client.search.side_effect = [
            Exception("Request timed out"),
            {"results": []},
        ]
        result = _search_with_retry(
            client=mock_client,
            query="test query",
            search_depth="advanced",
            topic="news",
            time_range="day",
            max_results=10,
        )
        assert result == {"results": []}
        assert mock_client.search.call_count == 2

    @patch('tradingagents.dataflows.tavily.time.sleep')
    def test_retry_on_connection_error(self, mock_sleep):
        # Connection/network-flavoured messages are retried.
        mock_client = Mock()
        mock_client.search.side_effect = [
            Exception("Connection error occurred"),
            {"results": []},
        ]
        result = _search_with_retry(
            client=mock_client,
            query="test query",
            search_depth="advanced",
            topic="news",
            time_range="day",
            max_results=10,
        )
        assert result == {"results": []}
        assert mock_client.search.call_count == 2

    @patch('tradingagents.dataflows.tavily.time.sleep')
    def test_max_retries_exceeded(self, mock_sleep):
        # Persistent retryable errors re-raise after max_retries attempts.
        mock_client = Mock()
        mock_client.search.side_effect = Exception("Rate limit 429")
        with pytest.raises(Exception, match="Rate limit 429"):
            _search_with_retry(
                client=mock_client,
                query="test query",
                search_depth="advanced",
                topic="news",
                time_range="day",
                max_results=10,
                max_retries=3,
            )
        assert mock_client.search.call_count == 3

    def test_non_retryable_error(self):
        # Unrecognised errors (e.g. bad API key) fail fast: one attempt only.
        mock_client = Mock()
        mock_client.search.side_effect = Exception("Invalid API key")
        with pytest.raises(Exception, match="Invalid API key"):
            _search_with_retry(
                client=mock_client,
                query="test query",
                search_depth="advanced",
                topic="news",
                time_range="day",
                max_results=10,
            )
        assert mock_client.search.call_count == 1
class TestGetBulkNewsTavily:
    """End-to-end behaviour of get_bulk_news_tavily with the client mocked."""

    @patch('tradingagents.dataflows.tavily.TAVILY_AVAILABLE', False)
    def test_returns_empty_when_library_not_installed(self):
        # Optional dependency missing -> graceful empty result.
        result = get_bulk_news_tavily(24)
        assert result == []

    @patch('tradingagents.dataflows.tavily.TAVILY_AVAILABLE', True)
    @patch('tradingagents.dataflows.tavily.TavilyClient')
    @patch('tradingagents.dataflows.tavily.get_api_key')
    def test_returns_empty_when_no_api_key(self, mock_get_api_key, mock_client_class):
        # Missing key degrades gracefully to an empty result, not an error.
        mock_get_api_key.side_effect = ValueError("TAVILY_API_KEY not set")
        result = get_bulk_news_tavily(24)
        assert result == []

    @patch('tradingagents.dataflows.tavily.TAVILY_AVAILABLE', True)
    @patch('tradingagents.dataflows.tavily.TavilyClient')
    @patch('tradingagents.dataflows.tavily.get_api_key')
    @patch('tradingagents.dataflows.tavily._search_with_retry')
    def test_basic_call(self, mock_search, mock_get_api_key, mock_client_class):
        # One search is issued per hard-coded query (5 queries total).
        mock_get_api_key.return_value = "test_key"
        mock_search.return_value = {"results": []}
        result = get_bulk_news_tavily(24)
        assert isinstance(result, list)
        assert mock_search.call_count == 5

    @patch('tradingagents.dataflows.tavily.TAVILY_AVAILABLE', True)
    @patch('tradingagents.dataflows.tavily.TavilyClient')
    @patch('tradingagents.dataflows.tavily.get_api_key')
    @patch('tradingagents.dataflows.tavily._search_with_retry')
    def test_parses_articles(self, mock_search, mock_get_api_key, mock_client_class):
        # Raw Tavily items are normalised into the common article dict shape.
        mock_get_api_key.return_value = "test_key"
        mock_article = {
            "title": "Test Stock News",
            "url": "https://reuters.com/article1",
            "published_date": "2024-01-15T10:30:00Z",
            "content": "This is a test article about stocks.",
        }
        mock_search.return_value = {"results": [mock_article]}
        result = get_bulk_news_tavily(24)
        assert len(result) >= 1
        article = result[0]
        assert article["title"] == "Test Stock News"
        assert article["source"] == "Tavily"
        assert article["url"] == "https://reuters.com/article1"
        assert "published_at" in article
        assert article["content_snippet"] == "This is a test article about stocks."

    @patch('tradingagents.dataflows.tavily.TAVILY_AVAILABLE', True)
    @patch('tradingagents.dataflows.tavily.TavilyClient')
    @patch('tradingagents.dataflows.tavily.get_api_key')
    @patch('tradingagents.dataflows.tavily._search_with_retry')
    def test_deduplicates_by_url(self, mock_search, mock_get_api_key, mock_client_class):
        # The same URL returned by multiple queries appears only once.
        mock_get_api_key.return_value = "test_key"
        duplicate_article = {
            "title": "Duplicate Article",
            "url": "https://news.com/same-url",
            "published_date": "2024-01-15T10:30:00Z",
            "content": "Duplicate content.",
        }
        mock_search.return_value = {"results": [duplicate_article, duplicate_article]}
        result = get_bulk_news_tavily(24)
        urls = [a["url"] for a in result]
        assert len(urls) == len(set(urls))

    @patch('tradingagents.dataflows.tavily.TAVILY_AVAILABLE', True)
    @patch('tradingagents.dataflows.tavily.TavilyClient')
    @patch('tradingagents.dataflows.tavily.get_api_key')
    @patch('tradingagents.dataflows.tavily._search_with_retry')
    def test_truncates_long_content(self, mock_search, mock_get_api_key, mock_client_class):
        # Snippets are capped at 500 characters.
        mock_get_api_key.return_value = "test_key"
        long_content = "A" * 1000
        mock_article = {
            "title": "Long Article",
            "url": "https://news.com/article",
            "published_date": "2024-01-15T10:30:00Z",
            "content": long_content,
        }
        mock_search.return_value = {"results": [mock_article]}
        result = get_bulk_news_tavily(24)
        assert len(result[0]["content_snippet"]) == 500

    @patch('tradingagents.dataflows.tavily.TAVILY_AVAILABLE', True)
    @patch('tradingagents.dataflows.tavily.TavilyClient')
    @patch('tradingagents.dataflows.tavily.get_api_key')
    @patch('tradingagents.dataflows.tavily._search_with_retry')
    def test_time_range_day(self, mock_search, mock_get_api_key, mock_client_class):
        # <=24h lookback maps to Tavily time_range "day".
        mock_get_api_key.return_value = "test_key"
        mock_search.return_value = {"results": []}
        get_bulk_news_tavily(24)
        call_kwargs = mock_search.call_args_list[0][1]
        assert call_kwargs["time_range"] == "day"

    @patch('tradingagents.dataflows.tavily.TAVILY_AVAILABLE', True)
    @patch('tradingagents.dataflows.tavily.TavilyClient')
    @patch('tradingagents.dataflows.tavily.get_api_key')
    @patch('tradingagents.dataflows.tavily._search_with_retry')
    def test_time_range_week(self, mock_search, mock_get_api_key, mock_client_class):
        # <=168h lookback maps to "week".
        mock_get_api_key.return_value = "test_key"
        mock_search.return_value = {"results": []}
        get_bulk_news_tavily(168)
        call_kwargs = mock_search.call_args_list[0][1]
        assert call_kwargs["time_range"] == "week"

    @patch('tradingagents.dataflows.tavily.TAVILY_AVAILABLE', True)
    @patch('tradingagents.dataflows.tavily.TavilyClient')
    @patch('tradingagents.dataflows.tavily.get_api_key')
    @patch('tradingagents.dataflows.tavily._search_with_retry')
    def test_time_range_month(self, mock_search, mock_get_api_key, mock_client_class):
        # Anything longer maps to "month".
        mock_get_api_key.return_value = "test_key"
        mock_search.return_value = {"results": []}
        get_bulk_news_tavily(720)
        call_kwargs = mock_search.call_args_list[0][1]
        assert call_kwargs["time_range"] == "month"

    @patch('tradingagents.dataflows.tavily.TAVILY_AVAILABLE', True)
    @patch('tradingagents.dataflows.tavily.TavilyClient')
    @patch('tradingagents.dataflows.tavily.get_api_key')
    @patch('tradingagents.dataflows.tavily._search_with_retry')
    def test_handles_missing_published_date(self, mock_search, mock_get_api_key, mock_client_class):
        # Absent published_date still produces a published_at value.
        mock_get_api_key.return_value = "test_key"
        mock_article = {
            "title": "Article Without Date",
            "url": "https://news.com/article",
            "content": "Content",
        }
        mock_search.return_value = {"results": [mock_article]}
        result = get_bulk_news_tavily(24)
        assert len(result) == 1
        assert "published_at" in result[0]

    @patch('tradingagents.dataflows.tavily.TAVILY_AVAILABLE', True)
    @patch('tradingagents.dataflows.tavily.TavilyClient')
    @patch('tradingagents.dataflows.tavily.get_api_key')
    @patch('tradingagents.dataflows.tavily._search_with_retry')
    def test_handles_invalid_date_format(self, mock_search, mock_get_api_key, mock_client_class):
        # Unparseable dates fall back to "now" instead of raising.
        mock_get_api_key.return_value = "test_key"
        mock_article = {
            "title": "Article With Bad Date",
            "url": "https://news.com/article",
            "published_date": "invalid_date_format",
            "content": "Content",
        }
        mock_search.return_value = {"results": [mock_article]}
        result = get_bulk_news_tavily(24)
        assert len(result) == 1
        assert "published_at" in result[0]

    @patch('tradingagents.dataflows.tavily.TAVILY_AVAILABLE', True)
    @patch('tradingagents.dataflows.tavily.TavilyClient')
    @patch('tradingagents.dataflows.tavily.get_api_key')
    @patch('tradingagents.dataflows.tavily._search_with_retry')
    def test_continues_on_query_failure(self, mock_search, mock_get_api_key, mock_client_class):
        # A failure on one query must not abort the remaining queries.
        mock_get_api_key.return_value = "test_key"
        mock_search.side_effect = [
            Exception("Query failed"),
            {"results": [{"title": "Article", "url": "https://test.com", "content": "test"}]},
            {"results": []},
            {"results": []},
            {"results": []},
        ]
        result = get_bulk_news_tavily(24)
        assert len(result) > 0

    @patch('tradingagents.dataflows.tavily.TAVILY_AVAILABLE', True)
    @patch('tradingagents.dataflows.tavily.TavilyClient')
    @patch('tradingagents.dataflows.tavily.get_api_key')
    @patch('tradingagents.dataflows.tavily._search_with_retry')
    def test_skips_articles_without_url(self, mock_search, mock_get_api_key, mock_client_class):
        # Items without a URL are dropped (URL is the dedup key).
        mock_get_api_key.return_value = "test_key"
        mock_articles = [
            {"title": "No URL Article", "content": "test"},
            {"title": "Has URL", "url": "https://test.com", "content": "test"},
        ]
        mock_search.return_value = {"results": mock_articles}
        result = get_bulk_news_tavily(24)
        urls = [a["url"] for a in result if a.get("url")]
        assert all(url for url in urls)

    @patch('tradingagents.dataflows.tavily.TAVILY_AVAILABLE', True)
    @patch('tradingagents.dataflows.tavily.TavilyClient')
    @patch('tradingagents.dataflows.tavily.get_api_key')
    @patch('tradingagents.dataflows.tavily._search_with_retry')
    def test_uses_correct_search_parameters(self, mock_search, mock_get_api_key, mock_client_class):
        # Fixed search parameters: advanced depth, news topic, 10 results.
        mock_get_api_key.return_value = "test_key"
        mock_search.return_value = {"results": []}
        get_bulk_news_tavily(24)
        call_kwargs = mock_search.call_args_list[0][1]
        assert call_kwargs["search_depth"] == "advanced"
        assert call_kwargs["topic"] == "news"
        assert call_kwargs["max_results"] == 10

View File

@ -0,0 +1,150 @@
import os
import time
import requests
from datetime import datetime, timedelta
from typing import List, Dict, Any
# Brave News Search endpoint.
BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/news/search"
# Per-request timeout in seconds passed to requests.get.
DEFAULT_TIMEOUT = 30
# Total attempts made per request before giving up.
MAX_RETRIES = 3
# Base backoff in seconds; scaled linearly with the attempt number.
RETRY_BACKOFF = 1.0
def get_api_key() -> str:
    """Return the Brave Search API key from the environment.

    Raises:
        ValueError: if BRAVE_API_KEY is unset or empty.
    """
    key = os.getenv("BRAVE_API_KEY")
    if key:
        return key
    raise ValueError("BRAVE_API_KEY environment variable is not set.")
def _make_request_with_retry(url: str, headers: Dict, params: Dict, max_retries: int = MAX_RETRIES) -> requests.Response:
    """GET *url* with a timeout, retrying transient failures with linear backoff.

    Retried conditions: HTTP 429 (honouring a numeric Retry-After header),
    timeouts, connection errors, and 5xx server errors.  Other HTTP errors
    (4xx) are raised immediately.

    Args:
        url: Endpoint to request.
        headers: HTTP headers (including the Brave subscription token).
        params: Query-string parameters.
        max_retries: Total number of attempts before giving up.

    Returns:
        The successful requests.Response.

    Raises:
        requests.exceptions.RequestException: the last transient error after
            all attempts are exhausted, or a non-retryable HTTP error.
    """
    last_exception = None
    for attempt in range(max_retries):
        # True while at least one more attempt remains after this one; used
        # to avoid sleeping pointlessly after the final failed attempt.
        retries_left = attempt + 1 < max_retries
        try:
            response = requests.get(url, headers=headers, params=params, timeout=DEFAULT_TIMEOUT)
            if response.status_code == 429:
                # Retry-After may be a number of seconds or an HTTP date
                # (RFC 9110); fall back to linear backoff when it is not a
                # plain integer.  Previously int() could raise ValueError here.
                try:
                    retry_after = int(response.headers.get("Retry-After", ""))
                except (TypeError, ValueError):
                    retry_after = int(RETRY_BACKOFF * (attempt + 1))
                print(f"DEBUG: Brave rate limited, waiting {retry_after}s before retry {attempt + 1}/{max_retries}")
                if retries_left:
                    time.sleep(retry_after)
                continue
            response.raise_for_status()
            return response
        except requests.exceptions.Timeout as e:
            last_exception = e
            print(f"DEBUG: Brave request timeout, retry {attempt + 1}/{max_retries}")
            if retries_left:
                time.sleep(RETRY_BACKOFF * (attempt + 1))
        except requests.exceptions.ConnectionError as e:
            last_exception = e
            print(f"DEBUG: Brave connection error, retry {attempt + 1}/{max_retries}")
            if retries_left:
                time.sleep(RETRY_BACKOFF * (attempt + 1))
        except requests.exceptions.HTTPError as e:
            # Only 5xx responses are worth retrying; 4xx are caller errors.
            if e.response is not None and e.response.status_code >= 500:
                last_exception = e
                print(f"DEBUG: Brave server error {e.response.status_code}, retry {attempt + 1}/{max_retries}")
                if retries_left:
                    time.sleep(RETRY_BACKOFF * (attempt + 1))
            else:
                raise
    raise last_exception if last_exception else requests.exceptions.RequestException("Max retries exceeded")
def get_bulk_news_brave(lookback_hours: int) -> List[Dict[str, Any]]:
    """Fetch recent market news from the Brave News Search API.

    Runs a fixed set of finance-oriented queries, de-duplicates the hits by
    URL, and normalises each one into the common bulk-news article dict.

    Args:
        lookback_hours: How far back to search; mapped onto Brave's coarse
            freshness buckets (past day / week / month).

    Returns:
        List of dicts with title, source, url, published_at and
        content_snippet keys.  Empty list when the API key is missing.
    """
    try:
        api_key = get_api_key()
    except ValueError as e:
        print(f"DEBUG: Brave API key not configured: {e}")
        return []

    # Brave only supports coarse freshness buckets.
    if lookback_hours <= 24:
        freshness = "pd"
    elif lookback_hours <= 168:
        freshness = "pw"
    else:
        freshness = "pm"

    headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": api_key,
    }
    queries = [
        "stock market news",
        "earnings report",
        "merger acquisition",
        "company financial news",
        "trading stocks",
    ]

    seen_urls = set()
    all_articles = []
    for query in queries:
        try:
            params = {
                "q": query,
                "count": 20,
                "freshness": freshness,
            }
            response = _make_request_with_retry(BRAVE_SEARCH_URL, headers, params)
            for item in response.json().get("results", []):
                url = item.get("url", "")
                if not url or url in seen_urls:
                    # Drop items without a URL and duplicates across queries.
                    continue
                seen_urls.add(url)
                all_articles.append({
                    "title": item.get("title", ""),
                    "source": item.get("meta_url", {}).get("netloc", "Brave News"),
                    "url": url,
                    "published_at": _parse_brave_age(item.get("age", "")).isoformat(),
                    "content_snippet": item.get("description", "")[:500],
                })
        except requests.exceptions.HTTPError as e:
            print(f"DEBUG: Brave search HTTP error for '{query}': {e}")
            continue
        except requests.exceptions.Timeout as e:
            print(f"DEBUG: Brave search timeout for '{query}': {e}")
            continue
        except requests.exceptions.RequestException as e:
            print(f"DEBUG: Brave search request failed for '{query}': {e}")
            continue
        except Exception as e:
            # One failed query should not abort the whole sweep.
            print(f"DEBUG: Brave search failed for query '{query}': {e}")
            continue
    print(f"DEBUG: Brave returned {len(all_articles)} articles")
    return all_articles
def _parse_brave_age(age_str: str) -> datetime:
    """Translate Brave's relative age text (e.g. "2 hours ago") to a datetime.

    Case-insensitive; falls back to the current time when the string is empty
    or does not contain a recognised unit.
    """
    now = datetime.now()
    if not age_str:
        return now
    text = age_str.lower()
    # Order matters: the first unit found in the string wins.
    unit_table = [
        ("hour", "hours"),
        ("day", "days"),
        ("week", "weeks"),
        ("minute", "minutes"),
    ]
    try:
        for unit, delta_kwarg in unit_table:
            if unit in text:
                digits = "".join(ch for ch in text if ch.isdigit())
                return now - timedelta(**{delta_kwarg: int(digits or "1")})
    except (ValueError, TypeError):
        pass
    return now

View File

@ -18,6 +18,8 @@ from .alpha_vantage import (
)
from .alpha_vantage_news import get_bulk_news_alpha_vantage
from .alpha_vantage_common import AlphaVantageRateLimitError
from .tavily import get_bulk_news_tavily
from .brave import get_bulk_news_brave
from .config import get_config
@ -113,6 +115,8 @@ VENDOR_METHODS = {
"local": get_finnhub_company_insider_transactions,
},
"get_bulk_news": {
"tavily": get_bulk_news_tavily,
"brave": get_bulk_news_brave,
"alpha_vantage": get_bulk_news_alpha_vantage,
"openai": get_bulk_news_openai,
"google": get_bulk_news_google,
@ -192,7 +196,8 @@ def _convert_to_news_articles(raw_articles: List[Dict[str, Any]]) -> List[NewsAr
def _fetch_bulk_news_from_vendor(lookback_period: str) -> List[Dict[str, Any]]:
lookback_hours = parse_lookback_period(lookback_period)
vendor_order = ["alpha_vantage", "openai", "google"]
config = get_config()
vendor_order = config.get("bulk_news_vendor_order", ["tavily", "brave", "alpha_vantage", "openai", "google"])
for vendor in vendor_order:
if vendor not in VENDOR_METHODS["get_bulk_news"]:

View File

@ -0,0 +1,128 @@
import os
import time
from datetime import datetime, timedelta
from typing import List, Dict, Any
# Tavily is an optional dependency: when the client library is not installed
# the vendor degrades gracefully and simply returns no articles.
try:
    from tavily import TavilyClient
    TAVILY_AVAILABLE = True
except ImportError:
    TAVILY_AVAILABLE = False

# Timeout in seconds (exported for the test-suite/configuration; the Tavily
# client manages its own request timeouts internally).
DEFAULT_TIMEOUT = 30
# Total attempts made per query before giving up.
MAX_RETRIES = 3
# Base backoff in seconds; scaled linearly with the attempt number.
RETRY_BACKOFF = 1.0
def get_api_key() -> str:
    """Return the Tavily API key from the environment.

    Raises:
        ValueError: if TAVILY_API_KEY is unset or empty.
    """
    key = os.getenv("TAVILY_API_KEY")
    if key:
        return key
    raise ValueError("TAVILY_API_KEY environment variable is not set.")
def _search_with_retry(client, query: str, search_depth: str, topic: str, time_range: str, max_results: int, max_retries: int = MAX_RETRIES) -> Dict[str, Any]:
    """Run a Tavily search, retrying transient failures with backoff.

    The Tavily client raises plain Exceptions, so transient conditions are
    recognised by keywords in the error message: rate limits, timeouts and
    connection/network errors are retried; anything else (e.g. an invalid
    API key) is raised immediately.

    Args:
        client: A TavilyClient (or any object with a compatible .search()).
        query: Search query string.
        search_depth: Tavily search depth (e.g. "advanced").
        topic: Tavily topic filter (e.g. "news").
        time_range: Tavily time range ("day", "week", "month").
        max_results: Maximum results to request per query.
        max_retries: Total number of attempts before giving up.

    Returns:
        The raw Tavily response dict.

    Raises:
        Exception: the last transient error once attempts are exhausted, or
            the original error when it is not retryable.
    """
    last_exception = None
    for attempt in range(max_retries):
        try:
            return client.search(
                query=query,
                search_depth=search_depth,
                topic=topic,
                time_range=time_range,
                max_results=max_results,
            )
        except Exception as e:
            error_str = str(e).lower()
            # Classify the error once; rate limits back off twice as hard as
            # plain network problems.
            if "rate" in error_str or "limit" in error_str or "429" in error_str:
                reason, wait_time = "rate limited", RETRY_BACKOFF * (attempt + 1) * 2
            elif "timeout" in error_str or "timed out" in error_str:
                reason, wait_time = "timeout", RETRY_BACKOFF * (attempt + 1)
            elif "connection" in error_str or "network" in error_str:
                reason, wait_time = "connection error", RETRY_BACKOFF * (attempt + 1)
            else:
                raise  # non-retryable (e.g. bad API key)
            last_exception = e
            # Only sleep when another attempt will actually be made;
            # previously the helper also slept after the final failure.
            if attempt + 1 < max_retries:
                print(f"DEBUG: Tavily {reason}, waiting {wait_time}s before retry {attempt + 1}/{max_retries}")
                time.sleep(wait_time)
    raise last_exception if last_exception else Exception("Max retries exceeded")
def get_bulk_news_tavily(lookback_hours: int) -> List[Dict[str, Any]]:
    """Fetch recent market news via the Tavily search API.

    Runs a fixed set of finance-oriented queries, de-duplicates the hits by
    URL, and normalises each one into the common bulk-news article dict.

    Args:
        lookback_hours: How far back to search; mapped onto Tavily's coarse
            time ranges (day / week / month).

    Returns:
        List of dicts with title, source, url, published_at and
        content_snippet keys.  Empty list when the tavily library or the
        API key is unavailable.
    """
    if not TAVILY_AVAILABLE:
        print("DEBUG: Tavily library not installed")
        return []
    try:
        client = TavilyClient(api_key=get_api_key())
    except ValueError as e:
        print(f"DEBUG: Tavily API key not configured: {e}")
        return []
    queries = [
        "stock market news today",
        "earnings report announcement",
        "merger acquisition deal",
        "IPO stock market",
        "company financial results",
    ]
    # Map the hour-based lookback onto Tavily's coarse time_range buckets.
    # (A previously computed `days` value was never used and has been removed.)
    if lookback_hours <= 24:
        time_range = "day"
    elif lookback_hours <= 168:
        time_range = "week"
    else:
        time_range = "month"
    all_articles = []
    seen_urls = set()
    for query in queries:
        try:
            response = _search_with_retry(
                client=client,
                query=query,
                search_depth="advanced",
                topic="news",
                time_range=time_range,
                max_results=10,
            )
            results = response.get("results", [])
            for item in results:
                url = item.get("url", "")
                if url and url not in seen_urls:
                    seen_urls.add(url)
                    published_date = item.get("published_date")
                    if published_date:
                        try:
                            # Tavily dates may carry a trailing "Z", which
                            # fromisoformat() (pre-3.11) cannot parse directly.
                            published_at = datetime.fromisoformat(published_date.replace("Z", "+00:00"))
                        except (ValueError, TypeError):
                            published_at = datetime.now()
                    else:
                        published_at = datetime.now()
                    article = {
                        "title": item.get("title", ""),
                        "source": "Tavily",
                        "url": url,
                        "published_at": published_at.isoformat(),
                        "content_snippet": item.get("content", "")[:500],
                    }
                    all_articles.append(article)
        except Exception as e:
            # One failed query should not abort the whole sweep.
            print(f"DEBUG: Tavily search failed for query '{query}': {e}")
            continue
    print(f"DEBUG: Tavily returned {len(all_articles)} articles")
    return all_articles

View File

@ -28,4 +28,7 @@ DEFAULT_CONFIG = {
"discovery_cache_ttl": 300,
"discovery_max_results": 20,
"discovery_min_mentions": 2,
"bulk_news_vendor_order": ["tavily", "brave", "alpha_vantage", "openai", "google"],
"bulk_news_timeout": 30,
"bulk_news_max_retries": 3,
}