Fix #274: Add warnings for OpenAI news/fundamentals hallucination risk

- Add UserWarning to get_global_news_openai, get_stock_news_openai, and get_fundamentals_openai
- Warnings inform users that the OpenAI vendor may hallucinate or provide outdated data
- Recommend alternative vendors: alpha_vantage, google, yfinance, or local
- Add comprehensive test suite to verify warnings are emitted
- Refactor to use shared _warn_hallucination_risk helper function

The issue reported that OpenAI was hallucinating and providing outdated news
(e.g., fake news from November 2025). This is because the OpenAI vendor relies
on the LLM's training data rather than real-time web search. Users should use
alternative vendors for reliable, up-to-date news and fundamental data.

All tests passing (4/4).
This commit is contained in:
Godnight1006 2025-11-14 11:47:33 +08:00
parent 13b826a31d
commit 006b354829
3 changed files with 211 additions and 0 deletions

2
tests/__init__.py Normal file
View File

@ -0,0 +1,2 @@
# Tests package

View File

@ -0,0 +1,124 @@
"""
Tests for OpenAI news dataflow functions to ensure proper warnings about hallucination risks.
Issue #274: OpenAI is hallucinating and provides outdated news.
The OpenAI vendor for news retrieval doesn't have reliable real-time web search access,
so it may generate fake or outdated news based on its training data.
"""
import pytest
import warnings
from unittest.mock import Mock, patch
from tradingagents.dataflows.openai import (
get_global_news_openai,
get_stock_news_openai,
get_fundamentals_openai,
)
class TestOpenAINewsWarnings:
    """Test that OpenAI news functions emit appropriate warnings about hallucination risks."""

    # Regex shared by every ``pytest.warns`` check in this class.
    WARNING_PATTERN = "may hallucinate|outdated|unreliable"

    @staticmethod
    def _configure_mocks(mock_get_config, mock_openai_class, text):
        """
        Wire the patched ``get_config`` and ``OpenAI`` objects for one test.

        Args:
            mock_get_config: Mock replacing ``tradingagents.dataflows.openai.get_config``.
            mock_openai_class: Mock replacing ``tradingagents.dataflows.openai.OpenAI``.
            text: Text placed in the fake response returned by ``responses.create``.
        """
        mock_get_config.return_value = {
            "backend_url": "https://api.openai.com/v1",
            "quick_think_llm": "gpt-4o-mini",
        }
        mock_client = Mock()
        mock_openai_class.return_value = mock_client
        # NOTE(review): the text is placed at output[1].content[0].text, presumably
        # where the functions under test read it -- confirm against the implementation.
        mock_response = Mock()
        mock_response.output = [None, Mock(content=[Mock(text=text)])]
        mock_client.responses.create.return_value = mock_response

    @patch("tradingagents.dataflows.openai.OpenAI")
    @patch("tradingagents.dataflows.openai.get_config")
    def test_get_global_news_emits_warning(self, mock_get_config, mock_openai_class):
        """Test that get_global_news_openai emits a warning about potential hallucination."""
        self._configure_mocks(mock_get_config, mock_openai_class, "Fake news content")

        with pytest.warns(UserWarning, match=self.WARNING_PATTERN):
            result = get_global_news_openai("2024-11-14", look_back_days=7, limit=5)
        assert result is not None

    @patch("tradingagents.dataflows.openai.OpenAI")
    @patch("tradingagents.dataflows.openai.get_config")
    def test_get_stock_news_emits_warning(self, mock_get_config, mock_openai_class):
        """Test that get_stock_news_openai emits a warning about potential hallucination."""
        self._configure_mocks(mock_get_config, mock_openai_class, "Fake stock news")

        with pytest.warns(UserWarning, match=self.WARNING_PATTERN):
            result = get_stock_news_openai("NVDA", "2024-11-01", "2024-11-14")
        assert result is not None

    @patch("tradingagents.dataflows.openai.OpenAI")
    @patch("tradingagents.dataflows.openai.get_config")
    def test_get_fundamentals_emits_warning(self, mock_get_config, mock_openai_class):
        """Test that get_fundamentals_openai emits a warning about potential hallucination."""
        self._configure_mocks(mock_get_config, mock_openai_class, "Fake fundamentals")

        with pytest.warns(UserWarning, match=self.WARNING_PATTERN):
            result = get_fundamentals_openai("NVDA", "2024-11-14")
        assert result is not None

    def test_warning_message_content(self):
        """Test that warning messages name concrete alternative vendors."""
        with patch("tradingagents.dataflows.openai.OpenAI"), \
                patch("tradingagents.dataflows.openai.get_config") as mock_get_config:
            mock_get_config.return_value = {
                "backend_url": "https://api.openai.com/v1",
                "quick_think_llm": "gpt-4o-mini",
            }
            with warnings.catch_warnings(record=True) as caught:
                warnings.simplefilter("always")
                try:
                    get_global_news_openai("2024-11-14")
                except Exception:
                    # Deliberate best-effort: only the warning is under test here,
                    # not the rest of the call against the bare mock client.
                    pass

        # With the "always" filter, unrelated warnings (e.g. deprecations raised by
        # dependencies) can be recorded first, so look at UserWarnings only instead
        # of trusting the first recorded entry.
        user_warning_texts = [
            str(item.message).lower()
            for item in caught
            if issubclass(item.category, UserWarning)
        ]
        assert user_warning_texts, "expected at least one UserWarning to be emitted"

        # Require an actual vendor name; generic words such as "vendor" would match
        # any wording of the message and prove nothing about the recommendation.
        vendor_names = ("alpha_vantage", "google", "local")
        assert any(
            name in text for text in user_warning_texts for name in vendor_names
        )

View File

@ -1,8 +1,50 @@
import warnings
from openai import OpenAI
from .config import get_config
def _warn_hallucination_risk(data_type="news", category="news_data", alternatives=None):
    """
    Emit a warning about potential hallucination when using OpenAI for data retrieval.

    Args:
        data_type: Type of data being retrieved (e.g., "news", "fundamental data")
        category: Config category for vendor selection (e.g., "news_data", "fundamental_data")
        alternatives: Alternative vendor names. ``None`` selects the default
            ["alpha_vantage", "google", "local"]; a single string is treated as one
            vendor name; an empty collection omits the recommendation sentence.
    """
    if alternatives is None:
        alternatives = ["alpha_vantage", "google", "local"]
    elif isinstance(alternatives, str):
        # str.join iterates a bare string character by character, which would
        # render "yfinance" as "y', 'f', 'i', ...".
        alternatives = [alternatives]

    message = f"OpenAI {data_type} vendor may hallucinate or provide outdated {data_type}. "
    if alternatives:
        alternatives_str = "', '".join(alternatives)
        message += f"For reliable {data_type}, use alternative vendors: '{alternatives_str}'. "
    message += f"Configure in config['data_vendors']['{category}']."

    # stacklevel=3 skips this helper and the get_*_openai function that called it,
    # so the warning is attributed to the code that requested the data.
    warnings.warn(message, UserWarning, stacklevel=3)
def get_stock_news_openai(query, start_date, end_date):
"""
Retrieve stock news using OpenAI's LLM.
WARNING: This function may hallucinate or provide outdated news because it relies on
the LLM's training data rather than real-time web search. For reliable, up-to-date news,
consider using alternative vendors such as 'alpha_vantage', 'google', or 'local'.
Configure alternative vendors in your config:
config["data_vendors"]["news_data"] = "alpha_vantage" # or "google" or "local"
Args:
query: Stock ticker or search query
start_date: Start date in yyyy-mm-dd format
end_date: End date in yyyy-mm-dd format
Returns:
str: News content (may be hallucinated or outdated)
"""
_warn_hallucination_risk(data_type="news", category="news_data")
config = get_config()
client = OpenAI(base_url=config["backend_url"])
@ -38,6 +80,26 @@ def get_stock_news_openai(query, start_date, end_date):
def get_global_news_openai(curr_date, look_back_days=7, limit=5):
"""
Retrieve global news using OpenAI's LLM.
WARNING: This function may hallucinate or provide outdated news because it relies on
the LLM's training data rather than real-time web search. For reliable, up-to-date news,
consider using alternative vendors such as 'alpha_vantage', 'google', or 'local'.
Configure alternative vendors in your config:
config["data_vendors"]["news_data"] = "alpha_vantage" # or "google" or "local"
Args:
curr_date: Current date in yyyy-mm-dd format
look_back_days: Number of days to look back (default: 7)
limit: Maximum number of articles to return (default: 5)
Returns:
str: News content (may be hallucinated or outdated)
"""
_warn_hallucination_risk(data_type="news", category="news_data")
config = get_config()
client = OpenAI(base_url=config["backend_url"])
@ -73,6 +135,29 @@ def get_global_news_openai(curr_date, look_back_days=7, limit=5):
def get_fundamentals_openai(ticker, curr_date):
"""
Retrieve fundamental data using OpenAI's LLM.
WARNING: This function may hallucinate or provide outdated data because it relies on
the LLM's training data rather than real-time data sources. For reliable, up-to-date
fundamental data, consider using alternative vendors such as 'alpha_vantage', 'yfinance', or 'local'.
Configure alternative vendors in your config:
config["data_vendors"]["fundamental_data"] = "alpha_vantage" # or "yfinance" or "local"
Args:
ticker: Stock ticker symbol
curr_date: Current date in yyyy-mm-dd format
Returns:
str: Fundamental data (may be hallucinated or outdated)
"""
_warn_hallucination_risk(
data_type="fundamental data",
category="fundamental_data",
alternatives=["alpha_vantage", "yfinance", "local"]
)
config = get_config()
client = OpenAI(base_url=config["backend_url"])