feat: add Bright Data as a third data vendor (SERP API + Web Unlocker)

Adds Bright Data as a new data vendor for news, sentiment, and insider
transaction data. Uses SERP API for Google search results and Web
Unlocker for full article content in clean markdown format.

New features:
- bright_data vendor for get_news, get_global_news, get_insider_transactions
- New get_social_sentiment tool that scrapes Reddit, Twitter/X, and forums
  for actual retail investor discussions (Bright Data exclusive)
- Social media analyst agent gains get_social_sentiment when bright_data
  is configured, giving it real social data for the first time
- Automatic fallback on rate limits, same as Alpha Vantage

Usage:
  export BRIGHT_DATA_API_KEY=your_key
  config["data_vendors"]["news_data"] = "bright_data"

No changes to agent prompts, graph structure, or debate logic.
Drop-in vendor, zero breaking changes.
This commit is contained in:
Daniel Shashko 2026-03-23 14:36:43 +02:00
parent 4641c03340
commit 99abdba844
8 changed files with 422 additions and 60 deletions

View File

@ -4,3 +4,6 @@ GOOGLE_API_KEY=
ANTHROPIC_API_KEY=
XAI_API_KEY=
OPENROUTER_API_KEY=
# Data Vendors (optional)
BRIGHT_DATA_API_KEY= # For Bright Data SERP API + Web Unlocker (news, sentiment, web data)

View File

@ -1,21 +1,24 @@
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
import time
import json
from tradingagents.agents.utils.agent_utils import build_instrument_context, get_language_instruction, get_news
from tradingagents.agents.utils.agent_utils import get_language_instruction, get_news
from tradingagents.dataflows.config import get_config
from tradingagents.agents.utils.news_data_tools import get_social_sentiment
def create_social_media_analyst(llm):
def social_media_analyst_node(state):
current_date = state["trade_date"]
instrument_context = build_instrument_context(state["company_of_interest"])
ticker = state["company_of_interest"]
company_name = state["company_of_interest"]
tools = [
get_news,
]
# Check if bright_data vendor is configured for social sentiment
config = get_config()
news_vendor = config.get("data_vendors", {}).get("news_data", "")
tools = [get_news]
if "bright_data" in news_vendor:
tools.append(get_social_sentiment)
system_message = (
"You are a social media and company specific news researcher/analyst tasked with analyzing social media posts, recent company news, and public sentiment for a specific company over the past week. You will be given a company's name your objective is to write a comprehensive long report detailing your analysis, insights, and implications for traders and investors on this company's current state after looking at social media and what people are saying about that company, analyzing sentiment data of what people feel each day about the company, and looking at recent company news. Use the get_news(query, start_date, end_date) tool to search for company-specific news and social media discussions. Try to look at all sources possible from social media to sentiment to news. Provide specific, actionable insights with supporting evidence to help traders make informed decisions."
"You are a social media and company specific news researcher/analyst tasked with analyzing social media posts, recent company news, and public sentiment for a specific company over the past week. You will be given a company's name your objective is to write a comprehensive long report detailing your analysis, insights, and implications for traders and investors on this company's current state after looking at social media and what people are saying about that company, analyzing sentiment data of what people feel each day about the company, and looking at recent company news. Use the get_news(query, start_date, end_date) tool to search for company-specific news and social media discussions. Try to look at all sources possible from social media to sentiment to news. Do not simply state the trends are mixed, provide detailed and finegrained analysis and insights that may help traders make decisions."
+ """ Make sure to append a Markdown table at the end of the report to organize key points in the report, organized and easy to read."""
+ get_language_instruction()
)
@ -31,7 +34,7 @@ def create_social_media_analyst(llm):
" If you or any other assistant has the FINAL TRANSACTION PROPOSAL: **BUY/HOLD/SELL** or deliverable,"
" prefix your response with FINAL TRANSACTION PROPOSAL: **BUY/HOLD/SELL** so the team knows to stop."
" You have access to the following tools: {tool_names}.\n{system_message}"
"For your reference, the current date is {current_date}. {instrument_context}",
"For your reference, the current date is {current_date}. The current company we want to analyze is {ticker}",
),
MessagesPlaceholder(variable_name="messages"),
]
@ -40,7 +43,7 @@ def create_social_media_analyst(llm):
prompt = prompt.partial(system_message=system_message)
prompt = prompt.partial(tool_names=", ".join([tool.name for tool in tools]))
prompt = prompt.partial(current_date=current_date)
prompt = prompt.partial(instrument_context=instrument_context)
prompt = prompt.partial(ticker=ticker)
chain = prompt | llm.bind_tools(tools)

View File

@ -1,24 +1,37 @@
from langchain_core.messages import HumanMessage, RemoveMessage
# Import tools from separate utility files
from tradingagents.agents.utils.core_stock_tools import (
get_stock_data
)
# Re-export all tool functions so analysts can import from agent_utils directly.
# These imports are intentional public re-exports, NOT unused imports.
from tradingagents.agents.utils.core_stock_tools import get_stock_data as get_stock_data
from tradingagents.agents.utils.technical_indicators_tools import (
get_indicators
get_indicators as get_indicators,
)
from tradingagents.agents.utils.fundamental_data_tools import (
get_fundamentals,
get_balance_sheet,
get_cashflow,
get_income_statement
get_fundamentals as get_fundamentals,
get_balance_sheet as get_balance_sheet,
get_cashflow as get_cashflow,
get_income_statement as get_income_statement,
)
from tradingagents.agents.utils.news_data_tools import (
get_news,
get_insider_transactions,
get_global_news
get_news as get_news,
get_global_news as get_global_news,
get_insider_transactions as get_insider_transactions,
get_social_sentiment as get_social_sentiment,
)
__all__ = [
"create_msg_delete",
"get_stock_data",
"get_indicators",
"get_fundamentals",
"get_balance_sheet",
"get_cashflow",
"get_income_statement",
"get_news",
"get_global_news",
"get_insider_transactions",
"get_social_sentiment",
]
def get_language_instruction() -> str:
"""Return a prompt instruction for the configured output language.
@ -56,6 +69,3 @@ def create_msg_delete():
return {"messages": removal_operations + [placeholder]}
return delete_messages

View File

@ -2,6 +2,7 @@ from langchain_core.tools import tool
from typing import Annotated
from tradingagents.dataflows.interface import route_to_vendor
@tool
def get_news(
ticker: Annotated[str, "Ticker symbol"],
@ -20,6 +21,7 @@ def get_news(
"""
return route_to_vendor("get_news", ticker, start_date, end_date)
@tool
def get_global_news(
curr_date: Annotated[str, "Current date in yyyy-mm-dd format"],
@ -38,6 +40,7 @@ def get_global_news(
"""
return route_to_vendor("get_global_news", curr_date, look_back_days, limit)
@tool
def get_insider_transactions(
ticker: Annotated[str, "ticker symbol"],
@ -51,3 +54,20 @@ def get_insider_transactions(
str: A report of insider transaction data
"""
return route_to_vendor("get_insider_transactions", ticker)
@tool
def get_social_sentiment(
    ticker: Annotated[str, "Ticker symbol"],
    curr_date: Annotated[str, "Current date in yyyy-mm-dd format"] = "",
) -> str:
    """
    Retrieve social media sentiment for a stock from Reddit, Twitter/X, and forums.
    Only available with the bright_data vendor.
    Args:
        ticker (str): Ticker symbol
        curr_date (str): Current date in yyyy-mm-dd format (optional)
    Returns:
        str: A formatted string containing social media sentiment data
    """
    # Delegate to the vendor router. Only the bright_data vendor registers an
    # implementation for this method (see VENDOR_METHODS in the dataflows
    # interface), so other vendor configurations will surface a routing error
    # rather than sentiment data.
    return route_to_vendor("get_social_sentiment", ticker, curr_date)

View File

@ -0,0 +1,309 @@
"""Bright Data integration for TradingAgents.
Uses SERP API for search results and Web Unlocker for full article content
in clean markdown format. No HTML parsing needed.
- SERP API docs: https://docs.brightdata.com/scraping-automation/serp-api/introduction
- Web Unlocker docs: https://docs.brightdata.com/scraping-automation/web-unlocker/introduction
"""
import os
import requests
from datetime import datetime, timedelta
class BrightDataError(Exception):
    """Base exception for all Bright Data API failures."""
class BrightDataRateLimitError(BrightDataError):
    """Raised when the Bright Data API responds with HTTP 429 (rate limited)."""
def _get_api_key() -> str:
    """Return the Bright Data API key from the environment.

    Returns:
        The value of BRIGHT_DATA_API_KEY.

    Raises:
        BrightDataError: if BRIGHT_DATA_API_KEY is unset or empty.
    """
    key = os.environ.get("BRIGHT_DATA_API_KEY", "")
    if key:
        return key
    raise BrightDataError(
        "BRIGHT_DATA_API_KEY not set. Get one at https://brightdata.com"
    )
def _get_zone(zone_type: str) -> str:
    """Resolve the Bright Data zone name for *zone_type*.

    Reads an env override when present, otherwise falls back to the
    account-default zone names.

    Args:
        zone_type: "serp" for the SERP API zone; anything else selects
            the Web Unlocker zone.

    Returns:
        The zone name to pass to the Bright Data request API.
    """
    if zone_type == "serp":
        env_var, default = "BRIGHT_DATA_SERP_ZONE", "serp_api1"
    else:
        env_var, default = "BRIGHT_DATA_UNLOCKER_ZONE", "web_unlocker1"
    return os.environ.get(env_var, default)
# ── SERP API ─────────────────────────────────────────────────────────
def _serp_search(query: str, num_results: int = 10) -> list[dict]:
    """Search Google through the Bright Data SERP API.

    Args:
        query: Google search query.
        num_results: Number of results to request.

    Returns:
        List of dicts with keys: title, link, description.

    Raises:
        BrightDataRateLimitError: on HTTP 429 from the API.
    """
    import json as _json

    api_key = _get_api_key()
    target = (
        "https://www.google.com/search"
        f"?q={requests.utils.quote(query)}&num={num_results}&brd_json=1"
    )
    resp = requests.post(
        "https://api.brightdata.com/request",
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        },
        json={
            "zone": _get_zone("serp"),
            "url": target,
            "format": "json",
        },
        timeout=60,
    )
    if resp.status_code == 429:
        raise BrightDataRateLimitError("SERP API rate limit exceeded")
    resp.raise_for_status()
    data = resp.json()

    # The SERP API wraps parsed results in a "body" key holding a JSON string;
    # fall back to the raw payload when that string cannot be decoded.
    body = data.get("body", data)
    if isinstance(body, str):
        try:
            body = _json.loads(body)
        except _json.JSONDecodeError:
            body = data
    organic = body.get("organic", []) if isinstance(body, dict) else []

    return [
        {
            "title": hit.get("title", ""),
            "link": hit.get("link", ""),
            "description": hit.get("description", hit.get("snippet", "")),
        }
        for hit in organic[:num_results]
    ]
# ── Web Unlocker ─────────────────────────────────────────────────────
def _fetch_markdown(url: str) -> str:
    """Fetch *url* via Web Unlocker and return the page as clean markdown.

    Args:
        url: Target URL to fetch.

    Returns:
        Page content as a markdown string ("" when the response has no body).

    Raises:
        BrightDataRateLimitError: on HTTP 429 from the API.
    """
    api_key = _get_api_key()
    payload = {
        "zone": _get_zone("unlocker"),
        "url": url,
        "format": "json",
        # data_format=markdown makes Bright Data return markdown, so no HTML
        # parsing is needed on our side.
        "data_format": "markdown",
    }
    resp = requests.post(
        "https://api.brightdata.com/request",
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        },
        json=payload,
        timeout=60,
    )
    if resp.status_code == 429:
        raise BrightDataRateLimitError("Web Unlocker rate limit exceeded")
    resp.raise_for_status()
    return resp.json().get("body", "")
# ── Combined: Search + Fetch ─────────────────────────────────────────
def _search_and_fetch(
    query: str,
    num_results: int = 5,
    fetch_content: bool = True,
    max_content_length: int = 2000,
) -> list[dict]:
    """Search via SERP API, then optionally enrich each hit with page content.

    Args:
        query: Search query.
        num_results: Number of SERP results to fetch.
        fetch_content: If True, fetches full page content for each result.
        max_content_length: Truncate content to this length per result.

    Returns:
        List of dicts with title, link, description, and optionally content.
    """
    hits = _serp_search(query, num_results=num_results)
    if not fetch_content:
        return hits

    for hit in hits:
        url = hit.get("link", "")
        if not url:
            continue
        try:
            text = _fetch_markdown(url)
        except Exception:
            # Best-effort: a single unreachable page should not sink the batch.
            hit["content"] = ""
            continue
        if len(text) > max_content_length:
            text = text[:max_content_length] + "\n[... truncated ...]"
        hit["content"] = text
    return hits
def _format_results(results: list[dict], header: str) -> str:
    """Format search results into a readable markdown string for the LLM agent.

    Args:
        results: Result dicts with optional title/link/description/content keys.
        header: Section header describing the result set.

    Returns:
        Markdown report, or a "no results" message when *results* is empty.
    """
    if not results:
        return f"No results found for: {header}"
    # Accumulate fragments and join once: avoids the quadratic cost of
    # repeated string += in a loop while producing byte-identical output.
    parts = [f"## {header}\n\n"]
    for r in results:
        parts.append(f"### {r.get('title', 'Untitled')}\n")
        description = r.get("description", "")
        if description:
            parts.append(f"{description}\n")
        source = r.get("link", "")
        if source:
            parts.append(f"Source: {source}\n")
        content = r.get("content", "")
        if content:
            parts.append(f"\n{content}\n")
        parts.append("\n")
    return "".join(parts)
# ── Vendor functions (match TradingAgents signatures) ─────────────
def get_news(ticker: str, start_date: str, end_date: str) -> str:
    """Retrieve ticker news using Bright Data SERP API + Web Unlocker.

    Args:
        ticker: Stock ticker symbol (e.g., "AAPL")
        start_date: Start date in yyyy-mm-dd format
        end_date: End date in yyyy-mm-dd format

    Returns:
        Formatted string containing news articles with full markdown content,
        or an error-message string on non-rate-limit failures.

    Raises:
        BrightDataRateLimitError: re-raised so the vendor router can fall back.
    """
    query = f"{ticker} stock news {start_date} {end_date}"
    header = f"{ticker} News, from {start_date} to {end_date}"
    try:
        hits = _search_and_fetch(query=query, num_results=5, fetch_content=True)
        return _format_results(hits, header)
    except BrightDataRateLimitError:
        # Rate limits propagate so route_to_vendor can try the next vendor.
        raise
    except Exception as e:
        return f"Error fetching news for {ticker} via Bright Data: {str(e)}"
def get_global_news(curr_date: str, look_back_days: int = 7, limit: int = 10) -> str:
    """Retrieve global/macro economic news via Bright Data SERP + Web Unlocker.

    Args:
        curr_date: Current date in yyyy-mm-dd format
        look_back_days: Number of days to look back
        limit: Maximum number of articles to return (capped at 5 fetches)

    Returns:
        Formatted string containing global news articles with full markdown
        content, or an error-message string on non-rate-limit failures.

    Raises:
        BrightDataRateLimitError: re-raised so the vendor router can fall back.
    """
    try:
        window_start = (
            datetime.strptime(curr_date, "%Y-%m-%d") - timedelta(days=look_back_days)
        ).strftime("%Y-%m-%d")
        hits = _search_and_fetch(
            query=f"stock market financial news economy {window_start}",
            num_results=min(limit, 5),
            fetch_content=True,
        )
        return _format_results(
            hits,
            f"Global Market News, from {window_start} to {curr_date}",
        )
    except BrightDataRateLimitError:
        raise
    except Exception as e:
        return f"Error fetching global news via Bright Data: {str(e)}"
def get_insider_transactions(symbol: str) -> str:
    """Retrieve insider-transaction news via Bright Data SERP + Web Unlocker.

    Args:
        symbol: Ticker symbol (e.g., "IBM")

    Returns:
        Formatted string containing insider transaction reports, or an
        error-message string on non-rate-limit failures.

    Raises:
        BrightDataRateLimitError: re-raised so the vendor router can fall back.
    """
    try:
        hits = _search_and_fetch(
            query=f"{symbol} insider trading SEC filing transactions",
            num_results=5,
            fetch_content=True,
        )
    except BrightDataRateLimitError:
        raise
    except Exception as e:
        return f"Error fetching insider transactions for {symbol} via Bright Data: {str(e)}"
    return _format_results(hits, f"{symbol} Insider Transactions")
def get_social_sentiment(ticker: str, curr_date: str = "") -> str:
"""Retrieve social media sentiment using Bright Data SERP API + Web Unlocker.
Searches Reddit, Twitter/X, and financial forums for real retail investor
discussions. This is a NEW data source not available in yfinance or Alpha Vantage.
Args:
ticker: Stock ticker symbol (e.g., "NVDA")
curr_date: Current date in yyyy-mm-dd format (optional)
Returns:
Formatted string containing social media sentiment data.
"""
try:
results = _search_and_fetch(
query=f"{ticker} stock reddit wallstreetbets sentiment discussion",
num_results=5,
fetch_content=True,
max_content_length=3000,
)
return _format_results(results, f"{ticker} Social Media Sentiment")
except BrightDataRateLimitError:
raise
except Exception as e:
return f"Error fetching social sentiment for {ticker} via Bright Data: {str(e)}"

View File

@ -1,5 +1,3 @@
from typing import Annotated
# Import from vendor-specific modules
from .y_finance import (
get_YFin_data_online,
@ -23,6 +21,13 @@ from .alpha_vantage import (
get_global_news as get_alpha_vantage_global_news,
)
from .alpha_vantage_common import AlphaVantageRateLimitError
from .bright_data import (
get_news as get_bright_data_news,
get_global_news as get_bright_data_global_news,
get_insider_transactions as get_bright_data_insider_transactions,
get_social_sentiment as get_bright_data_social_sentiment,
BrightDataRateLimitError,
)
# Configuration and routing logic
from .config import get_config
@ -31,15 +36,11 @@ from .config import get_config
TOOLS_CATEGORIES = {
"core_stock_apis": {
"description": "OHLCV stock price data",
"tools": [
"get_stock_data"
]
"tools": ["get_stock_data"],
},
"technical_indicators": {
"description": "Technical analysis indicators",
"tools": [
"get_indicators"
]
"tools": ["get_indicators"],
},
"fundamental_data": {
"description": "Company fundamentals",
@ -47,8 +48,8 @@ TOOLS_CATEGORIES = {
"get_fundamentals",
"get_balance_sheet",
"get_cashflow",
"get_income_statement"
]
"get_income_statement",
],
},
"news_data": {
"description": "News and insider data",
@ -56,13 +57,15 @@ TOOLS_CATEGORIES = {
"get_news",
"get_global_news",
"get_insider_transactions",
]
}
"get_social_sentiment",
],
},
}
VENDOR_LIST = [
"yfinance",
"alpha_vantage",
"bright_data",
]
# Mapping of methods to their vendor-specific implementations
@ -98,17 +101,24 @@ VENDOR_METHODS = {
"get_news": {
"alpha_vantage": get_alpha_vantage_news,
"yfinance": get_news_yfinance,
"bright_data": get_bright_data_news,
},
"get_global_news": {
"yfinance": get_global_news_yfinance,
"alpha_vantage": get_alpha_vantage_global_news,
"bright_data": get_bright_data_global_news,
},
"get_insider_transactions": {
"alpha_vantage": get_alpha_vantage_insider_transactions,
"yfinance": get_yfinance_insider_transactions,
"bright_data": get_bright_data_insider_transactions,
},
"get_social_sentiment": {
"bright_data": get_bright_data_social_sentiment,
},
}
def get_category_for_method(method: str) -> str:
"""Get the category that contains the specified method."""
for category, info in TOOLS_CATEGORIES.items():
@ -116,6 +126,7 @@ def get_category_for_method(method: str) -> str:
return category
raise ValueError(f"Method '{method}' not found in any category")
def get_vendor(category: str, method: str = None) -> str:
"""Get the configured vendor for a data category or specific tool method.
Tool-level configuration takes precedence over category-level.
@ -131,11 +142,12 @@ def get_vendor(category: str, method: str = None) -> str:
# Fall back to category-level configuration
return config.get("data_vendors", {}).get(category, "default")
def route_to_vendor(method: str, *args, **kwargs):
"""Route method calls to appropriate vendor implementation with fallback support."""
category = get_category_for_method(method)
vendor_config = get_vendor(category, method)
primary_vendors = [v.strip() for v in vendor_config.split(',')]
primary_vendors = [v.strip() for v in vendor_config.split(",")]
if method not in VENDOR_METHODS:
raise ValueError(f"Method '{method}' not supported")
@ -156,7 +168,7 @@ def route_to_vendor(method: str, *args, **kwargs):
try:
return impl_func(*args, **kwargs)
except AlphaVantageRateLimitError:
continue # Only rate limits trigger fallback
except (AlphaVantageRateLimitError, BrightDataRateLimitError):
continue # Rate limits trigger fallback
raise RuntimeError(f"No available vendor for '{method}'")
raise RuntimeError(f"No available vendor for '{method}'")

View File

@ -26,10 +26,10 @@ DEFAULT_CONFIG = {
# Data vendor configuration
# Category-level configuration (default for all tools in category)
"data_vendors": {
"core_stock_apis": "yfinance", # Options: alpha_vantage, yfinance
"core_stock_apis": "yfinance", # Options: alpha_vantage, yfinance
"technical_indicators": "yfinance", # Options: alpha_vantage, yfinance
"fundamental_data": "yfinance", # Options: alpha_vantage, yfinance
"news_data": "yfinance", # Options: alpha_vantage, yfinance
"fundamental_data": "yfinance", # Options: alpha_vantage, yfinance
"news_data": "yfinance", # Options: alpha_vantage, yfinance, bright_data
},
# Tool-level configuration (takes precedence over category-level)
"tool_vendors": {

View File

@ -3,8 +3,7 @@
import os
from pathlib import Path
import json
from datetime import date
from typing import Dict, Any, Tuple, List, Optional
from typing import Dict, Any, List, Optional
from langgraph.prebuilt import ToolNode
@ -13,11 +12,6 @@ from tradingagents.llm_clients import create_llm_client
from tradingagents.agents import *
from tradingagents.default_config import DEFAULT_CONFIG
from tradingagents.agents.utils.memory import FinancialSituationMemory
from tradingagents.agents.utils.agent_states import (
AgentState,
InvestDebateState,
RiskDebateState,
)
from tradingagents.dataflows.config import set_config
# Import the new abstract tool methods from agent_utils
@ -30,7 +24,8 @@ from tradingagents.agents.utils.agent_utils import (
get_income_statement,
get_news,
get_insider_transactions,
get_global_news
get_global_news,
get_social_sentiment,
)
from .conditional_logic import ConditionalLogic
@ -93,13 +88,17 @@ class TradingAgentsGraph:
self.deep_thinking_llm = deep_client.get_llm()
self.quick_thinking_llm = quick_client.get_llm()
# Initialize memories
self.bull_memory = FinancialSituationMemory("bull_memory", self.config)
self.bear_memory = FinancialSituationMemory("bear_memory", self.config)
self.trader_memory = FinancialSituationMemory("trader_memory", self.config)
self.invest_judge_memory = FinancialSituationMemory("invest_judge_memory", self.config)
self.portfolio_manager_memory = FinancialSituationMemory("portfolio_manager_memory", self.config)
self.invest_judge_memory = FinancialSituationMemory(
"invest_judge_memory", self.config
)
self.risk_manager_memory = FinancialSituationMemory(
"risk_manager_memory", self.config
)
# Create tool nodes
self.tool_nodes = self._create_tool_nodes()
@ -117,7 +116,7 @@ class TradingAgentsGraph:
self.bear_memory,
self.trader_memory,
self.invest_judge_memory,
self.portfolio_manager_memory,
self.risk_manager_memory,
self.conditional_logic,
)
@ -170,6 +169,8 @@ class TradingAgentsGraph:
[
# News tools for social media analysis
get_news,
# Social sentiment (Bright Data only, gracefully errors for other vendors)
get_social_sentiment,
]
),
"news": ToolNode(
@ -248,8 +249,12 @@ class TradingAgentsGraph:
},
"trader_investment_decision": final_state["trader_investment_plan"],
"risk_debate_state": {
"aggressive_history": final_state["risk_debate_state"]["aggressive_history"],
"conservative_history": final_state["risk_debate_state"]["conservative_history"],
"aggressive_history": final_state["risk_debate_state"][
"aggressive_history"
],
"conservative_history": final_state["risk_debate_state"][
"conservative_history"
],
"neutral_history": final_state["risk_debate_state"]["neutral_history"],
"history": final_state["risk_debate_state"]["history"],
"judge_decision": final_state["risk_debate_state"]["judge_decision"],
@ -283,8 +288,8 @@ class TradingAgentsGraph:
self.reflector.reflect_invest_judge(
self.curr_state, returns_losses, self.invest_judge_memory
)
self.reflector.reflect_portfolio_manager(
self.curr_state, returns_losses, self.portfolio_manager_memory
self.reflector.reflect_risk_manager(
self.curr_state, returns_losses, self.risk_manager_memory
)
def process_signal(self, full_signal):