TradingAgents/tradingagents/graph/discovery_graph.py


from typing import Any, Callable, Dict, List, Optional
from langgraph.graph import END, StateGraph
from tradingagents.agents.utils.agent_states import DiscoveryState
from tradingagents.dataflows.discovery import scanners # Load scanners to trigger registration
from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY
from tradingagents.dataflows.discovery.utils import PRIORITY_ORDER, Priority, serialize_for_log
from tradingagents.tools.executor import execute_tool
# Known PERMANENTLY delisted tickers (verified mergers, bankruptcies, delistings) are combined
# with the dynamic delisted_cache by get_delisted_tickers() below, which delegates to
# tradingagents.dataflows.discovery.utils.
# NOTE: The permanent list should only contain tickers CONFIRMED to be permanently delisted.
# Do NOT add actively traded stocks to it. Use the dynamic delisted_cache for uncertain cases.
def get_delisted_tickers():
"""Get combined list of delisted tickers from permanent list + dynamic cache."""
from tradingagents.dataflows.discovery.utils import get_delisted_tickers
return get_delisted_tickers()
def is_valid_ticker(ticker: str) -> bool:
"""Validate if a ticker is tradeable and not junk."""
from tradingagents.dataflows.discovery.utils import is_valid_ticker
return is_valid_ticker(ticker)
class DiscoveryGraph:
"""
Discovery Graph for finding investment opportunities.
Orchestrates the discovery workflow: scanning -> filtering -> ranking.
Supports traditional, semantic, and hybrid discovery modes.
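Example (illustrative sketch; the config keys shown mirror __init__ and
_load_discovery_config, and the values are assumptions, not recommendations):
    graph = DiscoveryGraph({
        "llm_provider": "openai",
        "discovery": {"discovery_mode": "hybrid", "final_recommendations": 3},
    })
    final_state = graph.run("2024-05-01")
    print(final_state["final_ranking"])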
"""
# Node names
NODE_SCANNER = "scanner"
NODE_FILTER = "filter"
NODE_RANKER = "ranker"
# Source types
SOURCE_NEWS_MENTION = "news_direct_mention"
SOURCE_SEMANTIC = "semantic_news_match"
SOURCE_UNKNOWN = "unknown"
# Priority levels (lower number = higher priority)
PRIORITY_ORDER = PRIORITY_ORDER
# Priority level names
PRIORITY_CRITICAL = Priority.CRITICAL.value
PRIORITY_HIGH = Priority.HIGH.value
PRIORITY_MEDIUM = Priority.MEDIUM.value
PRIORITY_LOW = Priority.LOW.value
PRIORITY_UNKNOWN = Priority.UNKNOWN.value
def __init__(self, config: Dict[str, Any] = None):
"""
Initialize Discovery Graph.
Args:
config: Configuration dictionary containing:
- llm_provider: LLM provider (e.g., 'openai', 'google')
- discovery: Discovery-specific settings
- results_dir: Directory for saving results
"""
self.config = config or {}
# Initialize LLMs
from tradingagents.utils.llm_factory import create_llms
self.deep_thinking_llm, self.quick_thinking_llm = create_llms(self.config)
# Load configurations
self._load_discovery_config()
self._load_logging_config()
# Store run directory for saving results
self.run_dir = self.config.get("discovery_run_dir", None)
# Initialize Analytics
from tradingagents.dataflows.discovery.analytics import DiscoveryAnalytics
self.analytics = DiscoveryAnalytics(data_dir="data")
self.graph = self._create_graph()
def _load_discovery_config(self) -> None:
"""Load discovery-specific configuration with defaults."""
discovery_config = self.config.get("discovery", {})
# Scanner limits
self.reddit_trending_limit = discovery_config.get("reddit_trending_limit", 15)
self.market_movers_limit = discovery_config.get("market_movers_limit", 10)
self.max_candidates_to_analyze = discovery_config.get("max_candidates_to_analyze", 100)
self.analyze_all_candidates = discovery_config.get("analyze_all_candidates", False)
self.final_recommendations = discovery_config.get("final_recommendations", 3)
self.deep_dive_max_workers = discovery_config.get("deep_dive_max_workers", 3)
# Volume and movement filters
self.min_average_volume = discovery_config.get("min_average_volume", 0)
self.volume_lookback_days = discovery_config.get("volume_lookback_days", 20)
self.volume_cache_key = discovery_config.get("volume_cache_key", "default")
self.filter_same_day_movers = discovery_config.get("filter_same_day_movers", True)
self.intraday_movement_threshold = discovery_config.get("intraday_movement_threshold", 15.0)
# Earnings discovery limits
self.max_earnings_candidates = discovery_config.get("max_earnings_candidates", 50)
self.max_days_until_earnings = discovery_config.get("max_days_until_earnings", 7)
self.min_market_cap = discovery_config.get(
"min_market_cap", 0
) # In billions, 0 = no filter
# News settings
self.news_lookback_days = discovery_config.get("news_lookback_days", 7)
self.batch_news_vendor = discovery_config.get("batch_news_vendor", "openai")
self.batch_news_batch_size = discovery_config.get("batch_news_batch_size", 50)
# Discovery mode: "traditional", "semantic", or "hybrid"
self.discovery_mode = discovery_config.get("discovery_mode", "hybrid")
# Semantic discovery settings
self.semantic_news_sources = discovery_config.get("semantic_news_sources", ["openai"])
self.semantic_news_lookback_hours = discovery_config.get("semantic_news_lookback_hours", 24)
self.semantic_min_news_importance = discovery_config.get("semantic_min_news_importance", 5)
self.semantic_min_similarity = discovery_config.get("semantic_min_similarity", 0.2)
self.semantic_max_tickers_per_news = discovery_config.get(
"semantic_max_tickers_per_news", 5
)
# Console price charts
self.console_price_charts = discovery_config.get("console_price_charts", False)
self.price_chart_library = discovery_config.get("price_chart_library", "plotille")
self.price_chart_windows = discovery_config.get("price_chart_windows", ["1m"])
self.price_chart_lookback_days = discovery_config.get("price_chart_lookback_days", 30)
self.price_chart_width = discovery_config.get("price_chart_width", 60)
self.price_chart_height = discovery_config.get("price_chart_height", 12)
self.price_chart_max_tickers = discovery_config.get("price_chart_max_tickers", 10)
self.price_chart_show_movement_stats = discovery_config.get(
"price_chart_show_movement_stats", True
)
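# Illustrative shape of the "discovery" config block read above (keys are the ones consumed
# by _load_discovery_config; the values are examples only, not recommended defaults):
#   "discovery": {
#       "discovery_mode": "hybrid",
#       "max_candidates_to_analyze": 100,
#       "final_recommendations": 3,
#       "min_average_volume": 500_000,
#       "news_lookback_days": 7,
#       "semantic_min_similarity": 0.2,
#       "console_price_charts": True,
#       "price_chart_windows": ["1m", "6m"],
#   }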
def _load_logging_config(self) -> None:
"""Load logging configuration."""
discovery_config = self.config.get("discovery", {})
self.log_tool_calls = discovery_config.get("log_tool_calls", True)
self.log_tool_calls_console = discovery_config.get("log_tool_calls_console", False)
self.tool_log_max_chars = discovery_config.get("tool_log_max_chars", 10_000)
self.tool_log_exclude = set(discovery_config.get("tool_log_exclude", []))
def _safe_serialize(self, value: Any) -> str:
"""Safely serialize any value to a string."""
return serialize_for_log(value)
def _log_tool_call(
self,
tool_logs: List[Dict[str, Any]],
node: str,
step_name: str,
tool_name: str,
params: Dict[str, Any],
output: Any,
context: str = "",
error: str = "",
) -> Dict[str, Any]:
"""
Log a tool call with metadata for debugging and analysis.
Args:
tool_logs: List to append the log entry to
node: Name of the graph node executing the tool
step_name: Description of the current step
tool_name: Name of the tool being executed
params: Parameters passed to the tool
output: Output from the tool execution
context: Additional context for the log entry
error: Error message if tool execution failed
Returns:
The created log entry dictionary
"""
from datetime import datetime
output_str = self._safe_serialize(output)
log_entry = {
"timestamp": datetime.now().isoformat(),
"type": "tool",
"node": node,
"step": step_name,
"tool": tool_name,
"parameters": params,
"context": context,
"output": output_str,
"output_length": len(output_str),
"error": error,
}
tool_logs.append(log_entry)
if self.log_tool_calls_console:
import logging
output_preview = output_str
if self.tool_log_max_chars and len(output_preview) > self.tool_log_max_chars:
output_preview = output_preview[: self.tool_log_max_chars] + "..."
logging.getLogger(__name__).info(
"TOOL %s node=%s step=%s params=%s error=%s output=%s",
tool_name,
node,
step_name,
params,
bool(error),
output_preview,
)
return log_entry
def _execute_tool_logged(
self,
state: DiscoveryState,
*,
node: str,
step: str,
tool_name: str,
context: str = "",
**params,
) -> Any:
"""
Execute a tool with optional logging.
Args:
state: Current discovery state containing tool_logs
node: Name of the graph node executing the tool
step: Description of the current step
tool_name: Name of the tool to execute
context: Additional context for logging
**params: Parameters to pass to the tool
Returns:
Tool execution result
Raises:
Exception: Re-raises any exception from tool execution after logging
"""
tool_logs = state.get("tool_logs", [])
if not self.log_tool_calls or tool_name in self.tool_log_exclude:
return execute_tool(tool_name, **params)
try:
result = execute_tool(tool_name, **params)
self._log_tool_call(
tool_logs,
node=node,
step_name=step,
tool_name=tool_name,
params=params,
output=result,
context=context,
)
state["tool_logs"] = tool_logs
return result
except Exception as e:
self._log_tool_call(
tool_logs,
node=node,
step_name=step,
tool_name=tool_name,
params=params,
output="",
context=context,
error=str(e),
)
state["tool_logs"] = tool_logs
raise
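# Illustrative call pattern (scanners receive this method through state["tool_executor"];
# the tool name and keyword parameters below are placeholders, not guaranteed registry entries):
#   result = self._execute_tool_logged(
#       state,
#       node=self.NODE_SCANNER,
#       step="market movers",
#       tool_name="<registered_tool_name>",
#       limit=self.market_movers_limit,
#   )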
def _create_graph(self) -> StateGraph:
"""
Create the discovery workflow graph.
The graph follows this flow:
scanner -> filter -> ranker -> END
Returns:
Compiled workflow graph
"""
workflow = StateGraph(DiscoveryState)
workflow.add_node(self.NODE_SCANNER, self.scanner_node)
workflow.add_node(self.NODE_FILTER, self.filter_node)
workflow.add_node(self.NODE_RANKER, self.preliminary_ranker_node)
workflow.set_entry_point(self.NODE_SCANNER)
workflow.add_edge(self.NODE_SCANNER, self.NODE_FILTER)
workflow.add_edge(self.NODE_FILTER, self.NODE_RANKER)
workflow.add_edge(self.NODE_RANKER, END)
return workflow.compile()
def semantic_scanner_node(self, state: DiscoveryState) -> Dict[str, Any]:
"""
Scan market using semantic news-ticker matching.
Uses news semantic scanner to find tickers mentioned in or
semantically related to recent market-moving news.
Args:
state: Current discovery state
Returns:
Updated state with semantic candidates
"""
print("🔍 Scanning market with semantic discovery...")
# Update performance tracking for historical recommendations (runs before discovery)
try:
self.analytics.update_performance_tracking()
except Exception as e:
print(f" Warning: Performance tracking update failed: {e}")
print(" Continuing with discovery...")
tool_logs = state.setdefault("tool_logs", [])
def log_callback(entry: Dict[str, Any]) -> None:
tool_logs.append(entry)
state["tool_logs"] = tool_logs
try:
from tradingagents.dataflows.semantic_discovery import SemanticDiscovery
# Build config for semantic discovery
semantic_config = {
"project_dir": self.config.get("project_dir", "."),
"use_openai_embeddings": True,
"news_sources": self.semantic_news_sources,
"max_news_items": 20,
"news_lookback_hours": self.semantic_news_lookback_hours,
"min_news_importance": self.semantic_min_news_importance,
"min_similarity_threshold": self.semantic_min_similarity,
"max_tickers_per_news": self.semantic_max_tickers_per_news,
"max_total_candidates": self.max_candidates_to_analyze,
"log_callback": log_callback,
}
# Run semantic discovery
discovery = SemanticDiscovery(semantic_config)
ranked_candidates = discovery.discover()
# Also get directly mentioned tickers from news (highest signal)
directly_mentioned = discovery.get_directly_mentioned_tickers()
# Convert to candidate format
candidates = []
# Add directly mentioned tickers first (highest priority)
for ticker_info in directly_mentioned:
candidates.append(
{
"ticker": ticker_info["ticker"],
"source": self.SOURCE_NEWS_MENTION,
"context": f"Directly mentioned in news: {ticker_info['news_title']}",
"priority": self.PRIORITY_CRITICAL, # Direct mention = highest priority
"news_sentiment": ticker_info.get("sentiment", "neutral"),
"news_importance": ticker_info.get("importance", 5),
"news_context": [ticker_info],
}
)
# Add semantically matched tickers
for rank_info in ranked_candidates:
ticker = rank_info["ticker"]
news_matches = rank_info["news_matches"]
# Combine all news titles for richer context
all_news_titles = "; ".join([n["news_title"] for n in news_matches[:3]])
candidates.append(
{
"ticker": ticker,
"source": self.SOURCE_SEMANTIC,
"context": f"News-driven: {all_news_titles}",
"priority": self.PRIORITY_HIGH, # News-driven is always high priority (leading indicator)
"semantic_score": rank_info["aggregate_score"],
"num_news_matches": rank_info["num_news_matches"],
"news_context": news_matches, # Store full news context for later
}
)
print(f" Found {len(candidates)} candidates from semantic discovery.")
return {
"tickers": [c["ticker"] for c in candidates],
"candidate_metadata": candidates,
"tool_logs": state.get("tool_logs", []),
"status": "scanned",
}
except Exception as e:
print(f" Error in semantic discovery: {e}")
print(" Falling back to traditional scanner...")
# Directly call traditional scanner to avoid recursion
return self.traditional_scanner_node(state)
def _merge_candidates_into_dict(
self, candidates: List[Dict[str, Any]], target_dict: Dict[str, Dict[str, Any]]
) -> None:
"""
Merge candidates into target dictionary with smart deduplication.
For duplicate tickers, merges sources and contexts intelligently,
upgrading priority when higher-priority sources are found.
Args:
candidates: List of candidate dictionaries to merge
target_dict: Target dictionary to merge into (ticker -> candidate data)
"""
for candidate in candidates:
ticker = candidate["ticker"]
if ticker not in target_dict:
self._add_new_candidate(candidate, target_dict)
else:
self._merge_with_existing_candidate(candidate, target_dict[ticker])
def _add_new_candidate(
self, candidate: Dict[str, Any], target_dict: Dict[str, Dict[str, Any]]
) -> None:
"""
Add a new candidate to the target dictionary.
Args:
candidate: Candidate dictionary to add
target_dict: Target dictionary to add to
"""
ticker = candidate["ticker"]
target_dict[ticker] = candidate.copy()
source = candidate.get("source", self.SOURCE_UNKNOWN)
context = candidate.get("context", "").strip()
target_dict[ticker]["all_sources"] = [source]
target_dict[ticker]["all_contexts"] = [context] if context else []
def _merge_with_existing_candidate(
self, incoming: Dict[str, Any], existing: Dict[str, Any]
) -> None:
"""
Merge incoming candidate data with existing candidate.
Args:
incoming: New candidate data to merge
existing: Existing candidate data to update
"""
# Initialize list fields if needed
existing.setdefault("all_sources", [existing.get("source", self.SOURCE_UNKNOWN)])
existing.setdefault(
"all_contexts", [existing.get("context", "")] if existing.get("context") else []
)
# Update sources
incoming_source = incoming.get("source", self.SOURCE_UNKNOWN)
if incoming_source not in existing["all_sources"]:
existing["all_sources"].append(incoming_source)
# Update priority and contexts based on priority ranking
self._update_priority_and_context(incoming, existing)
def _update_priority_and_context(
self, incoming: Dict[str, Any], existing: Dict[str, Any]
) -> None:
"""
Update priority and context based on incoming candidate priority.
If incoming has higher priority, upgrades existing candidate.
Otherwise, just appends context.
Args:
incoming: New candidate data
existing: Existing candidate data to update
"""
incoming_rank = self.PRIORITY_ORDER.get(incoming.get("priority", self.PRIORITY_UNKNOWN), 4)
existing_rank = self.PRIORITY_ORDER.get(existing.get("priority", self.PRIORITY_UNKNOWN), 4)
incoming_context = incoming.get("context", "").strip()
if incoming_rank < existing_rank:
# Higher priority - upgrade and prepend context
existing["priority"] = incoming.get("priority")
existing["source"] = incoming.get("source")
self._prepend_context(incoming_context, existing)
else:
# Same or lower priority - just append context
self._append_context(incoming_context, existing)
def _prepend_context(self, new_context: str, candidate: Dict[str, Any]) -> None:
"""
Prepend context to existing candidate (for higher priority updates).
Args:
new_context: New context string to prepend
candidate: Candidate dictionary to update
"""
if not new_context:
return
candidate["all_contexts"].append(new_context)
current_ctx = candidate.get("context", "")
candidate["context"] = f"{new_context}; Also: {current_ctx}" if current_ctx else new_context
def _append_context(self, new_context: str, candidate: Dict[str, Any]) -> None:
"""
Append context to existing candidate (for same/lower priority updates).
Args:
new_context: New context string to append
candidate: Candidate dictionary to update
"""
if not new_context or new_context in candidate["all_contexts"]:
return
candidate["all_contexts"].append(new_context)
current_ctx = candidate.get("context", "")
if not current_ctx:
candidate["context"] = new_context
elif new_context not in current_ctx:
candidate["context"] = f"{current_ctx}; Also: {new_context}"
def scanner_node(self, state: DiscoveryState) -> Dict[str, Any]:
"""
Scan the market for potential candidates using the modular scanner registry.
Iterates through all scanners in SCANNER_REGISTRY, checks if they're enabled,
and runs them to collect candidates organized by pipeline.
Args:
state: Current discovery state
Returns:
Updated state with discovered candidates
"""
print("Scanning market for opportunities...")
# Update performance tracking for historical recommendations (runs before discovery)
try:
self.analytics.update_performance_tracking()
except Exception as e:
print(f" Warning: Performance tracking update failed: {e}")
print(" Continuing with discovery...")
# Initialize tool_logs in state
state.setdefault("tool_logs", [])
# Get execution config
exec_config = self.config.get("discovery", {}).get("scanner_execution", {})
concurrent = exec_config.get("concurrent", True)
max_workers = exec_config.get("max_workers", 8)
timeout_seconds = exec_config.get("timeout_seconds", 30)
# Get pipeline_config from config
pipeline_config = self.config.get("discovery", {}).get("pipelines", {})
# Prepare enabled scanners
enabled_scanners = []
for scanner_class in SCANNER_REGISTRY.get_all_scanners():
pipeline = scanner_class.pipeline
# Check if scanner's pipeline is enabled
if not pipeline_config.get(pipeline, {}).get("enabled", True):
print(f" Skipping {scanner_class.name} (pipeline '{pipeline}' disabled)")
continue
try:
# Instantiate scanner with config
scanner = scanner_class(self.config)
# Check if scanner is enabled
if not scanner.is_enabled():
print(f" Skipping {scanner_class.name} (scanner disabled)")
continue
enabled_scanners.append((scanner, scanner_class.name, pipeline))
except Exception as e:
print(f" Error instantiating {scanner_class.name}: {e}")
continue
# Run scanners concurrently or sequentially based on config
if concurrent and len(enabled_scanners) > 1:
pipeline_candidates = self._run_scanners_concurrent(
enabled_scanners, state, max_workers, timeout_seconds
)
else:
pipeline_candidates = self._run_scanners_sequential(enabled_scanners, state)
# Merge all candidates from all pipelines using _merge_candidates_into_dict()
all_candidates_dict: Dict[str, Dict[str, Any]] = {}
for pipeline, candidates in pipeline_candidates.items():
self._merge_candidates_into_dict(candidates, all_candidates_dict)
# Convert merged dict to list
final_candidates = list(all_candidates_dict.values())
final_tickers = [c["ticker"] for c in final_candidates]
print(f" Found {len(final_candidates)} unique candidates from all scanners.")
# Return state with tickers, candidate_metadata, tool_logs, status
return {
"tickers": final_tickers,
"candidate_metadata": final_candidates,
"tool_logs": state.get("tool_logs", []),
"status": "scanned",
}
def _run_scanners_sequential(
self, enabled_scanners: List[tuple], state: DiscoveryState
) -> Dict[str, List[Dict[str, Any]]]:
"""
Run scanners sequentially (original behavior).
Args:
enabled_scanners: List of (scanner, name, pipeline) tuples
state: Current discovery state
Returns:
Dict mapping pipeline -> list of candidates
"""
pipeline_candidates: Dict[str, List[Dict[str, Any]]] = {}
for scanner, name, pipeline in enabled_scanners:
# Initialize pipeline list if needed
if pipeline not in pipeline_candidates:
pipeline_candidates[pipeline] = []
try:
# Set tool_executor in state for scanner to use
state["tool_executor"] = self._execute_tool_logged
# Call scanner.scan_with_validation(state)
print(f" Running {name}...")
candidates = scanner.scan_with_validation(state)
# Route candidates to appropriate pipeline
pipeline_candidates[pipeline].extend(candidates)
print(f" Found {len(candidates)} candidates")
except Exception as e:
print(f" Error in {name}: {e}")
continue
return pipeline_candidates
def _run_scanners_concurrent(
self,
enabled_scanners: List[tuple],
state: DiscoveryState,
max_workers: int,
timeout_seconds: int,
) -> Dict[str, List[Dict[str, Any]]]:
"""
Run scanners concurrently using ThreadPoolExecutor.
Args:
enabled_scanners: List of (scanner, name, pipeline) tuples
state: Current discovery state
max_workers: Maximum concurrent threads
timeout_seconds: Timeout per scanner in seconds
Returns:
Dict mapping pipeline -> list of candidates
"""
import logging
from concurrent.futures import ThreadPoolExecutor, TimeoutError
logger = logging.getLogger(__name__)
pipeline_candidates: Dict[str, List[Dict[str, Any]]] = {}
print(
f" Running {len(enabled_scanners)} scanners concurrently (max {max_workers} workers)..."
)
def run_scanner(scanner_info: tuple) -> tuple:
"""Execute a single scanner with error handling."""
scanner, name, pipeline = scanner_info
try:
# Create a shallow copy of state for thread safety, giving this scanner its own
# tool_logs list; a plain shallow copy would share the list object with `state`, and
# the merge below would then extend that list with itself and duplicate entries.
scanner_state = state.copy()
scanner_state["tool_logs"] = []
scanner_state["tool_executor"] = self._execute_tool_logged
# Run scanner with validation
candidates = scanner.scan_with_validation(scanner_state)
# Merge this scanner's tool_logs back into the main state
if scanner_state.get("tool_logs"):
state.setdefault("tool_logs", []).extend(scanner_state["tool_logs"])
return (name, pipeline, candidates, None)
except Exception as e:
logger.error(f"Scanner {name} failed: {e}", exc_info=True)
return (name, pipeline, [], str(e))
# Submit all scanner tasks
with ThreadPoolExecutor(max_workers=max_workers) as executor:
future_to_scanner = {
executor.submit(run_scanner, scanner_info): scanner_info[1]
for scanner_info in enabled_scanners
}
# Collect results with a per-scanner timeout. Iterate the futures directly instead of
# using as_completed(): without an iterator timeout, as_completed() would block on a hung
# scanner before future.result(timeout=...) is reached, so the per-scanner timeout could
# never fire.
completed_count = 0
for future, scanner_name in future_to_scanner.items():
try:
name, pipeline, candidates, error = future.result(timeout=timeout_seconds)
# Initialize pipeline list if needed
if pipeline not in pipeline_candidates:
pipeline_candidates[pipeline] = []
if error:
print(f" ⚠️ {name}: {error}")
else:
pipeline_candidates[pipeline].extend(candidates)
print(f" {name}: {len(candidates)} candidates")
completed_count += 1
except TimeoutError:
logger.warning(f"Scanner {scanner_name} timed out after {timeout_seconds}s")
print(f" ⏱️ {scanner_name}: timeout after {timeout_seconds}s")
except Exception as e:
logger.error(f"Scanner {scanner_name} failed unexpectedly: {e}", exc_info=True)
print(f" ⚠️ {scanner_name}: unexpected error")
completed_count += 1
# Log completion stats
if completed_count < len(enabled_scanners):
logger.warning(f"Only {completed_count}/{len(enabled_scanners)} scanners completed")
return pipeline_candidates
def hybrid_scanner_node(self, state: DiscoveryState) -> Dict[str, Any]:
"""
Run both semantic and traditional discovery with smart deduplication.
Combines news-driven semantic discovery (leading indicators) with
traditional discovery (social, market movers, earnings). Merges
results and boosts candidates confirmed by multiple sources.
Args:
state: Current discovery state
Returns:
Updated state with merged candidates from both approaches
"""
print("🔍 Hybrid Discovery: Combining news-driven AND traditional signals...")
# Update performance tracking once (not in each sub-scanner)
try:
self.analytics.update_performance_tracking()
except Exception as e:
print(f" Warning: Performance tracking update failed: {e}")
print(" Continuing with discovery...")
tool_logs = state.setdefault("tool_logs", [])
def log_callback(entry: Dict[str, Any]) -> None:
tool_logs.append(entry)
state["tool_logs"] = tool_logs
# We will merge all candidates into this dict
unique_candidates = {}
all_tickers = set()
# ========================================
# Phase 1: Semantic Discovery (news-driven - leading indicators)
# ========================================
print("\n📰 Phase 1: Semantic Discovery (news-driven)...")
try:
from tradingagents.dataflows.semantic_discovery import SemanticDiscovery
# Build config for semantic discovery
semantic_config = {
"project_dir": self.config.get("project_dir", "."),
"use_openai_embeddings": True,
"news_sources": self.semantic_news_sources,
"max_news_items": 20,
"news_lookback_hours": self.semantic_news_lookback_hours,
"min_news_importance": self.semantic_min_news_importance,
"min_similarity_threshold": self.semantic_min_similarity,
"max_tickers_per_news": self.semantic_max_tickers_per_news,
"max_total_candidates": self.max_candidates_to_analyze,
"log_callback": log_callback,
}
# Run semantic discovery
discovery = SemanticDiscovery(semantic_config)
ranked_candidates = discovery.discover()
# Also get directly mentioned tickers from news (highest signal)
directly_mentioned = discovery.get_directly_mentioned_tickers()
# Prepare semantic candidates list
semantic_candidates = []
# Add directly mentioned tickers first (highest priority)
for ticker_info in directly_mentioned:
semantic_candidates.append(
{
"ticker": ticker_info["ticker"],
"source": self.SOURCE_NEWS_MENTION,
"context": f"Directly mentioned in news: {ticker_info['news_title']}",
"priority": self.PRIORITY_CRITICAL, # Direct mention = highest priority
"news_sentiment": ticker_info.get("sentiment", "neutral"),
"news_importance": ticker_info.get("importance", 5),
"news_context": [ticker_info],
}
)
all_tickers.add(ticker_info["ticker"])
# Add semantically matched tickers
for rank_info in ranked_candidates:
ticker = rank_info["ticker"]
news_matches = rank_info["news_matches"]
# Combine all news titles for richer context
all_news_titles = "; ".join([n["news_title"] for n in news_matches[:3]])
semantic_candidates.append(
{
"ticker": ticker,
"source": self.SOURCE_SEMANTIC,
"context": f"News-driven: {all_news_titles}",
"priority": self.PRIORITY_HIGH, # News-driven is always high priority (leading indicator)
"semantic_score": rank_info["aggregate_score"],
"num_news_matches": rank_info["num_news_matches"],
"news_context": news_matches,
}
)
all_tickers.add(ticker)
print(f" Found {len(semantic_candidates)} candidates from semantic discovery")
# Merge semantic candidates into unique dict
self._merge_candidates_into_dict(semantic_candidates, unique_candidates)
except Exception as e:
print(f" Semantic discovery failed: {e}")
print(" Continuing with traditional discovery...")
# ========================================
# Phase 2: Traditional Discovery (social, market movers, etc.)
# ========================================
print("\n📊 Phase 2: Traditional Discovery (Reddit, market movers, earnings, etc.)...")
traditional_candidates = self._run_traditional_scanners(state)
print(f" Found {len(traditional_candidates)} candidates from traditional discovery")
# Merge traditional candidates into unique dict
self._merge_candidates_into_dict(traditional_candidates, unique_candidates)
# ========================================
# Phase 3: Post-Merge Processing
# ========================================
print("\n🔄 Phase 3: Finalizing candidates...")
final_candidates = list(unique_candidates.values())
# Check for multi-source confirmation
semantic_sources = {self.SOURCE_SEMANTIC, self.SOURCE_NEWS_MENTION}
for c in final_candidates:
sources = c.get("all_sources", [])
has_semantic = any(s in semantic_sources for s in sources)
has_traditional = any(
s not in semantic_sources and s != self.SOURCE_UNKNOWN for s in sources
)
if has_semantic and has_traditional:
# Found by BOTH semantic and traditional - boost confidence
c["multi_source_confirmed"] = True
if c.get("priority") == self.PRIORITY_HIGH:
c["priority"] = self.PRIORITY_CRITICAL # Upgrade to critical
# Sort by priority
final_candidates.sort(
key=lambda x: self.PRIORITY_ORDER.get(x.get("priority", self.PRIORITY_UNKNOWN), 4)
)
# Update all_tickers set
all_tickers = {c["ticker"] for c in final_candidates}
# Count by priority for reporting
critical_count = sum(
1 for c in final_candidates if c.get("priority") == self.PRIORITY_CRITICAL
)
high_count = sum(1 for c in final_candidates if c.get("priority") == self.PRIORITY_HIGH)
medium_count = sum(1 for c in final_candidates if c.get("priority") == self.PRIORITY_MEDIUM)
low_count = sum(1 for c in final_candidates if c.get("priority") == self.PRIORITY_LOW)
multi_confirmed = sum(1 for c in final_candidates if c.get("multi_source_confirmed"))
print(f"\n✅ Hybrid discovery complete: {len(final_candidates)} total candidates")
print(
f" Priority: {critical_count} critical, {high_count} high, {medium_count} medium, {low_count} low"
)
if multi_confirmed:
print(
f" 🎯 {multi_confirmed} candidates confirmed by BOTH semantic AND traditional sources"
)
return {
"tickers": list(all_tickers),
"candidate_metadata": final_candidates,
"tool_logs": state.get("tool_logs", []),
"status": "scanned",
}
def _run_traditional_scanners(self, state: DiscoveryState) -> List[Dict[str, Any]]:
"""
Run all traditional scanner sources and return candidates.
Traditional sources include:
- Reddit trending
- Market movers
- Earnings calendar
- IPO calendar
- Short interest
- Unusual volume
- Analyst rating changes
- Insider buying
Args:
state: Current discovery state
Returns:
List of candidates (without deduplication)
"""
from tradingagents.dataflows.discovery.scanners import TraditionalScanner
scanner = TraditionalScanner(
config=self.config, llm=self.quick_thinking_llm, tool_executor=self._execute_tool_logged
)
return scanner.scan(state)
def traditional_scanner_node(self, state: DiscoveryState) -> Dict[str, Any]:
"""
Traditional market scanning: Reddit, market movers, earnings, etc.
Args:
state: Current discovery state
Returns:
Updated state with traditional candidates
"""
print("🔍 Scanning market for opportunities...")
# Update performance tracking for historical recommendations (runs before discovery)
try:
self.analytics.update_performance_tracking()
except Exception as e:
print(f" Warning: Performance tracking update failed: {e}")
print(" Continuing with discovery...")
state.setdefault("tool_logs", [])
# Run all traditional scanners
candidates = self._run_traditional_scanners(state)
# Deduplicate candidates
unique_candidates = {}
self._merge_candidates_into_dict(candidates, unique_candidates)
final_candidates = list(unique_candidates.values())
print(f" Found {len(final_candidates)} unique candidates.")
return {
"tickers": [c["ticker"] for c in final_candidates],
"candidate_metadata": final_candidates,
"tool_logs": state.get("tool_logs", []),
"status": "scanned",
}
def filter_node(self, state: DiscoveryState) -> Dict[str, Any]:
"""
Filter candidates and enrich with additional data.
Filters candidates based on:
- Ticker validity
- Liquidity (volume)
- Same-day price movement
- Data availability
Enriches with:
- Current price
- Fundamentals
- Business description
- Technical indicators
- News
- Insider transactions
- Analyst recommendations
- Options activity
Args:
state: Current discovery state with candidates
Returns:
Updated state with filtered and enriched candidates
"""
from tradingagents.dataflows.discovery.filter import CandidateFilter
cand_filter = CandidateFilter(self.config, self._execute_tool_logged)
return cand_filter.filter(state)
def preliminary_ranker_node(self, state: DiscoveryState) -> Dict[str, Any]:
"""
Rank all filtered candidates and select top opportunities.
Uses LLM to analyze all enriched candidate data and rank
by investment potential based on:
- Strategy match
- Fundamental strength
- Technical setup
- Catalyst timing
- Options flow
- Historical performance patterns
Args:
state: Current discovery state with filtered candidates
Returns:
Final state with ranked opportunities and final_ranking JSON
"""
from tradingagents.dataflows.discovery.ranker import CandidateRanker
ranker = CandidateRanker(self.config, self.deep_thinking_llm, self.analytics)
return ranker.rank(state)
def run(self, trade_date: str = None):
"""Execute the discovery graph workflow.
Args:
trade_date: Trade date in YYYY-MM-DD format (defaults to today if not provided)
"""
from tradingagents.dataflows.discovery.utils import resolve_trade_date_str
trade_date = resolve_trade_date_str({"trade_date": trade_date})
print(f"\n{'='*60}")
print(f"Discovery Analysis - {trade_date}")
print(f"{'='*60}")
initial_state = {
"trade_date": trade_date,
"tickers": [],
"filtered_tickers": [],
"final_ranking": "",
"status": "initialized",
"tool_logs": [],
}
final_state = self.graph.invoke(initial_state)
# Save results and recommendations
self.analytics.save_discovery_results(final_state, trade_date, self.config)
# Extract and save rankings if available, reusing the shared normalization helper
rankings_list = self._normalize_rankings(final_state.get("final_ranking", []))
if rankings_list:
self.analytics.save_recommendations(
rankings_list, trade_date, self.config.get("llm_provider", "unknown")
)
return final_state
def build_price_chart_bundle(self, rankings: Any) -> Dict[str, Dict[str, Any]]:
"""Build per-ticker chart + movement stats for top recommendations."""
if not self.console_price_charts:
return {}
rankings_list = self._normalize_rankings(rankings)
tickers: List[str] = []
for item in rankings_list:
ticker = (item.get("ticker") or "").upper()
if ticker and ticker not in tickers:
tickers.append(ticker)
if not tickers:
return {}
tickers = tickers[: self.price_chart_max_tickers]
chart_windows = self._get_chart_windows()
renderer = self._get_chart_renderer()
if renderer is None:
return {}
bundle: Dict[str, Dict[str, Any]] = {}
for ticker in tickers:
series = self._fetch_price_series(ticker)
if not series:
bundle[ticker] = {
"chart": f"{ticker}: no price history available",
"charts": {},
"movement": {},
}
continue
per_window_charts: Dict[str, str] = {}
for window in chart_windows:
window_closes = self._get_window_closes(ticker, series, window)
if len(window_closes) < 2:
continue
change_pct = None
if window_closes[0]:
change_pct = (window_closes[-1] / window_closes[0] - 1) * 100.0
label = window.upper()
title = f"{ticker} ({label})"
if change_pct is not None:
title = f"{ticker} ({label}, {change_pct:+.1f}%)"
chart_text = renderer(window_closes, title)
if chart_text:
per_window_charts[window] = chart_text
primary_chart = ""
if per_window_charts:
first_key = chart_windows[0]
primary_chart = per_window_charts.get(
first_key, next(iter(per_window_charts.values()))
)
bundle[ticker] = {
"chart": primary_chart,
"charts": per_window_charts,
"movement": self._compute_movement_stats(series),
}
return bundle
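# Illustrative shape of the bundle returned above (ticker and numbers are made up):
#   {
#       "XYZ": {
#           "chart": "<ANSI chart for the first configured window>",
#           "charts": {"1m": "<ANSI chart>", "6m": "<ANSI chart>"},
#           "movement": {"1d": 1.2, "7d": -0.5, "1m": 4.3, "6m": 12.0, "1y": 25.1},
#       }
#   }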
def build_price_chart_map(self, rankings: Any) -> Dict[str, str]:
"""Build mini price charts keyed by ticker."""
bundle = self.build_price_chart_bundle(rankings)
return {ticker: item.get("chart", "") for ticker, item in bundle.items()}
def build_price_chart_strings(self, rankings: Any) -> List[str]:
"""Build mini price charts for top recommendations (returns ANSI strings)."""
charts = self.build_price_chart_map(rankings)
return list(charts.values()) if charts else []
def _print_price_charts(self, rankings_list: List[Dict[str, Any]]) -> None:
"""Render mini price charts for top recommendations in the console."""
charts = self.build_price_chart_strings(rankings_list)
if not charts:
return
print(f"\n📈 Price Charts (last {self.price_chart_lookback_days} days)")
for chart in charts:
print(chart)
def _fetch_price_series(self, ticker: str) -> List[Dict[str, Any]]:
"""Fetch recent daily close prices with dates for charting and movement stats."""
try:
import pandas as pd
import yfinance as yf
from tradingagents.dataflows.y_finance import suppress_yfinance_warnings
history_days = max(self.price_chart_lookback_days + 10, 390)
with suppress_yfinance_warnings():
data = yf.download(
ticker,
period=f"{history_days}d",
interval="1d",
auto_adjust=True,
progress=False,
)
if data is None or data.empty:
return []
series = None
if isinstance(data.columns, pd.MultiIndex):
if "Close" in data.columns.get_level_values(0):
close_data = data["Close"]
series = (
close_data.iloc[:, 0]
if isinstance(close_data, pd.DataFrame)
else close_data
)
elif "Close" in data.columns:
series = data["Close"]
if series is None:
series = data.iloc[:, 0]
if isinstance(series, pd.DataFrame):
series = series.iloc[:, 0]
series = series.dropna()
if series.empty:
return []
points: List[Dict[str, Any]] = []
for index, close in series.items():
dt = getattr(index, "to_pydatetime", lambda: index)()
points.append({"date": dt, "close": float(close)})
return points
except Exception as exc:
print(f" {ticker}: error fetching prices: {exc}")
return []
def _get_chart_renderer(self) -> Optional[Callable[[List[float], str], str]]:
"""Return selected chart renderer, with fallback to plotext."""
preferred = str(self.price_chart_library or "plotext").lower().strip()
if preferred == "plotille":
try:
import plotille
return lambda closes, title: self._render_plotille_chart(plotille, closes, title)
except Exception as exc:
print(f" ⚠️ plotille unavailable, falling back to plotext: {exc}")
try:
import plotext as plt
return lambda closes, title: self._render_plotext_chart(plt, closes, title)
except Exception as exc:
print(f" ⚠️ plotext not available, skipping charts: {exc}")
return None
def _render_plotille_chart(self, plotille: Any, closes: List[float], title: str) -> str:
"""Build a plotille chart and return as ANSI string."""
if not closes:
return ""
fig = plotille.Figure()
fig.width = self.price_chart_width
fig.height = self.price_chart_height
fig.color_mode = "byte"
fig.set_x_limits(min_=0, max_=max(1, len(closes) - 1))
min_close = min(closes)
max_close = max(closes)
if min_close == max_close:
padding = max(0.01, min_close * 0.01)
min_close -= padding
max_close += padding
fig.set_y_limits(min_=min_close, max_=max_close)
fig.plot(range(len(closes)), closes, lc=45)
return f"{title}\n{fig.show(legend=False)}"
def _render_plotext_chart(self, plt: Any, closes: List[float], title: str) -> str:
"""Build a single plotext line chart and return as ANSI string."""
self._reset_plotext(plt)
if hasattr(plt, "plotsize"):
plt.plotsize(self.price_chart_width, self.price_chart_height)
if hasattr(plt, "theme"):
try:
plt.theme("pro")
except Exception:
pass
if hasattr(plt, "title"):
plt.title(title)
if hasattr(plt, "xlabel"):
plt.xlabel("")
if hasattr(plt, "ylabel"):
plt.ylabel("")
plt.plot(closes)
if hasattr(plt, "build"):
chart = plt.build()
if chart:
return chart
plt.show()
return ""
def _compute_movement_stats(self, series: List[Dict[str, Any]]) -> Dict[str, Optional[float]]:
"""Compute 1D, 7D, 6M, and 1Y percent movement from latest close."""
if not series:
return {}
from datetime import timedelta
latest = series[-1]
latest_date = latest["date"]
latest_close = latest["close"]
if not latest_close:
return {}
windows = {
"1d": timedelta(days=1),
"7d": timedelta(days=7),
"1m": timedelta(days=30),
"6m": timedelta(days=182),
"1y": timedelta(days=365),
}
stats: Dict[str, Optional[float]] = {}
for label, delta in windows.items():
target_date = latest_date - delta
baseline = None
for point in series:
if point["date"] <= target_date:
baseline = point["close"]
else:
break
if baseline and baseline != 0:
stats[label] = (latest_close / baseline - 1.0) * 100.0
else:
stats[label] = None
return stats
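# Quick arithmetic check for the stats above (numbers are hypothetical): with
# latest_close = 110.0 and the last close at or before (latest_date - 7 days) equal to
# 100.0, stats["7d"] = (110.0 / 100.0 - 1.0) * 100.0 = +10.0 percent. Windows with no
# usable baseline are reported as None.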
def _get_chart_windows(self) -> List[str]:
"""Normalize configured chart windows."""
allowed = {"1d", "7d", "1m", "6m", "1y"}
configured = self.price_chart_windows
if isinstance(configured, str):
configured = [part.strip().lower() for part in configured.split(",")]
elif not isinstance(configured, list):
configured = ["1m"]
windows = []
for value in configured:
key = str(value).strip().lower()
if key in allowed and key not in windows:
windows.append(key)
return windows or ["1m"]
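# Example normalization (input is hypothetical): price_chart_windows = "7d, 1M, 2w" yields
# ["7d", "1m"]: entries are lower-cased and deduplicated, unknown windows are dropped, and
# ["1m"] is returned if nothing valid remains.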
def _get_window_closes(
self, ticker: str, series: List[Dict[str, Any]], window: str
) -> List[float]:
"""Return closes for a given chart window."""
if not series:
return []
from datetime import timedelta
if window == "1d":
intraday = self._fetch_intraday_closes(ticker)
if len(intraday) >= 2:
return intraday
# fallback to last 2 daily points if intraday unavailable
return [point["close"] for point in series[-2:]]
window_days = {
"7d": 7,
"1m": 30,
"6m": 182,
"1y": 365,
}.get(window, self.price_chart_lookback_days)
latest_date = series[-1]["date"]
cutoff = latest_date - timedelta(days=window_days)
closes = [point["close"] for point in series if point["date"] >= cutoff]
return closes
def _fetch_intraday_closes(self, ticker: str) -> List[float]:
"""Fetch intraday close prices for 1-day chart window."""
try:
import pandas as pd
import yfinance as yf
from tradingagents.dataflows.y_finance import suppress_yfinance_warnings
with suppress_yfinance_warnings():
data = yf.download(
ticker,
period="1d",
interval="15m",
auto_adjust=True,
progress=False,
)
if data is None or data.empty:
return []
series = None
if isinstance(data.columns, pd.MultiIndex):
if "Close" in data.columns.get_level_values(0):
close_data = data["Close"]
series = (
close_data.iloc[:, 0]
if isinstance(close_data, pd.DataFrame)
else close_data
)
elif "Close" in data.columns:
series = data["Close"]
if series is None:
series = data.iloc[:, 0]
if isinstance(series, pd.DataFrame):
series = series.iloc[:, 0]
return [float(value) for value in series.dropna().to_list()]
except Exception:
return []
@staticmethod
def _normalize_rankings(rankings: Any) -> List[Dict[str, Any]]:
"""Normalize ranking payload into a list of ranking dicts."""
rankings_list: List[Dict[str, Any]] = []
if isinstance(rankings, str):
try:
import json
rankings = json.loads(rankings)
except Exception:
rankings = []
if isinstance(rankings, dict):
rankings_list = rankings.get("rankings", [])
elif isinstance(rankings, list):
rankings_list = rankings
return rankings_list
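# Both of these payloads (illustrative) normalize to [{"ticker": "XYZ"}]:
#   '{"rankings": [{"ticker": "XYZ"}]}'   (JSON string wrapping a dict)
#   [{"ticker": "XYZ"}]                   (already a list of ranking dicts)
# Unparseable strings and other types normalize to [].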
@staticmethod
def _reset_plotext(plt: Any) -> None:
"""Clear plotext state between charts."""
for method in ("clf", "clear_figure", "clear_data"):
func = getattr(plt, method, None)
if callable(func):
func()
return