# TradingAgents/tradingagents/dataflows/discovery/scanners/semantic_news.py
# (file-viewer header: 67 lines, 2.4 KiB, Python)

"""Semantic news scanner for early catalyst detection."""
from typing import Any, Dict, List
from tradingagents.dataflows.discovery.scanner_registry import BaseScanner, SCANNER_REGISTRY
from tradingagents.dataflows.discovery.utils import Priority
class SemanticNewsScanner(BaseScanner):
    """Scan global news text for ticker-like symbols and emit them as candidates.

    The scanner fetches a news blob through the ``get_global_news`` tool,
    pulls out upper-case tokens that look like tickers (optionally written as
    ``$TICKER``), drops a small set of finance jargon words, and returns one
    candidate dict per remaining symbol.

    NOTE(review): ``scanner_config``, ``limit`` and ``is_enabled()`` are not
    defined in this file; they are assumed to be provided by ``BaseScanner``.
    """

    name = "semantic_news"
    pipeline = "news"

    # Upper-case tokens that match the ticker pattern but are common
    # market/finance vocabulary rather than symbols.
    _STOP_WORDS = frozenset({
        'NYSE', 'NASDAQ', 'CEO', 'CFO', 'IPO', 'ETF', 'USA', 'SEC',
        'NEWS', 'STOCK', 'MARKET',
    })

    def __init__(self, config: Dict[str, Any]):
        """Read scanner settings from the scanner's config section.

        Args:
            config: Configuration mapping forwarded to ``BaseScanner``.
        """
        super().__init__(config)
        # NOTE(review): these four settings are stored but not consulted by
        # scan() in this file; they may be used elsewhere or be placeholders.
        self.sources = self.scanner_config.get("sources", ["google_news"])
        self.lookback_hours = self.scanner_config.get("lookback_hours", 6)
        self.min_importance = self.scanner_config.get("min_news_importance", 5)
        self.min_similarity = self.scanner_config.get("min_similarity", 0.5)

    @classmethod
    def _extract_tickers(cls, text: str) -> List[str]:
        """Return unique ticker-like symbols from *text* in first-mention order."""
        import re

        # Either a bare 2-5 letter upper-case word or a "$"-prefixed one.
        # The trailing \b on the cashtag branch stops "$AAPLXYZ" from being
        # truncated into the bogus symbol "AAPLX".
        ticker_pattern = r'\b([A-Z]{2,5})\b|\$([A-Z]{2,5})\b'
        symbols = (
            plain or cashtag
            for plain, cashtag in re.findall(ticker_pattern, text)
        )
        # dict.fromkeys de-duplicates while keeping insertion order, so the
        # later `[:limit]` slice is stable across runs (a set's string order
        # varies with hash randomization).
        unique_symbols = dict.fromkeys(symbols)
        return [sym for sym in unique_symbols if sym not in cls._STOP_WORDS]

    def scan(self, state: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Return candidate dicts for tickers mentioned in today's global news.

        Args:
            state: Discovery state mapping; not read by this scanner.

        Returns:
            A list of at most ``self.limit`` candidate dicts with the keys
            ``ticker``, ``source``, ``context``, ``priority`` and
            ``strategy``. An empty list is returned when the scanner is
            disabled, the tool yields no usable text, or any error occurs.
        """
        if not self.is_enabled():
            return []

        print(" 📰 Scanning news catalysts...")

        # Best-effort: any failure (including the lazy imports below) is
        # reported and turned into an empty result rather than propagated.
        try:
            from tradingagents.tools.executor import execute_tool
            from datetime import datetime

            # Get recent global news
            date_str = datetime.now().strftime("%Y-%m-%d")
            result = execute_tool("get_global_news", date=date_str)
            if not result or not isinstance(result, str):
                return []

            # Extract tickers mentioned in news
            tickers = self._extract_tickers(result)

            candidates = [
                {
                    "ticker": ticker,
                    "source": self.name,
                    "context": "Mentioned in recent market news",
                    "priority": Priority.MEDIUM.value,
                    "strategy": "news_catalyst",
                }
                for ticker in tickers[:self.limit]
            ]

            print(f" Found {len(candidates)} news mentions")
            return candidates
        except Exception as e:
            print(f" ⚠️ News scan failed: {e}")
            return []
# Register the scanner class with the shared registry when this module is imported.
# NOTE(review): presumably the discovery pipeline looks scanners up through this
# registry (e.g. by `name`) — confirm against scanner_registry.
SCANNER_REGISTRY.register(SemanticNewsScanner)