TradingAgents/docs/specs/news/design.json

127 lines
5.5 KiB
JSON

{
"requirements": {
"entities": {
"NewsArticle": "Existing domain entity, enhance with structured sentiment and vector embedding support",
"NewsJobConfig": "New configuration entity for scheduled job parameters (tickers, schedule, model settings)"
},
"data_persistence": {
"news_articles_table": "Existing table with vector embedding columns, enhance sentiment_score JSONB column",
"vector_indexes": "pgvectorscale indexes for title_embedding and content_embedding (1536 dimensions)",
"data_flows": [
"APScheduler → NewsService.update_company_news() → NewsRepository.upsert_batch()",
"ArticleData → OpenRouter API → structured sentiment → NewsArticle entity",
"Article content → OpenRouter embeddings API → pgvectorscale storage"
]
},
"api_needed": {
"external_apis": [
"OpenRouter for LLM sentiment analysis using quick_think_llm",
"OpenRouter for embeddings using text-embedding models",
"Existing GoogleNewsClient and ArticleScraperClient"
],
"internal_apis": [
"Enhanced NewsService.update_company_news() method",
"New NewsRepository.find_similar_articles() for semantic search",
"New ScheduledNewsCollector job orchestration class"
]
},
"components": {
"scheduler": "APScheduler integration for daily news collection",
"sentiment_analyzer": "OpenRouter LLM client for structured sentiment analysis",
"embedding_generator": "OpenRouter embeddings client for vector generation",
"job_orchestrator": "ScheduledNewsCollector class for job coordination"
},
"domains": {
"primary": "news (completing final 5%)",
"integration": "Leverages existing Router → Service → Repository → Entity → Database pattern"
},
"business_rules": [
"Best-effort sentiment analysis - LLM failures don't block article storage",
"URL-based deduplication using existing NewsRepository patterns",
"Paywall resilience via existing ArticleScraperClient graceful degradation",
"Date filtering: articles within last 30 days only",
"Sentiment confidence threshold: 0.5 minimum for reliable scores",
"Content length limits: 8000 chars for embedding generation",
"Embedding generation: Both title and content vectors required"
]
},
"technical_needs": {
"domain_model": {
"entities": {
"NewsArticle": {
"status": "exists_needs_enhancement",
"enhancements": [
"Structured sentiment JSON format: {sentiment: positive|negative|neutral, confidence: 0.0-1.0, reasoning: string}",
"Vector embedding support for title and content (1536 dimensions)",
"Enhanced validation for sentiment confidence thresholds"
]
},
"NewsJobConfig": {
"status": "new_entity",
"fields": ["tickers: list[str]", "schedule_hour: int", "sentiment_model: str", "embedding_model: str", "max_articles_per_ticker: int"],
"validation": "Schedule hour 0-23, max articles 50-500 range"
}
},
"services": {
"NewsService": {
"status": "exists_needs_enhancement",
"enhancements": [
"Integrate LLM sentiment analysis in update methods",
"Add vector embedding generation pipeline",
"Enhanced error handling for LLM and embedding failures"
]
},
"ScheduledNewsCollector": {
"status": "new_service",
"responsibilities": [
"Orchestrate daily news collection jobs",
"Manage job configuration and scheduling",
"Monitor job execution and handle failures",
"Integrate with existing NewsService methods"
]
}
}
},
"persistence": {
"database": "PostgreSQL + TimescaleDB + pgvectorscale",
"schema_updates": {
"news_articles": {
"existing_columns": "headline, url, source, published_date, summary, entities, sentiment_score, author, category, title_embedding, content_embedding",
"modifications": [
"Enhance sentiment_score JSONB to support structured format",
"Add vector similarity indexes for title_embedding and content_embedding",
"Add composite index on (symbol, published_date) for News Analyst queries"
]
}
},
"access_patterns": [
"Time-based queries: articles for ticker in date range",
"Semantic similarity: find similar articles using vector search",
"Sentiment filtering: articles by sentiment type and confidence",
"Batch operations: efficient upsert of daily collection results"
]
},
"router": {
"status": "not_needed",
"reason": "News Analysts access via AgentToolkit anti-corruption layer, no direct REST API required"
},
"events": {
"status": "not_applicable",
"reason": "Scheduled batch processing, no real-time event requirements"
},
"dependencies": {
"external": [
"OpenRouter API (existing TradingAgentsConfig integration)",
"OpenRouter embeddings models (existing TradingAgentsConfig integration)",
"APScheduler (new dependency for job scheduling)"
],
"internal": [
"Existing NewsService (95% complete)",
"Existing NewsRepository with async PostgreSQL patterns",
"Existing GoogleNewsClient and ArticleScraperClient",
"DatabaseManager for connection management",
"TradingAgentsConfig for LLM and API configuration"
]
}
}
}