127 lines
5.5 KiB
JSON
127 lines
5.5 KiB
JSON
{
|
|
"requirements": {
|
|
"entities": {
|
|
"NewsArticle": "Existing domain entity; to be enhanced with structured sentiment and vector embedding support",
|
|
"NewsJobConfig": "New configuration entity for scheduled job parameters (tickers, schedule, model settings)"
|
|
},
|
|
"data_persistence": {
|
|
"news_articles_table": "Existing table that already has vector embedding columns; enhance the sentiment_score JSONB column to hold the structured sentiment format",
|
|
"vector_indexes": "pgvectorscale indexes for title_embedding and content_embedding (1536 dimensions)",
|
|
"data_flows": [
|
|
"APScheduler → NewsService.update_company_news() → NewsRepository.upsert_batch()",
|
|
"ArticleData → OpenRouter API → structured sentiment → NewsArticle entity",
|
|
"Article content → OpenRouter embeddings API → pgvectorscale storage"
|
|
]
|
|
},
|
|
"api_needed": {
|
|
"external_apis": [
|
|
"OpenRouter for LLM sentiment analysis using quick_think_llm",
|
|
"OpenRouter for embeddings using text-embedding models",
|
|
"Existing GoogleNewsClient and ArticleScraperClient"
|
|
],
|
|
"internal_apis": [
|
|
"Enhanced NewsService.update_company_news() method",
|
|
"New NewsRepository.find_similar_articles() for semantic search",
|
|
"New ScheduledNewsCollector job orchestration class"
|
|
]
|
|
},
|
|
"components": {
|
|
"scheduler": "APScheduler integration for daily news collection",
|
|
"sentiment_analyzer": "OpenRouter LLM client for structured sentiment analysis",
|
|
"embedding_generator": "OpenRouter embeddings client for vector generation",
|
|
"job_orchestrator": "ScheduledNewsCollector class for job coordination"
|
|
},
|
|
"domains": {
|
|
"primary": "news (completing final 5%)",
|
|
"integration": "Leverages existing Router → Service → Repository → Entity → Database pattern"
|
|
},
|
|
"business_rules": [
|
|
"Best-effort sentiment analysis - LLM failures don't block article storage",
|
|
"URL-based deduplication using existing NewsRepository patterns",
|
|
"Paywall resilience via existing ArticleScraperClient graceful degradation",
|
|
"Date filtering: articles within last 30 days only",
|
|
"Sentiment confidence threshold: 0.5 minimum for reliable scores",
|
|
"Content length limits: 8000 chars for embedding generation",
|
|
"Embedding generation: Both title and content vectors required"
|
|
]
|
|
},
|
|
"technical_needs": {
|
|
"domain_model": {
|
|
"entities": {
|
|
"NewsArticle": {
|
|
"status": "exists_needs_enhancement",
|
|
"enhancements": [
|
|
"Structured sentiment JSON format: {sentiment: positive|negative|neutral, confidence: 0.0-1.0, reasoning: string}",
|
|
"Vector embedding support for title and content (1536 dimensions)",
|
|
"Enhanced validation for sentiment confidence thresholds"
|
|
]
|
|
},
|
|
"NewsJobConfig": {
|
|
"status": "new_entity",
|
|
"fields": ["tickers: list[str]", "schedule_hour: int", "sentiment_model: str", "embedding_model: str", "max_articles_per_ticker: int"],
|
|
"validation": "schedule_hour must be in the range 0-23; max_articles_per_ticker must be in the range 50-500"
|
|
}
|
|
},
|
|
"services": {
|
|
"NewsService": {
|
|
"status": "exists_needs_enhancement",
|
|
"enhancements": [
|
|
"Integrate LLM sentiment analysis in update methods",
|
|
"Add vector embedding generation pipeline",
|
|
"Enhanced error handling for LLM and embedding failures"
|
|
]
|
|
},
|
|
"ScheduledNewsCollector": {
|
|
"status": "new_service",
|
|
"responsibilities": [
|
|
"Orchestrate daily news collection jobs",
|
|
"Manage job configuration and scheduling",
|
|
"Monitor job execution and handle failures",
|
|
"Integrate with existing NewsService methods"
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"persistence": {
|
|
"database": "PostgreSQL + TimescaleDB + pgvectorscale",
|
|
"schema_updates": {
|
|
"news_articles": {
|
|
"existing_columns": "headline, url, source, published_date, summary, entities, sentiment_score, author, category, title_embedding, content_embedding",
|
|
"modifications": [
|
|
"Enhance sentiment_score JSONB to support structured format",
|
|
"Add vector similarity indexes for title_embedding and content_embedding",
|
|
"Add composite index on (symbol, published_date) for News Analyst queries"
|
|
]
|
|
}
|
|
},
|
|
"access_patterns": [
|
|
"Time-based queries: articles for ticker in date range",
|
|
"Semantic similarity: find similar articles using vector search",
|
|
"Sentiment filtering: articles by sentiment type and confidence",
|
|
"Batch operations: efficient upsert of daily collection results"
|
|
]
|
|
},
|
|
"router": {
|
|
"status": "not_needed",
|
|
"reason": "News Analysts access news data via the AgentToolkit anti-corruption layer; no direct REST API is required"
|
|
},
|
|
"events": {
|
|
"status": "not_applicable",
|
|
"reason": "Scheduled batch processing, no real-time event requirements"
|
|
},
|
|
"dependencies": {
|
|
"external": [
|
|
"OpenRouter API (existing TradingAgentsConfig integration)",
|
|
"OpenRouter embedding models (existing TradingAgentsConfig integration)",
|
|
"APScheduler (new dependency for job scheduling)"
|
|
],
|
|
"internal": [
|
|
"Existing NewsService (95% complete)",
|
|
"Existing NewsRepository with async PostgreSQL patterns",
|
|
"Existing GoogleNewsClient and ArticleScraperClient",
|
|
"DatabaseManager for connection management",
|
|
"TradingAgentsConfig for LLM and API configuration"
|
|
]
|
|
}
|
|
}
|
|
}