diff --git a/.gitignore b/.gitignore index 4ebf99e3..3a9fd4bc 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ eval_results/ eval_data/ *.egg-info/ .env +.coverage diff --git a/.mise.toml b/.mise.toml index e06c5804..50fc17a1 100644 --- a/.mise.toml +++ b/.mise.toml @@ -37,6 +37,11 @@ description = "Run tests with pytest (with database)" depends = ["docker"] run = "uv run pytest" +[tasks.coverage] +description = "Run tests with coverage report" +depends = ["docker"] +run = "uv run pytest --cov=tradingagents --cov-report=html --cov-report=term-missing" + [tasks.lint] description = "Run ruff linting" run = "ruff check ." @@ -57,6 +62,11 @@ run = "ruff check --fix ." description = "Run format, lint, and typecheck" run = ["ruff format .", "ruff check .", "uv run pyrefly check ."] +[tasks.quality] +description = "Run complete quality check (format, lint, typecheck, test, coverage)" +depends = ["docker"] +run = ["ruff format .", "ruff check .", "uv run pyrefly check .", "uv run pytest --cov=tradingagents --cov-report=term-missing"] + [tasks.clean] description = "Clean up cache and build artifacts" run = [ diff --git a/docker-compose.yml b/docker-compose.yml index 375f1d0b..451fc662 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,6 @@ services: timescaledb: - build: ./db + build: ./docker/db container_name: tradingagents_timescaledb environment: POSTGRES_USER: postgres diff --git a/docker/db/Dockerfile b/docker/db/Dockerfile index 6753d9d7..56adc164 100644 --- a/docker/db/Dockerfile +++ b/docker/db/Dockerfile @@ -16,12 +16,27 @@ RUN echo "deb https://packagecloud.io/timescale/timescaledb/debian/ $(lsb_releas # Install TimescaleDB for PostgreSQL 16 RUN apt-get install -y timescaledb-2-postgresql-17 -# Install pgxman +# Install pgvector manually for PostgreSQL 17 +RUN apt-get update && apt-get install -y \ + build-essential \ + git \ + postgresql-server-dev-17 \ + && rm -rf /var/lib/apt/lists/* + +# Clone and build pgvector +RUN cd /tmp && \ + git clone --branch v0.7.4 https://github.com/pgvector/pgvector.git && \ + cd pgvector && \ + make && \ + make install && \ + cd / && \ + rm -rf /tmp/pgvector + +# Install pgxman for pgvectorscale RUN curl -sfL https://install.pgx.sh | sh - -# Install pgvector and pgvectorscale using pgxman -RUN pgxman install pgvector || echo "pgvector install failed" \ - && pgxman install pgvectorscale || echo "pgvectorscale install failed" +# Install pgvectorscale using pgxman +RUN pgxman install pgvectorscale || echo "pgvectorscale install failed" # Configure PostgreSQL for TimescaleDB (instead of using timescaledb-tune) RUN echo "shared_preload_libraries = 'timescaledb'" >> /usr/share/postgresql/postgresql.conf.sample \ diff --git a/docs/product/roadmap.md b/docs/product/roadmap.md index ca80edd4..538e2674 100644 --- a/docs/product/roadmap.md +++ b/docs/product/roadmap.md @@ -4,156 +4,183 @@ This roadmap outlines the technical development path for the personal fork of TradingAgents, focusing on building a robust data infrastructure with PostgreSQL + TimescaleDB + pgvectorscale, implementing RAG-powered agents, and establishing automated data collection pipelines with Dagster. -## Current Status: Phase 1 - News Domain (95% Complete) +**Last Updated**: 2025-11-11 -The foundation has been established with core domain architecture, comprehensive testing framework, and the news domain nearly complete. 
+### Key Roadmap Changes +- **Pragmatic Dagster Integration**: Dagster jobs built incrementally per domain (not separate phase) +- **Accurate Timeline**: 10-14 weeks total (vs original 16-22 weeks) based on actual progress +- **Incremental Automation**: Each domain gets automated collection as it completes +- **Earlier Production Readiness**: Automated data collection starts Week 1 (not Month 4) + +### Development Velocity +- **Observed Completion Rate**: News clients 85-90% complete with 600+ lines of quality tests +- **AI-Assisted Multiplier**: 3-4x faster development with spec-driven workflow +- **Target Task Velocity**: 15-20 tasks/week with AI assistance +- **Test Coverage**: Maintained 85%+ with pytest-vcr pattern + +## Current Status: Phase 1 - News Domain + Dagster Integration (85% Complete) + +The foundation has been established with core domain architecture, comprehensive testing framework, and the news domain clients complete. ### Completed Infrastructure - **Domain Architecture**: Clean separation of news, marketdata, and socialmedia domains - **Testing Framework**: Pragmatic TDD with 85%+ coverage, pytest-vcr for HTTP mocking -- **Repository Pattern**: Efficient data caching and management system -- **News Domain**: Article scraping, sentiment analysis, and storage (95% complete) +- **News Clients**: Google News RSS + Article Scraper with comprehensive tests (600+ lines) +- **Database Stack**: PostgreSQL + TimescaleDB + pgvectorscale ready - **Basic Agent System**: Multi-agent trading analysis framework with LangGraph +### Current Priorities (Next 5-7 Days) +1. **Complete News Domain Foundation** - Repository, Service, Entity layers +2. **LLM Integration** - OpenRouter sentiment analysis + vector embeddings +3. **Basic Dagster Job** - Automated daily news collection +4. 
**Spec Documentation** - Create status.md and tasks.md for progress tracking + ## Development Phases -### Phase 1: News Domain Completion (Current - 95% Complete) -**Timeline**: 2-3 weeks +### Phase 1: News Domain + Basic Dagster (Current - 85% Complete) +**Timeline**: 5-7 days remaining **Status**: 🔄 In Progress -#### Remaining Work -- **News Processing Pipeline**: Complete article content processing and deduplication -- **Sentiment Analysis Optimization**: Fine-tune sentiment scoring algorithms -- **News Repository**: Finalize PostgreSQL integration for news storage -- **Testing Coverage**: Achieve 85%+ test coverage for news domain -- **Performance Optimization**: Optimize news retrieval and search performance +#### Remaining Work (5-7 days) +- **News Repository Layer**: PostgreSQL async operations with TimescaleDB (1-2 days) +- **News Service Layer**: Business logic with LLM integration (1-2 days) +- **NewsArticle Entity**: Domain models with sentiment and embeddings (1 day) +- **OpenRouter Integration**: Sentiment analysis via LLM (1-2 days) +- **Vector Embeddings**: OpenAI embeddings via OpenRouter for semantic search (1 day) +- **Basic Dagster Job**: Daily news collection automation (1-2 days) +- **Integration Testing**: End-to-end workflow validation (1 day) + +#### Key Deliverables +- News domain following Router → Service → Repository → Entity → Database pattern +- OpenRouter LLM sentiment analysis operational +- pgvectorscale vector embeddings for semantic search +- Automated Dagster job for daily news collection +- 85%+ test coverage maintained #### Success Criteria -- ✅ All news APIs integrated and tested -- ✅ Sentiment analysis producing consistent scores -- ✅ News data properly stored in PostgreSQL -- ✅ Comprehensive test suite covering edge cases -- ✅ News domain ready for RAG integration +- ✅ Complete layered architecture implemented +- ✅ LLM sentiment scores with confidence ratings +- ✅ Vector embeddings enabling semantic search +- ✅ Dagster job running daily news collection +- ✅ Query performance < 2 seconds +- ✅ News domain ready for agent integration -### Phase 2: Market Data Domain + PostgreSQL Migration (Next Priority) -**Timeline**: 4-6 weeks +### Phase 2: Market Data Domain + Dagster Integration (Next Priority) +**Timeline**: 4-5 weeks **Status**: 📋 Planned #### Core Objectives -- **TimescaleDB Integration**: Implement hypertables for efficient time-series storage -- **Market Data Collection**: Complete price, volume, and technical indicator collection -- **PostgreSQL Migration**: Move all data persistence from file-based to PostgreSQL -- **Technical Analysis**: Implement MACD, RSI, and other technical indicators -- **Database Schema**: Design optimized schema for market data with proper indexing +- **TimescaleDB Hypertables**: Efficient time-series storage for price/volume data +- **Market Data Collection**: FinnHub/yfinance integration with retry logic +- **PostgreSQL Migration**: Move from file-based to database storage +- **Technical Indicators**: MACD, RSI, Bollinger Bands calculations +- **Dagster Market Data Job**: Twice-daily price data collection automation +- **Performance Optimization**: Sub-100ms queries with proper indexing #### Key Deliverables -- Market data repository with TimescaleDB optimization -- Real-time and historical price data collection -- Technical analysis calculation engine -- Migration scripts for moving existing data +- MarketDataRepository with TimescaleDB optimization +- MarketDataService with technical analysis calculations +- 
MarketData entities (Price, OHLCV, TechnicalIndicators) +- Dagster job for automated twice-daily collection +- pytest-vcr tests for API clients - Performance benchmarks for time-series queries #### Success Criteria -- ✅ Market data efficiently stored in TimescaleDB hypertables -- ✅ Sub-100ms queries for common market data retrievals -- ✅ All technical indicators calculating accurately +- ✅ TimescaleDB hypertables storing historical price data +- ✅ Sub-100ms queries for price lookups and indicators +- ✅ Technical indicators calculating accurately +- ✅ Dagster job running twice daily (market open/close) - ✅ Complete migration from file-based storage - ✅ Market data domain ready for agent integration -### Phase 3: Social Media Domain (Following Phase 2) -**Timeline**: 3-4 weeks +### Phase 3: Social Media Domain + Dagster Integration +**Timeline**: 2-3 weeks **Status**: 📋 Planned #### Core Objectives -- **Reddit Integration**: Implement Reddit API for financial subreddits -- **Twitter/X Integration**: Add social sentiment from Twitter feeds -- **Social Sentiment Analysis**: Aggregate sentiment scoring across platforms +- **Reddit Integration**: PRAW library for financial subreddits (r/wallstreetbets, r/stocks) +- **Twitter/X Alternative**: Evaluate Reddit-only approach or alternative sources +- **Social Sentiment Analysis**: OpenRouter LLM sentiment across posts - **Cross-Domain Relations**: Link social sentiment to market data and news -- **pgvectorscale Preparation**: Prepare social data for vector search +- **Dagster Social Media Job**: Daily social sentiment collection +- **Vector Embeddings**: Semantic search across social discussions #### Key Deliverables -- Reddit and Twitter data collection clients -- Social sentiment aggregation algorithms -- Social media data repository with PostgreSQL storage -- Cross-domain correlation analysis tools -- Foundation for RAG implementation +- RedditClient with pytest-vcr tests +- SocialMediaRepository with PostgreSQL + pgvectorscale +- SocialMediaService with sentiment aggregation +- Dagster job for daily Reddit data collection +- Cross-domain correlation queries (social ↔ news ↔ price) +- Vector embeddings for semantic post search #### Success Criteria -- ✅ Social media data collected from multiple sources +- ✅ Reddit data collected daily from financial subreddits - ✅ Sentiment scores integrated with market events -- ✅ Cross-domain relationships established in database -- ✅ Social media domain ready for RAG enhancement +- ✅ Cross-domain relationships queryable in database +- ✅ Dagster job running daily social collection +- ✅ Vector embeddings enabling semantic social search - ✅ Three-domain architecture complete -### Phase 4: Dagster Data Collection Orchestration -**Timeline**: 3-4 weeks +#### Blockers to Resolve +- **Reddit API Access**: Obtain REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET +- **Twitter/X Alternative**: Evaluate API costs or alternative data sources + +### Phase 4: RAG Enhancement + Advanced Orchestration +**Timeline**: 3-4 weeks **Status**: 📋 Planned #### Core Objectives -- **Pipeline Architecture**: Design daily/twice-daily data collection workflows -- **Data Quality Monitoring**: Implement validation and gap detection -- **Automated Backfill**: Handle missing data and API failures gracefully -- **Performance Monitoring**: Track pipeline health and data freshness -- **Alerting System**: Notify on pipeline failures or data quality issues +- **RAG Agent Enhancement**: All agents use vector similarity search for context +- **Historical 
Pattern Matching**: Semantic search for comparable market scenarios +- **Cross-Domain RAG**: Agents query across news, price, and social data +- **Advanced Dagster Features**: Data quality monitoring, gap detection, backfill +- **Performance Optimization**: Vector query tuning, database optimization +- **Monitoring & Alerting**: Pipeline health tracking and failure notifications #### Key Deliverables -- Dagster asset definitions for all data domains -- Automated data quality checks and validation -- Gap detection and backfill capabilities +- RAG-enhanced agents with similarity-based context retrieval +- Cross-domain vector search (find similar market conditions) +- Dagster data quality checks and validation +- Automated backfill for missing historical data - Monitoring dashboard for pipeline health -- Comprehensive logging and error handling - -#### Success Criteria -- ✅ Fully automated data collection running daily -- ✅ Data quality monitoring with automated alerts -- ✅ Zero-downtime pipeline updates and maintenance -- ✅ Historical data gaps automatically detected and filled -- ✅ Pipeline performance metrics tracked and optimized - -### Phase 5: RAG Implementation + OpenRouter Migration -**Timeline**: 4-5 weeks -**Status**: 📋 Planned - -#### Core Objectives -- **pgvectorscale Integration**: Implement vector storage for historical patterns -- **RAG Agent Enhancement**: Agents use similarity search for context -- **OpenRouter Migration**: Complete migration to unified LLM provider -- **Historical Context**: Agents reference past decisions and market conditions -- **Pattern Recognition**: Semantic similarity for comparable market scenarios - -#### Key Deliverables -- pgvectorscale extension configured and optimized -- Vector embeddings for all historical data -- RAG-enhanced agent decision making -- OpenRouter integration replacing all LLM providers -- Similarity search for historical pattern matching +- Performance benchmarks for vector queries (< 50ms target) #### Success Criteria - ✅ All agents using RAG for contextual decisions -- ✅ Vector search performing sub-50ms similarity queries -- ✅ OpenRouter as sole LLM provider across all agents -- ✅ Agents demonstrating improved decision accuracy -- ✅ Historical pattern matching enhancing trading analysis +- ✅ Vector similarity search < 50ms across all domains +- ✅ Cross-domain queries enabling holistic analysis +- ✅ Dagster monitoring with automated alerts +- ✅ Data quality metrics tracked and reported +- ✅ Historical gaps detected and auto-filled +- ✅ Production-ready data infrastructure complete ## Technical Milestones +### Revised Timeline: 10-14 weeks (vs original 16-22 weeks) + +**Phase Breakdown:** +- Phase 1 (News + Dagster): 5-7 days +- Phase 2 (Market Data + Dagster): 4-5 weeks +- Phase 3 (Social Media + Dagster): 2-3 weeks +- Phase 4 (RAG + Advanced Orchestration): 3-4 weeks + ### Database Architecture -- **Month 1**: Complete PostgreSQL foundation with news domain -- **Month 2**: TimescaleDB hypertables optimized for market data -- **Month 3**: pgvectorscale configured for RAG implementation -- **Month 4**: Full database optimization and performance tuning +- **Week 1**: PostgreSQL + TimescaleDB + pgvectorscale operational (News domain) +- **Week 6**: TimescaleDB hypertables optimized for market data time-series +- **Week 9**: Three-domain database architecture complete with vector embeddings +- **Week 12**: Full RAG implementation with cross-domain similarity search ### Agent Capabilities -- **Month 1**: Basic multi-agent 
framework operational -- **Month 2**: Agents using PostgreSQL for all data access -- **Month 3**: Cross-domain agent collaboration established -- **Month 4**: RAG-powered agents with historical context +- **Week 1**: News Analysts accessing news with LLM sentiment +- **Week 6**: Technical Analysts using market data with indicators +- **Week 9**: Sentiment Analysts using social media data +- **Week 12**: All agents RAG-enhanced with historical context -### Data Pipeline Maturity -- **Month 1**: Manual data collection with basic automation -- **Month 2**: Automated collection for market data -- **Month 3**: Full three-domain automated collection -- **Month 4**: Production-grade pipeline with monitoring and alerting +### Data Pipeline Maturity (Incremental Dagster) +- **Week 1**: Daily news collection automated via Dagster +- **Week 6**: Twice-daily market data collection automated +- **Week 9**: Daily social media collection automated +- **Week 12**: Production-grade orchestration with monitoring, backfill, and alerting ## Success Metrics diff --git a/docs/specs/news/design.md b/docs/specs/news/design.md index bfcd8b7f..0974a603 100644 --- a/docs/specs/news/design.md +++ b/docs/specs/news/design.md @@ -2,7 +2,7 @@ ## Overview -This document details the technical design for completing the final 5% of the News domain implementation. The existing infrastructure is 95% complete with Google News collection, article scraping, and basic storage implemented. The remaining work focuses on **scheduled execution**, **LLM-powered sentiment analysis**, and **vector embeddings** using OpenRouter as the unified LLM provider. +This document details the technical design for completing the final 5% of the News domain implementation. The existing infrastructure is 95% complete with Google News collection, article scraping, and basic storage implemented. The remaining work focuses on **Dagster-orchestrated scheduled execution**, **LLM-powered sentiment analysis**, and **vector embeddings** using OpenRouter as the unified LLM provider. ## Architecture Overview @@ -10,33 +10,38 @@ This document details the technical design for completing the final 5% of the Ne ```mermaid graph TD - A[APScheduler] --> B[ScheduledNewsCollector] - B --> C[NewsService] - C --> D[GoogleNewsClient] - C --> E[ArticleScraperClient] - C --> F[OpenRouter LLM Client] - C --> G[OpenRouter Embeddings Client] - C --> H[NewsRepository] - H --> I[PostgreSQL + TimescaleDB + pgvectorscale] - - J[News Analysts] --> K[AgentToolkit] - K --> C - K --> H + A[Dagster Scheduler] --> B[Dagster Job: news_collection_daily] + B --> C[Dagster Op: collect_news_for_symbol] + C --> D[NewsService] + D --> E[GoogleNewsClient] + D --> F[ArticleScraperClient] + D --> G[OpenRouter Sentiment Client] + D --> H[OpenRouter Embeddings Client] + D --> I[NewsRepository] + I --> J[PostgreSQL + TimescaleDB + pgvectorscale] + + K[News Analysts] --> L[AgentToolkit] + L --> D + L --> I ``` ### Data Flow Architecture -1. **Scheduled Collection Flow** +1. **Scheduled Collection Flow (Dagster)** ``` - APScheduler → ScheduledNewsCollector → NewsService.update_company_news() - → GoogleNewsClient → ArticleScraperClient → OpenRouter (sentiment + embeddings) + Dagster Schedule → Dagster Job → Dagster Op (per symbol) + → NewsService.update_company_news() + → GoogleNewsClient (RSS) → ArticleScraperClient (content) + → OpenRouter (sentiment + embeddings) → NewsRepository.upsert_batch() → PostgreSQL ``` -2. **Agent Query Flow** +2. 
**Agent Query Flow (RAG)** ``` - News Analyst → AgentToolkit → NewsService.find_relevant_articles() - → NewsRepository (semantic search) → pgvectorscale vector similarity + News Analyst → AgentToolkit → NewsService.find_similar_news() + → NewsRepository.find_similar_articles() + → pgvectorscale vector similarity (cosine distance) + → Return ranked results with sentiment ``` ### Key Design Principles @@ -44,903 +49,1065 @@ graph TD - **Leverage Existing 95%**: Build on proven GoogleNewsClient and ArticleScraperClient infrastructure - **OpenRouter Unified**: Single API for both sentiment analysis and embeddings - **Best-Effort Processing**: LLM failures don't block article storage -- **Vector-Enhanced Search**: Semantic similarity for News Analysts -- **Fault-Tolerant Scheduling**: Robust error handling and monitoring +- **Vector-Enhanced Search**: Semantic similarity for News Analysts via RAG +- **Dagster Orchestration**: Fault-tolerant scheduling with built-in monitoring and alerting +- **Layered Architecture**: Entity → Repository → Service → Dagster Op → Dagster Job ## Domain Model -### Enhanced NewsArticle Entity +### Enhanced NewsArticle Dataclass -The existing `NewsArticle` entity requires enhancements for structured sentiment and vector support: +The existing `NewsArticle` dataclass requires enhancements for LLM sentiment and vector support: ```python -from typing import Optional, Dict, Any, List -from pydantic import BaseModel, Field, validator -import datetime +from dataclasses import dataclass, field +from datetime import date +from typing import Optional, List -class SentimentScore(BaseModel): - """Structured sentiment analysis result""" - sentiment: Literal["positive", "negative", "neutral"] - confidence: float = Field(ge=0.0, le=1.0) - reasoning: str - - @validator('confidence') - def validate_confidence(cls, v): - if v < 0.5: - raise ValueError("Confidence must be >= 0.5 for reliable sentiment") - return v +@dataclass +class NewsArticle: + """Represents a news article with sentiment and embeddings.""" -class NewsArticle(BaseModel): - """Enhanced NewsArticle entity with sentiment and vector support""" # Existing fields (95% complete) headline: str - url: str = Field(..., regex=r'^https?://') - source: str - published_date: datetime.datetime + url: str # Unique identifier for deduplication + source: str # "Google News", "Finnhub", etc. 
+ published_date: date + + # Optional existing fields summary: Optional[str] = None - entities: List[str] = Field(default_factory=list) + entities: List[str] = field(default_factory=list) author: Optional[str] = None category: Optional[str] = None - - # Enhanced fields (final 5%) - sentiment_score: Optional[SentimentScore] = None - title_embedding: Optional[List[float]] = Field(None, min_items=1536, max_items=1536) - content_embedding: Optional[List[float]] = Field(None, min_items=1536, max_items=1536) - - # Metadata - created_at: datetime.datetime = Field(default_factory=datetime.datetime.now) - updated_at: datetime.datetime = Field(default_factory=datetime.datetime.now) - - @validator('content_embedding', 'title_embedding') - def validate_embeddings(cls, v): - if v and len(v) != 1536: - raise ValueError("Embeddings must be 1536 dimensions for OpenRouter compatibility") - return v - + + # Enhanced fields (final 5% - LLM sentiment) + sentiment_score: Optional[float] = None # -1.0 to 1.0 + sentiment_confidence: Optional[float] = None # 0.0 to 1.0 + sentiment_label: Optional[str] = None # "positive", "negative", "neutral" + + # Enhanced fields (final 5% - vector embeddings) + title_embedding: Optional[List[float]] = None # 1536 dimensions + content_embedding: Optional[List[float]] = None # 1536 dimensions + + def to_entity(self, symbol: Optional[str] = None) -> NewsArticleEntity: + """Convert NewsArticle dataclass to NewsArticleEntity SQLAlchemy model.""" + return NewsArticleEntity( + headline=self.headline, + url=self.url, + source=self.source, + published_date=self.published_date, + summary=self.summary, + entities=self.entities if self.entities else None, + sentiment_score=self.sentiment_score, + sentiment_confidence=self.sentiment_confidence, + sentiment_label=self.sentiment_label, + author=self.author, + category=self.category, + symbol=symbol, + title_embedding=self.title_embedding, + content_embedding=self.content_embedding, + ) + + @staticmethod + def from_entity(entity: NewsArticleEntity) -> 'NewsArticle': + """Convert NewsArticleEntity SQLAlchemy model to NewsArticle dataclass.""" + return NewsArticle( + headline=entity.headline, + url=entity.url, + source=entity.source, + published_date=entity.published_date, + summary=entity.summary, + entities=entity.entities or [], + sentiment_score=entity.sentiment_score, + sentiment_confidence=entity.sentiment_confidence, + sentiment_label=entity.sentiment_label, + author=entity.author, + category=entity.category, + title_embedding=entity.title_embedding, + content_embedding=entity.content_embedding, + ) + def has_reliable_sentiment(self) -> bool: - """Check if sentiment analysis is reliable (confidence >= 0.5)""" - return bool(self.sentiment_score and self.sentiment_score.confidence >= 0.5) - - def to_record(self) -> Dict[str, Any]: - """Convert to database record format""" - record = self.dict() - # Convert sentiment to JSONB format - if self.sentiment_score: - record['sentiment_score'] = self.sentiment_score.dict() - return record - - @classmethod - def from_record(cls, record: Dict[str, Any]) -> 'NewsArticle': - """Create entity from database record""" - if record.get('sentiment_score'): - record['sentiment_score'] = SentimentScore(**record['sentiment_score']) - return cls(**record) + """Check if sentiment analysis is reliable (confidence >= 0.6).""" + return bool( + self.sentiment_score is not None + and self.sentiment_confidence is not None + and self.sentiment_confidence >= 0.6 + ) ``` -### New NewsJobConfig Entity +### 
NewsArticleEntity SQLAlchemy Model -Configuration entity for scheduled news collection: +The existing SQLAlchemy model already has vector embedding columns. We need to add sentiment fields: ```python -from pydantic import BaseModel, Field, validator -from typing import List +from sqlalchemy import Float, String, Text, DateTime, Date, JSON, Index, func +from sqlalchemy.dialects.postgresql import UUID as PG_UUID +from sqlalchemy.orm import Mapped, mapped_column +from pgvector.sqlalchemy import Vector +import uuid +from datetime import datetime, date -class NewsJobConfig(BaseModel): - """Configuration for scheduled news collection jobs""" - tickers: List[str] = Field(..., min_items=1, max_items=50) - schedule_hour: int = Field(..., ge=0, le=23) - sentiment_model: str = Field(default="anthropic/claude-3.5-haiku") - embedding_model: str = Field(default="text-embedding-3-large") - max_articles_per_ticker: int = Field(default=20, ge=5, le=100) - lookback_days: int = Field(default=7, ge=1, le=30) - - @validator('tickers') - def validate_tickers(cls, v): - # Ensure uppercase stock symbols - return [ticker.upper().strip() for ticker in v] - - @validator('sentiment_model') - def validate_sentiment_model(cls, v): - # Ensure OpenRouter model format - if '/' not in v: - raise ValueError("Model must be in OpenRouter format (provider/model)") - return v - - def to_cron_expression(self) -> str: - """Convert to cron expression for APScheduler""" - return f"0 {self.schedule_hour} * * *" # Daily at specified hour +class NewsArticleEntity(Base): + """SQLAlchemy model for news articles with vector embedding support.""" + + __tablename__ = "news_articles" + __table_args__ = ( + Index("idx_symbol_date", "symbol", "published_date"), + Index("idx_published_date", "published_date"), + Index("idx_url_unique", "url", unique=True), + # Vector index for pgvectorscale similarity search + Index("idx_title_embedding_vector", "title_embedding", postgresql_using="ivfflat"), + ) + + # Primary key + id: Mapped[uuid.UUID] = mapped_column(PG_UUID(as_uuid=True), primary_key=True, default=uuid7) + + # Core article fields + headline: Mapped[str] = mapped_column(Text, nullable=False) + url: Mapped[str] = mapped_column(Text, nullable=False, unique=True) + source: Mapped[str] = mapped_column(String(100), nullable=False) + published_date: Mapped[date] = mapped_column(Date, nullable=False, index=True) + + # Optional fields + summary: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + entities: Mapped[Optional[List[str]]] = mapped_column(JSON, nullable=True) + author: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) + category: Mapped[Optional[str]] = mapped_column(String(100), nullable=True) + symbol: Mapped[Optional[str]] = mapped_column(String(20), index=True, nullable=True) + + # LLM sentiment fields (NEW) + sentiment_score: Mapped[Optional[float]] = mapped_column(Float, nullable=True) + sentiment_confidence: Mapped[Optional[float]] = mapped_column(Float, nullable=True) + sentiment_label: Mapped[Optional[str]] = mapped_column(String(20), nullable=True) + + # Vector embeddings (EXISTING - already in 95% complete infrastructure) + title_embedding: Mapped[Optional[List[float]]] = mapped_column(Vector(1536), nullable=True) + content_embedding: Mapped[Optional[List[float]]] = mapped_column(Vector(1536), nullable=True) + + # Audit timestamps + created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now()) + updated_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now(), 
onupdate=func.now()) ``` -## Database Design +## Data Access Layer -### Schema Enhancements +### NewsRepository Enhancements -The existing `news_articles` table requires minimal modifications to support the final 5%: - -```sql --- Existing table structure (95% complete) -CREATE TABLE IF NOT EXISTS news_articles ( - id SERIAL PRIMARY KEY, - headline TEXT NOT NULL, - url TEXT UNIQUE NOT NULL, - source TEXT NOT NULL, - published_date TIMESTAMPTZ NOT NULL, - summary TEXT, - entities TEXT[] DEFAULT '{}', - sentiment_score JSONB, -- Enhanced for structured format - author TEXT, - category TEXT, - title_embedding vector(1536), -- New: pgvectorscale vector type - content_embedding vector(1536), -- New: pgvectorscale vector type - created_at TIMESTAMPTZ DEFAULT NOW(), - updated_at TIMESTAMPTZ DEFAULT NOW() -); - --- New indexes for final 5% performance -CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_news_articles_symbol_date - ON news_articles (((entities)), published_date DESC); - -CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_news_articles_title_embedding - ON news_articles USING vectors (title_embedding vector_cosine_ops); - -CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_news_articles_content_embedding - ON news_articles USING vectors (content_embedding vector_cosine_ops); - -CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_news_articles_sentiment - ON news_articles (((sentiment_score->>'sentiment'))) - WHERE sentiment_score IS NOT NULL; -``` - -### Query Patterns - -**Time-based News Queries (News Analysts)** -```sql --- Optimized for Agent queries: recent news for specific ticker -SELECT headline, summary, sentiment_score, published_date -FROM news_articles -WHERE entities @> ARRAY[$1::text] - AND published_date >= NOW() - INTERVAL '30 days' -ORDER BY published_date DESC -LIMIT 20; -``` - -**Semantic Similarity Queries (Vector Search)** -```sql --- Find similar articles using pgvectorscale -SELECT headline, url, summary, - 1 - (title_embedding <=> $1::vector) AS similarity_score -FROM news_articles -WHERE entities @> ARRAY[$2::text] - AND title_embedding IS NOT NULL -ORDER BY title_embedding <=> $1::vector -LIMIT 10; -``` - -**Batch Upsert Operations (Daily Collection)** -```sql --- Efficient upsert for daily news collection -INSERT INTO news_articles (headline, url, source, published_date, summary, entities, sentiment_score, title_embedding, content_embedding) -VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) -ON CONFLICT (url) DO UPDATE SET - headline = EXCLUDED.headline, - summary = EXCLUDED.summary, - entities = EXCLUDED.entities, - sentiment_score = EXCLUDED.sentiment_score, - title_embedding = EXCLUDED.title_embedding, - content_embedding = EXCLUDED.content_embedding, - updated_at = NOW(); -``` - -## API Integration - -### OpenRouter Unified Client - -Single OpenRouter integration for both sentiment analysis and embeddings: +Add RAG-powered vector similarity search methods to the existing repository: ```python -from typing import List, Optional, Dict, Any -import httpx +class NewsRepository: + """Repository for news articles with vector similarity search.""" + + def __init__(self, database_manager: DatabaseManager): + self.db_manager = database_manager + + # ... existing methods (list, get, upsert, delete, list_by_date_range, upsert_batch) ... + + async def find_similar_articles( + self, + embedding: List[float], + limit: int = 10, + threshold: float = 0.7, + symbol: Optional[str] = None + ) -> List[NewsArticle]: + """ + Find articles similar to given embedding using pgvectorscale cosine distance. 
+ + Args: + embedding: Query embedding vector (1536 dimensions) + limit: Maximum number of results to return + threshold: Minimum similarity score (0.0-1.0) + symbol: Optional symbol filter + + Returns: + List of NewsArticle objects ranked by similarity + """ + async with self.db_manager.get_session() as session: + # Cosine similarity: 1 - cosine_distance + # pgvectorscale operator: <=> for cosine distance + query = select( + NewsArticleEntity, + (1 - NewsArticleEntity.title_embedding.cosine_distance(embedding)).label('similarity') + ).filter( + NewsArticleEntity.title_embedding.is_not(None) + ) + + # Optional symbol filter + if symbol: + query = query.filter(NewsArticleEntity.symbol == symbol) + + # Filter by similarity threshold and order by similarity desc + query = query.filter( + (1 - NewsArticleEntity.title_embedding.cosine_distance(embedding)) >= threshold + ).order_by( + NewsArticleEntity.title_embedding.cosine_distance(embedding) + ).limit(limit) + + result = await session.execute(query) + rows = result.all() + + # Convert to NewsArticle dataclass + articles = [NewsArticle.from_entity(row[0]) for row in rows] + + logger.info(f"Found {len(articles)} similar articles (threshold={threshold})") + return articles + + async def batch_update_embeddings( + self, + article_embeddings: List[Tuple[uuid.UUID, List[float], List[float]]] + ) -> int: + """ + Efficiently batch update embeddings for multiple articles. + + Args: + article_embeddings: List of (article_id, title_embedding, content_embedding) tuples + + Returns: + Number of articles updated + """ + if not article_embeddings: + return 0 + + async with self.db_manager.get_session() as session: + # Use bulk update with PostgreSQL + stmt = update(NewsArticleEntity).where( + NewsArticleEntity.id == bindparam('article_id') + ).values( + title_embedding=bindparam('title_emb'), + content_embedding=bindparam('content_emb'), + updated_at=func.now() + ) + + # Prepare batch data + batch_data = [ + { + 'article_id': article_id, + 'title_emb': title_emb, + 'content_emb': content_emb + } + for article_id, title_emb, content_emb in article_embeddings + ] + + await session.execute(stmt, batch_data) + + logger.info(f"Batch updated embeddings for {len(article_embeddings)} articles") + return len(article_embeddings) +``` + +## Service Layer + +### OpenRouter LLM Clients + +#### Sentiment Analysis Client + +```python +from typing import Optional, Dict, Any +import aiohttp +import asyncio from tradingagents.config import TradingAgentsConfig -class OpenRouterClient: - """Unified OpenRouter client for sentiment analysis and embeddings""" - +@dataclass +class SentimentResult: + """Result from sentiment analysis.""" + score: float # -1.0 to 1.0 + confidence: float # 0.0 to 1.0 + label: str # "positive", "negative", "neutral" + reasoning: str + +class OpenRouterSentimentClient: + """Client for sentiment analysis via OpenRouter.""" + def __init__(self, config: TradingAgentsConfig): - self.config = config - self.base_url = "https://openrouter.ai/api/v1" - self.headers = { - "Authorization": f"Bearer {config.openrouter_api_key}", + self.api_key = config.openrouter_api_key + self.model = config.quick_think_llm # claude-3.5-haiku + self.base_url = "https://openrouter.ai/api/v1/chat/completions" + + async def analyze_sentiment( + self, + title: str, + content: str + ) -> SentimentResult: + """ + Analyze sentiment of news article using OpenRouter LLM. 
+ + Args: + title: Article headline + content: Article content/summary + + Returns: + SentimentResult with score, confidence, label, and reasoning + """ + try: + prompt = self._build_sentiment_prompt(title, content) + response = await self._call_openrouter(prompt) + return self._parse_sentiment_response(response) + + except Exception as e: + logger.warning(f"OpenRouter sentiment analysis failed: {e}, using keyword fallback") + return self._fallback_sentiment(title, content) + + def _build_sentiment_prompt(self, title: str, content: str) -> str: + """Build structured prompt for sentiment analysis.""" + return f"""Analyze the financial sentiment of this news article. + +Title: {title} +Content: {content[:1000]}... + +Provide sentiment analysis as JSON: +{{ + "score": , + "confidence": , + "label": "", + "reasoning": "" +}} + +Focus on financial market implications.""" + + async def _call_openrouter(self, prompt: str) -> Dict[str, Any]: + """Call OpenRouter API with retry logic.""" + headers = { + "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json" } - - async def analyze_sentiment(self, text: str, model: Optional[str] = None) -> SentimentScore: - """Generate structured sentiment analysis using LLM""" - model = model or self.config.quick_think_llm - - prompt = f"""Analyze the sentiment of this news article text and respond with ONLY a JSON object: -Article: {text[:2000]} # Truncate for token limits - -Required JSON format: -{{ - "sentiment": "positive|negative|neutral", - "confidence": 0.0-1.0, - "reasoning": "brief explanation" -}}""" - payload = { - "model": model, + "model": self.model, "messages": [{"role": "user", "content": prompt}], - "temperature": 0.1, # Low temperature for consistent structured output - "max_tokens": 200 + "response_format": {"type": "json_object"} } - - async with httpx.AsyncClient() as client: - try: - response = await client.post( - f"{self.base_url}/chat/completions", - headers=self.headers, - json=payload, - timeout=30.0 - ) - response.raise_for_status() - - result = response.json() - content = result["choices"][0]["message"]["content"].strip() - - # Parse JSON response - import json - sentiment_data = json.loads(content) - return SentimentScore(**sentiment_data) - - except Exception as e: - # Best-effort: return neutral sentiment on failure - return SentimentScore( - sentiment="neutral", - confidence=0.3, # Below reliability threshold - reasoning=f"Analysis failed: {str(e)[:100]}" - ) - - async def generate_embeddings(self, texts: List[str], model: Optional[str] = None) -> List[List[float]]: - """Generate embeddings for multiple texts""" - model = model or "text-embedding-3-large" - - # Truncate texts to avoid token limits - truncated_texts = [text[:8000] for text in texts] - - payload = { - "model": model, - "input": truncated_texts - } - - async with httpx.AsyncClient() as client: - try: - response = await client.post( - f"{self.base_url}/embeddings", - headers=self.headers, - json=payload, - timeout=60.0 - ) - response.raise_for_status() - - result = response.json() - return [item["embedding"] for item in result["data"]] - - except Exception as e: - # Return None embeddings on failure (stored as NULL in DB) - return [None] * len(texts) + + async with aiohttp.ClientSession() as session: + for attempt in range(3): # Retry up to 3 times + try: + async with session.post( + self.base_url, + headers=headers, + json=payload, + timeout=aiohttp.ClientTimeout(total=30) + ) as response: + response.raise_for_status() + data = await 
response.json() + return json.loads(data['choices'][0]['message']['content']) + + except (aiohttp.ClientError, asyncio.TimeoutError) as e: + if attempt == 2: # Last attempt + raise + await asyncio.sleep(2 ** attempt) # Exponential backoff + + def _parse_sentiment_response(self, response: Dict[str, Any]) -> SentimentResult: + """Parse OpenRouter JSON response into SentimentResult.""" + return SentimentResult( + score=float(response['score']), + confidence=float(response['confidence']), + label=response['label'], + reasoning=response.get('reasoning', '') + ) + + def _fallback_sentiment(self, title: str, content: str) -> SentimentResult: + """Keyword-based fallback sentiment analysis.""" + text = f"{title} {content}".lower() + + positive_keywords = ['gain', 'up', 'rise', 'growth', 'profit', 'beat', 'success'] + negative_keywords = ['loss', 'down', 'fall', 'decline', 'miss', 'failure', 'concern'] + + pos_count = sum(1 for keyword in positive_keywords if keyword in text) + neg_count = sum(1 for keyword in negative_keywords if keyword in text) + + if pos_count > neg_count: + return SentimentResult(score=0.3, confidence=0.5, label="positive", reasoning="Keyword-based fallback") + elif neg_count > pos_count: + return SentimentResult(score=-0.3, confidence=0.5, label="negative", reasoning="Keyword-based fallback") + else: + return SentimentResult(score=0.0, confidence=0.5, label="neutral", reasoning="Keyword-based fallback") ``` -### Enhanced NewsService Integration +#### Embeddings Client -Update existing NewsService to integrate LLM capabilities: +```python +class OpenRouterEmbeddingsClient: + """Client for generating embeddings via OpenRouter.""" + + def __init__(self, config: TradingAgentsConfig): + self.api_key = config.openrouter_api_key + self.model = "openai/text-embedding-ada-002" # Via OpenRouter + self.base_url = "https://openrouter.ai/api/v1/embeddings" + + async def generate_embeddings(self, texts: List[str]) -> List[List[float]]: + """ + Generate embeddings for multiple texts. + + Args: + texts: List of text strings to embed + + Returns: + List of 1536-dimensional embedding vectors + """ + if not texts: + return [] + + try: + # Preprocess texts + processed_texts = [self._preprocess_text(text) for text in texts] + + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json" + } + + payload = { + "model": self.model, + "input": processed_texts + } + + async with aiohttp.ClientSession() as session: + async with session.post( + self.base_url, + headers=headers, + json=payload, + timeout=aiohttp.ClientTimeout(total=60) + ) as response: + response.raise_for_status() + data = await response.json() + + # Extract embeddings + embeddings = [item['embedding'] for item in data['data']] + + # Validate dimensions + for i, emb in enumerate(embeddings): + if len(emb) != 1536: + raise ValueError(f"Invalid embedding dimension at index {i}: {len(emb)}") + + return embeddings + + except Exception as e: + logger.error(f"Embeddings generation failed: {e}, using zero vectors") + # Return zero vectors as fallback + return [[0.0] * 1536 for _ in texts] + + async def generate_article_embeddings( + self, + article: NewsArticle + ) -> Tuple[List[float], List[float]]: + """ + Generate embeddings for article title and content. 
+ + Args: + article: NewsArticle to generate embeddings for + + Returns: + Tuple of (title_embedding, content_embedding) + """ + texts = [] + + if article.headline: + texts.append(article.headline) + + if article.summary: + # Combine title and summary for comprehensive content embedding + combined = f"{article.headline} {article.summary}" + texts.append(combined) + + if not texts: + return [0.0] * 1536, [0.0] * 1536 + + embeddings = await self.generate_embeddings(texts) + + title_embedding = embeddings[0] if len(embeddings) > 0 else [0.0] * 1536 + content_embedding = embeddings[1] if len(embeddings) > 1 else [0.0] * 1536 + + return title_embedding, content_embedding + + def _preprocess_text(self, text: str) -> str: + """Preprocess text for optimal embedding generation.""" + # Remove extra whitespace + cleaned = " ".join(text.split()) + # Limit to 8000 characters (OpenAI embedding limit) + return cleaned[:8000] +``` + +### Enhanced NewsService + +Integrate LLM clients into the existing NewsService: ```python class NewsService: - """Enhanced NewsService with LLM sentiment and embeddings (final 5%)""" - - def __init__(self, - repository: NewsRepository, - google_client: GoogleNewsClient, - scraper_client: ArticleScraperClient, - openrouter_client: OpenRouterClient): - self.repository = repository + """Service for news data, sentiment analysis, and vector embeddings.""" + + def __init__( + self, + google_client: GoogleNewsClient, + repository: NewsRepository, + article_scraper: ArticleScraperClient, + sentiment_client: OpenRouterSentimentClient, + embeddings_client: OpenRouterEmbeddingsClient, + ): self.google_client = google_client - self.scraper_client = scraper_client - self.openrouter_client = openrouter_client - - async def update_company_news(self, - symbol: str, - lookback_days: int = 7, - max_articles: int = 20, - include_sentiment: bool = True, - include_embeddings: bool = True) -> List[NewsArticle]: - """Enhanced method with LLM sentiment analysis and embeddings""" - - # Step 1: Use existing 95% infrastructure for collection - cutoff_date = datetime.datetime.now() - datetime.timedelta(days=lookback_days) - - # Fetch from Google News (existing) - google_results = await self.google_client.fetch_company_news(symbol, max_articles) - - articles = [] - for result in google_results: - if result.published_date < cutoff_date: - continue - - # Scrape full content (existing) - scraped_content = await self.scraper_client.scrape_article(result.url) - - # Create base article (existing pattern) - article = NewsArticle( - headline=result.title, - url=result.url, - source=result.source, - published_date=result.published_date, - summary=scraped_content.summary if scraped_content else result.description, - entities=[symbol], - author=scraped_content.author if scraped_content else None - ) - - # Step 2: NEW - Add LLM sentiment analysis - if include_sentiment and scraped_content and scraped_content.content: - article.sentiment_score = await self.openrouter_client.analyze_sentiment( - scraped_content.content - ) - - articles.append(article) - - # Step 3: NEW - Batch generate embeddings - if include_embeddings and articles: - titles = [a.headline for a in articles] - contents = [a.summary or a.headline for a in articles] - - title_embeddings = await self.openrouter_client.generate_embeddings(titles) - content_embeddings = await self.openrouter_client.generate_embeddings(contents) - - for i, article in enumerate(articles): - if i < len(title_embeddings) and title_embeddings[i]: - article.title_embedding = 
title_embeddings[i] - if i < len(content_embeddings) and content_embeddings[i]: - article.content_embedding = content_embeddings[i] - - # Step 4: Batch persist (existing pattern) - await self.repository.upsert_batch(articles) - return articles - - async def find_similar_articles(self, - query_text: str, - symbol: Optional[str] = None, - limit: int = 10) -> List[NewsArticle]: - """NEW: Semantic similarity search for News Analysts""" - - # Generate query embedding - query_embeddings = await self.openrouter_client.generate_embeddings([query_text]) - if not query_embeddings[0]: - # Fallback to text search - return await self.repository.find_by_text_search(query_text, symbol, limit) - - return await self.repository.find_similar_articles( - query_embeddings[0], symbol, limit - ) -``` + self.repository = repository + self.article_scraper = article_scraper + self.sentiment_client = sentiment_client + self.embeddings_client = embeddings_client -## Job Scheduling Architecture + async def update_company_news(self, symbol: str) -> NewsUpdateResult: + """ + Update company news with full LLM enrichment pipeline. -### APScheduler Integration + Flow: + 1. Fetch RSS feed from Google News + 2. Scrape article content + 3. Generate LLM sentiment analysis + 4. Generate vector embeddings + 5. Store in PostgreSQL with embeddings -Robust scheduled execution using APScheduler: + Args: + symbol: Stock ticker symbol -```python -from apscheduler.schedulers.asyncio import AsyncIOScheduler -from apscheduler.jobstores.redis import RedisJobStore # Optional: persistent job store -from apscheduler.executors.asyncio import AsyncIOExecutor -import logging - -class ScheduledNewsCollector: - """Orchestrates scheduled news collection jobs""" - - def __init__(self, - news_service: NewsService, - config: TradingAgentsConfig, - job_config: NewsJobConfig): - self.news_service = news_service - self.config = config - self.job_config = job_config - - # Configure APScheduler - jobstores = { - 'default': {'type': 'memory'} # Use Redis for production - } - executors = { - 'default': AsyncIOExecutor(), - } - job_defaults = { - 'coalesce': False, # Don't combine missed jobs - 'max_instances': 1, # One job per ticker at a time - 'misfire_grace_time': 300 # 5 minute grace period - } - - self.scheduler = AsyncIOScheduler( - jobstores=jobstores, - executors=executors, - job_defaults=job_defaults, - timezone='UTC' - ) - - async def start(self): - """Start the scheduler and register jobs""" - - for ticker in self.job_config.tickers: - # Schedule daily collection for each ticker - self.scheduler.add_job( - func=self._collect_ticker_news, - trigger='cron', - hour=self.job_config.schedule_hour, - minute=0, - args=[ticker], - id=f"news_collection_{ticker}", - replace_existing=True, - max_instances=1 - ) - - self.scheduler.start() - logging.info(f"Started news collection scheduler for {len(self.job_config.tickers)} tickers") - - async def stop(self): - """Gracefully stop the scheduler""" - if self.scheduler.running: - self.scheduler.shutdown(wait=True) - - async def _collect_ticker_news(self, ticker: str): - """Execute news collection for a single ticker""" - - start_time = datetime.datetime.now() - + Returns: + NewsUpdateResult with statistics + """ try: - logging.info(f"Starting news collection for {ticker}") - - articles = await self.news_service.update_company_news( - symbol=ticker, - lookback_days=self.job_config.lookback_days, - max_articles=self.job_config.max_articles_per_ticker, - include_sentiment=True, - include_embeddings=True + 
logger.info(f"Updating company news for {symbol}") + + # 1. Get RSS feed data + google_articles = self.google_client.get_company_news(symbol) + + if not google_articles: + logger.warning(f"No articles found for {symbol}") + return NewsUpdateResult( + status="completed", + articles_found=0, + articles_scraped=0, + articles_failed=0, + symbol=symbol, + ) + + # 2. Scrape article content + scraped_articles = await self._scrape_articles(google_articles) + + # 3. Enrich with LLM sentiment and embeddings + enriched_articles = await self._enrich_articles(scraped_articles) + + # 4. Store in repository + stored_articles = await self.repository.upsert_batch(enriched_articles, symbol) + + logger.info(f"Completed news update for {symbol}: {len(stored_articles)} articles stored") + + return NewsUpdateResult( + status="completed", + articles_found=len(google_articles), + articles_scraped=len(scraped_articles), + articles_failed=len(google_articles) - len(scraped_articles), + symbol=symbol, ) - - # Log metrics - sentiment_count = sum(1 for a in articles if a.has_reliable_sentiment()) - embedding_count = sum(1 for a in articles if a.title_embedding) - - duration = (datetime.datetime.now() - start_time).total_seconds() - - logging.info( - f"Completed news collection for {ticker}: " - f"{len(articles)} articles, {sentiment_count} with sentiment, " - f"{embedding_count} with embeddings in {duration:.1f}s" - ) - + except Exception as e: - logging.error(f"News collection failed for {ticker}: {str(e)}") - # Don't raise - let scheduler continue with other tickers - - def get_job_status(self) -> Dict[str, Any]: - """Get status of all scheduled jobs""" - jobs = self.scheduler.get_jobs() - return { - "scheduler_running": self.scheduler.running, - "job_count": len(jobs), - "jobs": [ - { - "id": job.id, - "next_run": job.next_run_time.isoformat() if job.next_run_time else None, - "trigger": str(job.trigger) - } - for job in jobs - ] - } + logger.error(f"Error updating company news for {symbol}: {e}") + raise + + async def _scrape_articles( + self, + google_articles: List[GoogleNewsArticle] + ) -> List[NewsArticle]: + """Scrape content for Google News RSS articles.""" + scraped = [] + + for article in google_articles: + if not article.link: + continue + + scrape_result = self.article_scraper.scrape_article(article.link) + + if scrape_result.status in ["SUCCESS", "ARCHIVE_SUCCESS"]: + news_article = NewsArticle( + headline=scrape_result.title or article.title, + url=article.link, + source=article.source, + published_date=date.fromisoformat( + scrape_result.publish_date or article.published.strftime("%Y-%m-%d") + ), + summary=scrape_result.content, + author=scrape_result.author, + ) + scraped.append(news_article) + + return scraped + + async def _enrich_articles( + self, + articles: List[NewsArticle] + ) -> List[NewsArticle]: + """Enrich articles with LLM sentiment and vector embeddings.""" + enriched = [] + + for article in articles: + try: + # Generate sentiment + sentiment_result = await self.sentiment_client.analyze_sentiment( + article.headline, + article.summary or "" + ) + + article.sentiment_score = sentiment_result.score + article.sentiment_confidence = sentiment_result.confidence + article.sentiment_label = sentiment_result.label + + # Generate embeddings + title_emb, content_emb = await self.embeddings_client.generate_article_embeddings(article) + article.title_embedding = title_emb + article.content_embedding = content_emb + + enriched.append(article) + + except Exception as e: + logger.warning(f"Failed to 
enrich article {article.url}: {e}, storing without enrichment") + enriched.append(article) + + return enriched + + async def find_similar_news( + self, + query_text: str, + symbol: Optional[str] = None, + limit: int = 5 + ) -> List[NewsArticle]: + """ + Find news articles similar to query text using RAG vector search. + + Args: + query_text: Text to search for similar articles + symbol: Optional symbol filter + limit: Maximum number of results + + Returns: + List of similar NewsArticle objects + """ + # Generate embedding for query text + query_embeddings = await self.embeddings_client.generate_embeddings([query_text]) + query_embedding = query_embeddings[0] + + # Search for similar articles + similar_articles = await self.repository.find_similar_articles( + embedding=query_embedding, + limit=limit, + threshold=0.7, + symbol=symbol + ) + + return similar_articles ``` -### Error Handling and Monitoring +## Dagster Orchestration Layer -Comprehensive error handling for production reliability: +### Directory Structure + +``` +tradingagents/data/ +├── __init__.py +├── jobs/ +│ ├── __init__.py +│ └── news_collection.py +├── ops/ +│ ├── __init__.py +│ └── news_ops.py +├── schedules/ +│ ├── __init__.py +│ └── news_schedules.py +└── sensors/ + ├── __init__.py + └── news_sensors.py +``` + +### Dagster Ops (Operations) ```python -class NewsCollectionMonitor: - """Monitor and handle news collection job failures""" - - def __init__(self, collector: ScheduledNewsCollector): - self.collector = collector - self.failure_counts = defaultdict(int) - self.max_failures = 3 - - async def handle_job_failure(self, ticker: str, error: Exception): - """Handle job failure with exponential backoff""" - - self.failure_counts[ticker] += 1 - - if self.failure_counts[ticker] >= self.max_failures: - logging.error(f"Max failures reached for {ticker}, disabling job") - self.collector.scheduler.remove_job(f"news_collection_{ticker}") - # Could send alert here - else: - # Schedule retry with exponential backoff - delay_minutes = 2 ** self.failure_counts[ticker] - retry_time = datetime.datetime.now() + datetime.timedelta(minutes=delay_minutes) - - self.collector.scheduler.add_job( - func=self.collector._collect_ticker_news, - trigger='date', - run_date=retry_time, - args=[ticker], - id=f"news_retry_{ticker}_{int(retry_time.timestamp())}", - max_instances=1 - ) - - def reset_failure_count(self, ticker: str): - """Reset failure count on successful job""" - if ticker in self.failure_counts: - del self.failure_counts[ticker] +# tradingagents/data/ops/news_ops.py +from dagster import op, OpExecutionContext, Out, Output, DagsterEventType +from tradingagents.domains.news.news_service import NewsService +from tradingagents.config import TradingAgentsConfig +from tradingagents.lib.database import DatabaseManager + +@op( + required_resource_keys={"database_manager"}, + out=Out(dict), + tags={"kind": "news", "domain": "news"}, +) +def collect_news_for_symbol(context: OpExecutionContext, symbol: str) -> dict: + """ + Collect and process news for a single stock symbol. 
+
+    Args:
+        symbol: Stock ticker symbol
+
+    Returns:
+        Dictionary with collection statistics
+    """
+    context.log.info(f"Starting news collection for {symbol}")
+
+    try:
+        # Build NewsService with dependencies
+        config = TradingAgentsConfig.from_env()
+        db_manager = context.resources.database_manager
+        news_service = NewsService.build(db_manager, config)
+
+        # update_company_news is an async coroutine; run it to completion
+        # from this synchronous op
+        import asyncio
+
+        result = asyncio.run(news_service.update_company_news(symbol))
+
+        context.log.info(
+            f"Completed news collection for {symbol}: "
+            f"{result.articles_found} found, {result.articles_scraped} scraped"
+        )
+
+        return {
+            "symbol": symbol,
+            "articles_found": result.articles_found,
+            "articles_scraped": result.articles_scraped,
+            "articles_failed": result.articles_failed,
+            "status": result.status,
+        }
+
+    except Exception as e:
+        context.log.error(f"News collection failed for {symbol}: {e}")
+        raise
+```
+
+### Dagster Jobs
+
+```python
+# tradingagents/data/jobs/news_collection.py
+from typing import Generator
+
+from dagster import job, DynamicOut, DynamicOutput, OpExecutionContext, op
+from tradingagents.data.ops.news_ops import collect_news_for_symbol
+
+@op(out=DynamicOut())
+def get_symbols_to_collect(context: OpExecutionContext) -> Generator[DynamicOutput, None, None]:
+    """
+    Get list of symbols to collect news for.
+
+    Yields:
+        DynamicOutput for each symbol
+    """
+    # This could be loaded from Dagster config, database, or external source
+    symbols = context.op_config.get("symbols", ["AAPL", "GOOGL", "MSFT", "TSLA"])
+
+    context.log.info(f"Collecting news for {len(symbols)} symbols: {symbols}")
+
+    for symbol in symbols:
+        yield DynamicOutput(symbol, mapping_key=symbol)
+
+@job(
+    tags={"dagster/priority": "high", "domain": "news"},
+)
+def news_collection_daily():
+    """
+    Daily news collection job for all configured symbols.
+
+    Workflow:
+    1. Get symbols to collect
+    2. Fan out: collect news for each symbol in parallel
+    3. 
Aggregate results
+    """
+    get_symbols_to_collect().map(collect_news_for_symbol)
+```
 
-**Deliverables:**
-- [ ] `ScheduledNewsCollector` with APScheduler integration
-- [ ] `NewsCollectionMonitor` for error handling and retries
-- [ ] Configuration management for job scheduling
-- [ ] Graceful startup and shutdown procedures
+### Dagster Schedules
 
-**Testing Focus:**
-- Scheduler lifecycle testing
-- Job execution and failure handling
-- Configuration validation
+```python
+# tradingagents/data/schedules/news_schedules.py
+from dagster import schedule, ScheduleEvaluationContext, RunRequest
+from tradingagents.data.jobs.news_collection import news_collection_daily
 
-### Phase 4: Testing and Performance Optimization (Week 4)
+@schedule(
+    job=news_collection_daily,
+    cron_schedule="0 6 * * *",  # Daily at 6 AM UTC
+    execution_timezone="UTC",
+)
+def news_collection_daily_schedule(context: ScheduleEvaluationContext):
+    """
+    Schedule for daily news collection at 6 AM UTC.
 
-**Deliverables:**
-- [ ] Complete test coverage maintaining >85% threshold
-- [ ] Performance optimization for vector queries
-- [ ] Documentation and deployment guides
-- [ ] Integration with existing News Analyst AgentToolkit
+    Returns:
+        RunRequest with job configuration
+    """
+    return RunRequest(
+        run_key=f"news_collection_{context.scheduled_execution_time.isoformat()}",
+        run_config={
+            "ops": {
+                "get_symbols_to_collect": {
+                    "config": {
+                        "symbols": ["AAPL", "GOOGL", "MSFT", "TSLA", "AMZN", "META", "NVDA"]
+                    }
+                }
+            }
+        },
+        tags={
+            "scheduled_time": context.scheduled_execution_time.isoformat(),
+            "job_type": "news_collection",
+        },
+    )
+```
 
-**Testing Focus:**
-- End-to-end integration tests
-- Performance benchmarks for vector similarity queries
-- Load testing for scheduled job execution
+### Dagster Sensors (Failure Alerting)
+
+```python
+# tradingagents/data/sensors/news_sensors.py
+from dagster import RunFailureSensorContext, run_failure_sensor
+from tradingagents.data.jobs.news_collection import news_collection_daily
+
+@run_failure_sensor(
+    name="news_collection_failure_sensor",
+    monitored_jobs=[news_collection_daily],
+)
+def news_collection_failure_alert(context: RunFailureSensorContext):
+    """
+    Alert when news collection job fails.
+
+    This could send notifications via Slack, PagerDuty, email, etc.
+    """
+    context.log.error(
+        f"News collection job failed!\n"
+        f"Run ID: {context.dagster_run.run_id}\n"
+        f"Failure info: {context.failure_event.event_specific_data}"
+    )
+
+    # TODO: Implement alerting (Slack, PagerDuty, email)
+    # send_slack_alert(...)
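+
+    # Minimal sketch of one alerting option (a Slack incoming webhook), kept as
+    # comments because alerting is not implemented yet. The `requests` package
+    # and a SLACK_WEBHOOK_URL environment variable are assumptions for
+    # illustration, not part of the current codebase:
+    #
+    # import os
+    # import requests
+    #
+    # webhook_url = os.environ.get("SLACK_WEBHOOK_URL")
+    # if webhook_url:
+    #     requests.post(
+    #         webhook_url,
+    #         json={"text": f"News collection failed (run {context.dagster_run.run_id})"},
+    #         timeout=10,
+    #     )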
+``` + +## Database Schema Changes + +### Migration Script (Alembic) + +```python +# alembic/versions/20250111_add_sentiment_fields.py +"""Add sentiment fields to news_articles + +Revision ID: add_sentiment_fields +Revises: previous_revision +Create Date: 2025-01-11 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers +revision = 'add_sentiment_fields' +down_revision = 'previous_revision' +branch_labels = None +depends_on = None + +def upgrade(): + # Add sentiment analysis fields + op.add_column('news_articles', sa.Column('sentiment_confidence', sa.Float(), nullable=True)) + op.add_column('news_articles', sa.Column('sentiment_label', sa.String(20), nullable=True)) + + # Vector columns already exist from 95% complete infrastructure: + # - title_embedding vector(1536) + # - content_embedding vector(1536) + # - sentiment_score float + + # Add index on sentiment_label for filtering + op.create_index('idx_news_sentiment_label', 'news_articles', ['sentiment_label']) + +def downgrade(): + op.drop_index('idx_news_sentiment_label', table_name='news_articles') + op.drop_column('news_articles', 'sentiment_label') + op.drop_column('news_articles', 'sentiment_confidence') +``` ## Testing Strategy -### Test Architecture +### Unit Tests (Mock Boundaries) -Following the existing pragmatic TDD approach with mock boundaries: - -``` -tests/domains/news/ -├── __init__.py -├── test_news_entities.py # Entity validation and serialization -├── test_news_service.py # Mock repository and OpenRouter client -├── test_news_repository.py # PostgreSQL test database -├── test_openrouter_client.py # pytest-vcr for API responses -├── test_scheduled_collector.py # Mock APScheduler and services -└── integration/ - ├── test_sentiment_pipeline.py # End-to-end sentiment analysis - ├── test_embedding_pipeline.py # End-to-end embedding generation - └── test_scheduled_execution.py # Full job execution cycle -``` - -### Key Test Categories - -**Entity Tests (Fast Unit Tests)** ```python -def test_news_article_sentiment_validation(): - """Test sentiment score validation and reliability checks""" - - # Valid sentiment - sentiment = SentimentScore( - sentiment="positive", - confidence=0.8, - reasoning="Strong positive language" - ) - - article = NewsArticle( - headline="Test headline", - url="https://example.com", - source="Test Source", - published_date=datetime.datetime.now(), - sentiment_score=sentiment - ) - - assert article.has_reliable_sentiment() == True - - # Low confidence sentiment - low_confidence = SentimentScore( - sentiment="neutral", - confidence=0.3, - reasoning="Ambiguous language" - ) - - article.sentiment_score = low_confidence - assert article.has_reliable_sentiment() == False +# tests/domains/news/test_news_service_llm.py +import pytest +from unittest.mock import AsyncMock +from tradingagents.domains.news.news_service import NewsService +from tradingagents.domains.news.openrouter_sentiment_client import SentimentResult -def test_news_article_vector_validation(): - """Test vector embedding validation""" - - # Valid 1536-dimension embedding - valid_embedding = [0.1] * 1536 - article = NewsArticle( - headline="Test", - url="https://example.com", - source="Test", - published_date=datetime.datetime.now(), - title_embedding=valid_embedding - ) - - assert len(article.title_embedding) == 1536 - - # Invalid dimension should raise ValidationError - with pytest.raises(ValidationError): - NewsArticle( - headline="Test", - url="https://example.com", - source="Test", - 
published_date=datetime.datetime.now(), - title_embedding=[0.1] * 512 # Wrong dimension - ) -``` +@pytest.fixture +def mock_sentiment_client(): + return AsyncMock() -**Service Integration Tests (Mock Boundaries)** -```python -@pytest.mark.asyncio -async def test_news_service_with_sentiment_analysis(mock_openrouter_client, mock_repository): - """Test NewsService integration with mocked LLM client""" - - # Mock successful sentiment analysis - mock_sentiment = SentimentScore( - sentiment="positive", - confidence=0.9, - reasoning="Optimistic financial outlook" +@pytest.fixture +def mock_embeddings_client(): + return AsyncMock() + +async def test_enrich_articles_handles_llm_failures_gracefully( + mock_sentiment_client, + mock_embeddings_client +): + """Test that LLM failures don't block article storage.""" + # Mock sentiment failure + mock_sentiment_client.analyze_sentiment.side_effect = Exception("API Error") + + # Mock embeddings success + mock_embeddings_client.generate_article_embeddings.return_value = ( + [0.1] * 1536, [0.2] * 1536 ) - mock_openrouter_client.analyze_sentiment.return_value = mock_sentiment - - # Mock embeddings - mock_openrouter_client.generate_embeddings.return_value = [ - [0.1] * 1536, # title embedding - [0.2] * 1536 # content embedding - ] - + service = NewsService( - repository=mock_repository, - google_client=mock_google_client, - scraper_client=mock_scraper_client, - openrouter_client=mock_openrouter_client + google_client=AsyncMock(), + repository=AsyncMock(), + article_scraper=AsyncMock(), + sentiment_client=mock_sentiment_client, + embeddings_client=mock_embeddings_client, ) - - articles = await service.update_company_news("AAPL", include_sentiment=True) - - # Verify LLM integration + + articles = [create_test_article()] + enriched = await service._enrich_articles(articles) + + # Article should still be returned even though sentiment failed + assert len(enriched) == 1 + assert enriched[0].url == articles[0].url +``` + +### Integration Tests (Real Database) + +```python +# tests/domains/news/integration/test_news_workflow.py +import pytest +from tradingagents.lib.database import create_test_database_manager +from tradingagents.domains.news.news_service import NewsService + +@pytest.mark.asyncio +async def test_complete_news_pipeline_end_to_end(test_db_manager): + """Test complete pipeline: RSS → Scrape → LLM → Vector → Store.""" + config = TradingAgentsConfig.from_test_env() + service = NewsService.build(test_db_manager, config) + + # Execute full pipeline + result = await service.update_company_news("AAPL") + + # Verify results + assert result.status == "completed" + assert result.articles_scraped > 0 + + # Verify database storage + articles = await service.repository.list_by_date_range( + symbol="AAPL", + start_date=date.today(), + end_date=date.today() + ) + assert len(articles) > 0 - assert articles[0].sentiment_score == mock_sentiment - assert articles[0].title_embedding == [0.1] * 1536 - assert mock_openrouter_client.analyze_sentiment.called - assert mock_openrouter_client.generate_embeddings.called + + # Verify LLM enrichment + for article in articles: + assert article.sentiment_score is not None + assert article.title_embedding is not None + assert len(article.title_embedding) == 1536 ``` -**Repository Integration Tests (Real Database)** +### Dagster Tests + ```python -@pytest.mark.asyncio -async def test_repository_vector_similarity_search(test_db): - """Test vector similarity search with real pgvectorscale""" - - repository = NewsRepository(test_db) - - # 
Insert articles with embeddings - article1 = NewsArticle( - headline="Apple reports strong iPhone sales", - url="https://example.com/1", - source="TechNews", - published_date=datetime.datetime.now(), - entities=["AAPL"], - title_embedding=[0.1, 0.2] + [0.0] * 1534 # Similar to query +# tests/data/jobs/test_news_collection.py +from dagster import build_op_context +from tradingagents.data.ops.news_ops import collect_news_for_symbol + +def test_collect_news_for_symbol_op(): + """Test Dagster op for news collection.""" + context = build_op_context( + resources={"database_manager": mock_database_manager} ) - - article2 = NewsArticle( - headline="Microsoft launches new Azure features", - url="https://example.com/2", - source="CloudNews", - published_date=datetime.datetime.now(), - entities=["MSFT"], - title_embedding=[0.9, 0.8] + [0.0] * 1534 # Different from query + + result = collect_news_for_symbol(context, "AAPL") + + assert result["symbol"] == "AAPL" + assert result["status"] == "completed" + assert result["articles_found"] >= 0 +``` + +## Performance Optimization + +### Query Performance Targets + +- **News retrieval**: < 2 seconds for 30-day lookback +- **Vector similarity search**: < 1 second for top-10 results +- **Batch insertion**: < 5 seconds for 50 articles + +### Optimization Strategies + +1. **Vector Indexes**: Use pgvectorscale IVFFlat indexes for similarity search +2. **Batch Operations**: Use `executemany()` for bulk inserts and updates +3. **Connection Pooling**: Configure asyncpg connection pool (min=5, max=20) +4. **Async Operations**: All I/O operations are async (HTTP, database) +5. **Caching**: Dagster asset materialization for computed aggregates + +## Monitoring and Observability + +### Dagster UI Monitoring + +- **Job runs**: View execution history and status +- **Asset lineage**: Track data dependencies +- **Performance metrics**: Execution time, success rate +- **Logs**: Structured logging with context + +### Custom Metrics + +```python +from dagster import Output, MetadataValue + +def collect_news_for_symbol(context, symbol): + # ... collection logic ... + + yield Output( + result, + metadata={ + "articles_found": MetadataValue.int(result["articles_found"]), + "articles_scraped": MetadataValue.int(result["articles_scraped"]), + "success_rate": MetadataValue.float( + result["articles_scraped"] / result["articles_found"] + ), + "execution_time": MetadataValue.float(execution_time_seconds), + } ) - - await repository.upsert_batch([article1, article2]) - - # Query with similar embedding - query_embedding = [0.15, 0.25] + [0.0] * 1534 - similar_articles = await repository.find_similar_articles( - query_embedding, symbol="AAPL", limit=1 +``` + +## Error Handling and Resilience + +### LLM Failure Strategies + +1. **Sentiment Analysis Failures**: Fall back to keyword-based sentiment +2. **Embedding Failures**: Use zero vectors, log for manual review +3. **API Rate Limits**: Exponential backoff with jitter +4. 
**Timeout Handling**: 30s timeout for sentiment, 60s for embeddings + +### Dagster Retry Policies + +```python +from dagster import RetryPolicy + +@op( + retry_policy=RetryPolicy( + max_retries=3, + delay=10, # seconds + backoff=BackoffPolicy.EXPONENTIAL, ) - - assert len(similar_articles) == 1 - assert similar_articles[0].headline == "Apple reports strong iPhone sales" -``` - -**API Integration Tests (pytest-vcr)** -```python -@pytest.mark.vcr -@pytest.mark.asyncio -async def test_openrouter_sentiment_analysis(): - """Test real OpenRouter API calls with VCR cassettes""" - - config = TradingAgentsConfig.from_env() - client = OpenRouterClient(config) - - test_text = "Apple's quarterly earnings exceeded expectations with strong iPhone sales." - - sentiment = await client.analyze_sentiment(test_text) - - assert isinstance(sentiment, SentimentScore) - assert sentiment.sentiment in ["positive", "negative", "neutral"] - assert 0.0 <= sentiment.confidence <= 1.0 - assert len(sentiment.reasoning) > 0 - -@pytest.mark.vcr -@pytest.mark.asyncio -async def test_openrouter_embeddings_generation(): - """Test real OpenRouter embeddings API with VCR""" - - config = TradingAgentsConfig.from_env() - client = OpenRouterClient(config) - - texts = ["Apple stock rises", "Market volatility increases"] - - embeddings = await client.generate_embeddings(texts) - - assert len(embeddings) == 2 - assert all(len(emb) == 1536 for emb in embeddings) - assert all(isinstance(val, float) for emb in embeddings for val in emb) -``` - -### Coverage Requirements - -Maintain existing >85% coverage with new components: - -- **Entity Layer**: 95% coverage (comprehensive validation testing) -- **Service Layer**: 90% coverage (mock external dependencies) -- **Repository Layer**: 85% coverage (real database integration tests) -- **Client Layer**: 80% coverage (pytest-vcr for API calls) -- **Integration Tests**: End-to-end scenarios covering complete workflows - -### Performance Testing - -```python -@pytest.mark.performance -@pytest.mark.asyncio -async def test_vector_similarity_performance(): - """Ensure vector similarity queries perform under 100ms""" - - repository = NewsRepository(test_db) - - # Insert 1000 articles with embeddings - articles = [create_test_article_with_embedding() for _ in range(1000)] - await repository.upsert_batch(articles) - - query_embedding = [random.random() for _ in range(1536)] - - start_time = time.time() - results = await repository.find_similar_articles(query_embedding, limit=10) - duration = time.time() - start_time - - assert duration < 0.1 # Under 100ms - assert len(results) == 10 -``` - -## Integration Points - -### News Analyst AgentToolkit Integration - -The completed News domain integrates seamlessly with existing News Analyst agents: - -```python -class NewsAnalystToolkit: - """Enhanced toolkit with semantic search capabilities""" - - def __init__(self, news_service: NewsService): - self.news_service = news_service - - async def get_relevant_news(self, - ticker: str, - query: Optional[str] = None, - days_back: int = 30) -> List[Dict[str, Any]]: - """Get news with optional semantic search""" - - if query: - # Use semantic similarity search - articles = await self.news_service.find_similar_articles( - query_text=query, - symbol=ticker, - limit=20 - ) - else: - # Use time-based search (existing) - articles = await self.news_service.find_recent_news( - symbol=ticker, - days_back=days_back - ) - - return [ - { - "headline": article.headline, - "summary": article.summary, - "published_date": 
article.published_date.isoformat(), - "sentiment": article.sentiment_score.sentiment if article.sentiment_score else "unknown", - "confidence": article.sentiment_score.confidence if article.sentiment_score else 0.0, - "source": article.source, - "url": article.url - } - for article in articles - ] -``` - -### Configuration Integration - -Seamless integration with existing `TradingAgentsConfig`: - -```python -# Enhanced configuration for news domain completion -config = TradingAgentsConfig( - # Existing LLM configuration - llm_provider="openrouter", - openrouter_api_key=os.getenv("OPENROUTER_API_KEY"), - quick_think_llm="anthropic/claude-3.5-haiku", # For sentiment analysis - - # New news-specific settings - news_collection_enabled=True, - news_schedule_hour=6, # UTC - news_sentiment_enabled=True, - news_embeddings_enabled=True, - news_max_articles_per_ticker=20, - - # Database (existing) - database_url=os.getenv("DATABASE_URL"), -) - -# Job configuration -news_job_config = NewsJobConfig( - tickers=["AAPL", "GOOGL", "MSFT", "TSLA", "NVDA"], - schedule_hour=6, # 6 AM UTC daily collection - sentiment_model=config.quick_think_llm, - embedding_model="text-embedding-3-large", - max_articles_per_ticker=20 ) +def collect_news_for_symbol(context, symbol): + # ... implementation ... ``` -This design completes the final 5% of the News domain while leveraging the existing 95% infrastructure, maintaining architectural consistency, and providing the robust scheduled execution, LLM-powered sentiment analysis, and vector embeddings needed for advanced News Analyst capabilities. \ No newline at end of file +## Success Criteria + +✅ **Layered Architecture**: Entity → Repository → Service → Dagster Op → Dagster Job +✅ **LLM Sentiment**: OpenRouter structured sentiment with confidence and fallback +✅ **Vector RAG**: pgvectorscale semantic search operational with <1s query time +✅ **Dagster Orchestration**: Daily automated collection via Dagster schedules +✅ **Test Coverage**: >85% maintained with pytest-vcr for HTTP mocking +✅ **Performance**: Query < 2s, vector search < 1s, batch insert < 5s +✅ **Error Resilience**: Graceful fallbacks for all LLM and API failures +✅ **Monitoring**: Dagster UI provides complete observability and alerting + +## Timeline + +**Phase 1**: Entity + Migration (2-3h) +**Phase 2**: Repository RAG methods (2-3h) +**Phase 3**: LLM Clients (4-5h) +**Phase 4**: Service Enhancement (2-3h) +**Phase 5**: Dagster Orchestration (3-4h) +**Phase 6**: Testing & Documentation (2-3h) + +**Total: 15-20 hours with AI assistance** diff --git a/docs/specs/news/spec-lite.md b/docs/specs/news/spec-lite.md index 77bf0cd4..3373fd4f 100644 --- a/docs/specs/news/spec-lite.md +++ b/docs/specs/news/spec-lite.md @@ -10,12 +10,12 @@ Complete final 5% of news domain: add scheduled execution, LLM sentiment analysi ### 1. Scheduled Execution - Daily job at 6 AM UTC for all configured tickers -- APScheduler integration (no Dagster dependency) -- Graceful error handling with comprehensive logging +- Dagster orchestration with partitioned schedules +- Graceful error handling with Dagster sensors and alerting -### 2. LLM Sentiment Analysis +### 2. LLM Sentiment Analysis - OpenRouter integration using `quick_think_llm` (claude-3.5-haiku) -- Structured output: `{"sentiment": "positive|negative|neutral", "confidence": 0.0-1.0}` +- Structured output: `{"sentiment": "positive|negative|neutral", "confidence": 0.0-1.0, "label": "positive|negative|neutral"}` - Best-effort processing - failures don't stop pipeline ### 3. 
Vector Embeddings @@ -27,54 +27,75 @@ Complete final 5% of news domain: add scheduled execution, LLM sentiment analysi ### Architecture Pattern ``` -ScheduledNewsJob → NewsService → NewsRepository → NewsArticle → PostgreSQL+pgvectorscale +Dagster Job → Dagster Op → NewsService → NewsRepository → NewsArticle → PostgreSQL+pgvectorscale ``` ### Database Changes ```sql -ALTER TABLE news_articles -ADD COLUMN sentiment_score JSONB, -ADD COLUMN title_embedding vector(1536), -ADD COLUMN content_embedding vector(1536); +ALTER TABLE news_articles +ADD COLUMN sentiment_confidence FLOAT, +ADD COLUMN sentiment_label VARCHAR(20); + +-- Vector columns already exist from 95% complete infrastructure +-- title_embedding vector(1536) +-- content_embedding vector(1536) ``` ### Key Integration Points -- **Existing NewsService**: Enhance `update_news_for_symbol` method -- **LLM Integration**: OpenRouter unified provider for sentiment -- **Vector Generation**: text-embedding-3-small model (1536 dims) -- **Job Scheduling**: APScheduler with cron trigger +- **Existing NewsService**: Enhance `update_company_news` method +- **LLM Integration**: OpenRouter unified provider for sentiment and embeddings +- **Vector Generation**: OpenAI text-embedding-ada-002 via OpenRouter (1536 dims) +- **Job Scheduling**: Dagster jobs with daily partitioned schedules ## Implementation Phases -1. **Scheduled Execution** (2-3h): APScheduler + config management -2. **LLM Sentiment** (3-4h): OpenRouter integration + structured prompts -3. **Vector Embeddings** (2-3h): Embedding generation + database schema -4. **Testing & Monitoring** (2h): Coverage + performance validation +1. **Entity Layer** (2-3h): Enhance NewsArticle dataclass + migration +2. **Repository Layer** (2-3h): RAG vector similarity search methods +3. **LLM Integration** (4-5h): OpenRouter sentiment + embeddings clients +4. **Service Enhancement** (2-3h): Integrate LLM clients into NewsService +5. **Dagster Orchestration** (3-4h): Jobs, ops, and schedules +6. **Testing & Monitoring** (2-3h): Coverage + performance validation -**Total: 9-12 hours** +**Total: 15-20 hours** ## Success Criteria -- ✅ Daily automated news collection without manual intervention +- ✅ Daily automated news collection via Dagster without manual intervention - ✅ News retrieval with sentiment scores < 2 seconds response time - ✅ Vector embeddings enable semantic search for News Analysts - ✅ >95% article processing success rate despite paywall/blocking - ✅ Maintain >85% test coverage including new components +- ✅ Dagster UI provides monitoring and alerting for job failures ## Dependencies -- **APIs**: OpenRouter (sentiment), OpenAI (embeddings) +- **APIs**: OpenRouter (sentiment + embeddings via unified provider) - **Infrastructure**: PostgreSQL + TimescaleDB + pgvectorscale -- **New Package**: `apscheduler` for job scheduling -- **Existing**: 95% complete news domain components +- **Orchestration**: Dagster for job scheduling and monitoring +- **Existing**: 95% complete news domain components (clients, repository, service) ## Configuration +```yaml +# Dagster workspace.yaml +schedules: + news_collection_daily: + cron_schedule: "0 6 * * *" # Daily at 6 AM UTC + execution_timezone: "UTC" + +# Dagster run config +ops: + collect_news: + config: + symbols: ["AAPL", "GOOGL", "MSFT", "TSLA"] + lookback_days: 1 +``` + ```bash -OPENROUTER_API_KEY="sk-or-..." -OPENAI_API_KEY="sk-..." -NEWS_SCHEDULE_HOUR=6 -NEWS_TICKERS="AAPL,GOOGL,MSFT,TSLA" +# Environment variables +OPENROUTER_API_KEY="sk-or-..." 
# Unified LLM provider +DATABASE_URL="postgresql+asyncpg://..." ``` ## Risk Mitigation - **API Rate Limits**: Exponential backoff + batch processing -- **Paywall Blocking**: Metadata-only storage with warnings -- **Job Failures**: Monitoring + alerting for operational visibility -- **Performance**: Vector indexes + query optimization for <2s target \ No newline at end of file +- **Paywall Blocking**: Metadata-only storage with warnings +- **Job Failures**: Dagster sensors + alerting for operational visibility +- **Performance**: Vector indexes + query optimization for <2s target +- **LLM Failures**: Keyword-based fallback for sentiment, zero-vector fallback for embeddings diff --git a/docs/specs/news/spec.md b/docs/specs/news/spec.md index ebf7b2c2..27b328e8 100644 --- a/docs/specs/news/spec.md +++ b/docs/specs/news/spec.md @@ -99,10 +99,10 @@ Complete the final 5% of the news domain by adding scheduled execution, LLM sent ### Architecture Alignment -Follows established **Router → Service → Repository → Entity → Database** pattern: +Follows established **Router → Service → Repository → Entity → Database** pattern with Dagster orchestration: ``` -ScheduledNewsJob → NewsService → NewsRepository → NewsArticle → PostgreSQL+pgvectorscale +Dagster Schedule → Dagster Job → NewsService → NewsRepository → NewsArticle → PostgreSQL+pgvectorscale ``` ### Database Schema Integration @@ -156,29 +156,70 @@ content_embedding = await embedding_client.create_embedding( ### Scheduled Execution Framework -Use APScheduler for job orchestration (Dagster not in current dependencies): +Use Dagster for job orchestration (existing dependency in project): ```python -from apscheduler.schedulers.asyncio import AsyncIOScheduler - -scheduler = AsyncIOScheduler() -scheduler.add_job( - run_news_collection, - 'cron', - hour=6, # 6 AM UTC - minute=0, - timezone=timezone.utc, - id='daily_news_collection' +from dagster import ( + job, + schedule, + ScheduleDefinition, + op, + In, + Out, + AssetMaterialization ) +from dagster._core.scheduler import ScheduleExecutionContext + +@op +def fetch_news_for_tickers(context, tickers: list[str]) -> list[dict]: + """Fetch news articles for configured tickers""" + pass + +@op +def process_articles_with_sentiment(context, articles: list[dict]) -> list[dict]: + """Process articles with LLM sentiment analysis and embeddings""" + pass + +@op +def store_articles(context, processed_articles: list[dict]) -> None: + """Store articles with sentiment and embeddings in database""" + pass + +@job +def daily_news_collection_job(): + """Daily news collection pipeline""" + tickers = ["AAPL", "GOOGL", "MSFT", "TSLA"] # From config + articles = fetch_news_for_tickers(tickers) + processed = process_articles_with_sentiment(articles) + store_articles(processed) + +@schedule( + cron_schedule="0 6 * * *", # Daily at 6 AM UTC + job=daily_news_collection_job, + execution_timezone="UTC" +) +def daily_news_collection_schedule(context: ScheduleExecutionContext): + """Schedule for daily news collection""" + run_config = { + "ops": { + "fetch_news_for_tickers": { + "inputs": { + "tickers": ["AAPL", "GOOGL", "MSFT", "TSLA"] + } + } + } + } + return run_config ``` ## Implementation Approach -### Phase 1: Scheduled Execution (2-3 hours) -1. Configure APScheduler for daily news collection -2. Create job configuration management for ticker lists -3. Implement job monitoring and status tracking -4. Add manual execution capability for testing +### Phase 1: Dagster Scheduling Integration (2-3 hours) +1. 
Create Dagster ops for news collection pipeline +2. Configure daily schedule with cron expression +3. Set up job configuration management for ticker lists +4. Add manual job execution capability for testing +5. Implement job monitoring and asset tracking ### Phase 2: LLM Sentiment Integration (3-4 hours) 1. Integrate OpenRouter LLM for sentiment analysis @@ -218,11 +259,12 @@ scheduler.add_job( - `NewsRepository` with async PostgreSQL operations - `NewsArticle` domain model with validation - Comprehensive test coverage with pytest-vcr +- Dagster framework for data orchestration (existing dependency) ### New Dependencies -- `apscheduler` for job scheduling - Enhanced vector embedding capabilities - LLM client integration for sentiment analysis +- Dagster scheduling integration (existing dependency) ## Configuration Management diff --git a/docs/specs/news/status.md b/docs/specs/news/status.md index 162421dc..d847c261 100644 --- a/docs/specs/news/status.md +++ b/docs/specs/news/status.md @@ -1,336 +1,310 @@ -# News Domain Completion - Progress Status - -## Overview - -**Feature**: News Domain Final 5% Completion -**Status**: Ready for Implementation -**Total Estimated Time**: 12-16 hours with AI assistance -**Target Timeline**: 3-4 days -**Current Progress**: 95% complete (infrastructure ready) - ---- - -## Progress Summary - -### Overall Completion: 0% (95% + 0% of final 5%) - -| Phase | Status | Progress | Duration | Completion | -|-------|--------|----------|----------|------------| -| Phase 1: Foundation | ⏳ Not Started | 0/3 tasks | 0h/4-7h | ⬜⬜⬜⬜⬜⬜⬜ | -| Phase 2: Data Access | ⏳ Not Started | 0/1 tasks | 0h/2-3h | ⬜⬜⬜ | -| Phase 3: LLM Integration | ⏳ Not Started | 0/3 tasks | 0h/5-8h | ⬜⬜⬜⬜⬜⬜⬜⬜ | -| Phase 4: Scheduling | ⏳ Not Started | 0/2 tasks | 0h/4-6h | ⬜⬜⬜⬜⬜⬜ | -| Phase 5: Validation | ⏳ Not Started | 0/2 tasks | 0h/3-5h | ⬜⬜⬜⬜⬜ | - -**Legend**: ✅ Complete | 🟡 In Progress | ⏳ Not Started | ❌ Blocked - ---- - -## Task Status Tracking - -### Phase 1: Foundation (0% Complete) - -#### ⏳ T001: Database Migration - NewsJobConfig Table -- **Status**: Not Started -- **Priority**: Critical -- **Estimated**: 1-2 hours -- **Dependencies**: None -- **Progress**: 0% -- **Acceptance Criteria**: 0/4 completed - - [ ] `news_job_configs` table created with UUID primary key - - [ ] JSONB fields for symbols and categories with validation - - [ ] Proper indexes for enabled/frequency queries - - [ ] Migration script tests with rollback capability -- **Blocking Issues**: None -- **Next Actions**: Create Alembic migration script - -#### ⏳ T002: Enhance NewsArticle Entity - Sentiment and Embeddings -- **Status**: Not Started -- **Priority**: Critical -- **Estimated**: 2-3 hours -- **Dependencies**: T001 -- **Progress**: 0% -- **Acceptance Criteria**: 0/5 completed - - [ ] Add sentiment_score, sentiment_confidence, sentiment_label fields - - [ ] Add title_embedding and content_embedding vector fields - - [ ] Enhanced validate() method with sentiment range checks - - [ ] Updated transformations for vector handling - - [ ] Embedding dimension validation (1536) -- **Blocking Issues**: None -- **Next Actions**: Extend NewsArticle dataclass - -#### ⏳ T003: Create NewsJobConfig Entity -- **Status**: Not Started -- **Priority**: Critical -- **Estimated**: 1-2 hours -- **Dependencies**: T001 -- **Progress**: 0% -- **Acceptance Criteria**: 0/5 completed - - [ ] NewsJobConfig dataclass with all required fields - - [ ] Business rule validation for job configuration - - [ ] Cron expression validation for frequency - 
- [ ] Symbol list validation - - [ ] JSON serialization for database storage -- **Blocking Issues**: None -- **Next Actions**: Create new entity file - -### Phase 2: Data Access (0% Complete) - -#### ⏳ T004: Enhance NewsRepository - Vector and Job Operations -- **Status**: Not Started -- **Priority**: Critical -- **Estimated**: 2-3 hours -- **Dependencies**: T002, T003 -- **Progress**: 0% -- **Acceptance Criteria**: 0/5 completed - - [ ] Vector similarity search with cosine distance - - [ ] Batch embedding update operations - - [ ] NewsJobConfig CRUD methods - - [ ] Optimized query performance for vector operations - - [ ] Proper async connection handling -- **Blocking Issues**: Waiting for T002, T003 -- **Next Actions**: Extend NewsRepository class - -### Phase 3: LLM Integration (0% Complete) - -#### ⏳ T005: OpenRouter Client - Sentiment Analysis -- **Status**: Not Started -- **Priority**: Critical -- **Estimated**: 2-3 hours -- **Dependencies**: T002 -- **Progress**: 0% -- **Acceptance Criteria**: 0/5 completed - - [ ] OpenRouter API integration for sentiment analysis - - [ ] Structured prompts for financial news sentiment - - [ ] Response parsing with Pydantic models - - [ ] Error handling with graceful fallbacks - - [ ] Retry logic with exponential backoff -- **Blocking Issues**: Waiting for T002 -- **Next Actions**: Create OpenRouter sentiment client - -#### ⏳ T006: OpenRouter Client - Vector Embeddings -- **Status**: Not Started -- **Priority**: Critical -- **Estimated**: 1-2 hours -- **Dependencies**: T002 -- **Progress**: 0% -- **Acceptance Criteria**: 0/5 completed - - [ ] OpenRouter embeddings API integration - - [ ] Text preprocessing for embedding generation - - [ ] Batch processing for multiple articles - - [ ] 1536-dimensional vector validation - - [ ] Proper error handling and retries -- **Blocking Issues**: Waiting for T002 -- **Next Actions**: Create OpenRouter embeddings client - -#### ⏳ T007: Enhance NewsService - LLM Integration -- **Status**: Not Started -- **Priority**: Critical -- **Estimated**: 2-3 hours -- **Dependencies**: T005, T006 -- **Progress**: 0% -- **Acceptance Criteria**: 0/5 completed - - [ ] Replace keyword sentiment with LLM analysis - - [ ] Add embedding generation to article processing - - [ ] End-to-end article processing pipeline - - [ ] Proper error handling and fallback strategies - - [ ] Integration with existing service methods -- **Blocking Issues**: Waiting for T005, T006 -- **Next Actions**: Integrate LLM clients into NewsService - -### Phase 4: Scheduling (0% Complete) - -#### ⏳ T008: APScheduler Integration - Job Scheduling -- **Status**: Not Started -- **Priority**: High -- **Estimated**: 3-4 hours -- **Dependencies**: T003, T004, T007 -- **Progress**: 0% -- **Acceptance Criteria**: 0/5 completed - - [ ] APScheduler setup with PostgreSQL job store - - [ ] Scheduled job execution with proper error handling - - [ ] Job configuration loading and validation - - [ ] Status monitoring and failure recovery - - [ ] CLI integration for job management -- **Blocking Issues**: Waiting for T003, T004, T007 -- **Next Actions**: Implement ScheduledNewsCollector - -#### ⏳ T009: CLI Integration - Job Management Commands -- **Status**: Not Started -- **Priority**: Medium -- **Estimated**: 1-2 hours -- **Dependencies**: T008 -- **Progress**: 0% -- **Acceptance Criteria**: 0/5 completed - - [ ] CLI commands for job creation/management - - [ ] Manual job execution commands - - [ ] Job status and monitoring commands - - [ ] Integration with existing CLI 
structure - - [ ] Proper error handling and user feedback -- **Blocking Issues**: Waiting for T008 -- **Next Actions**: Extend CLI with news job commands - -### Phase 5: Validation (0% Complete) - -#### ⏳ T010: Integration Tests - End-to-End Workflow -- **Status**: Not Started -- **Priority**: High -- **Estimated**: 2-3 hours -- **Dependencies**: T007, T008 -- **Progress**: 0% -- **Acceptance Criteria**: 0/5 completed - - [ ] End-to-end workflow tests from RSS to vector storage - - [ ] Agent integration tests via AgentToolkit - - [ ] Performance tests for daily collection volumes - - [ ] Error recovery and fallback tests - - [ ] Test coverage maintained above 85% -- **Blocking Issues**: Waiting for T007, T008 -- **Next Actions**: Create comprehensive integration test suite - -#### ⏳ T011: Documentation and Monitoring -- **Status**: Not Started -- **Priority**: Medium -- **Estimated**: 1-2 hours -- **Dependencies**: T010 -- **Progress**: 0% -- **Acceptance Criteria**: 0/5 completed - - [ ] Updated API documentation for new methods - - [ ] Job scheduling configuration examples - - [ ] Performance monitoring dashboard queries - - [ ] Troubleshooting guide for common issues - - [ ] Agent integration documentation -- **Blocking Issues**: Waiting for T010 -- **Next Actions**: Update documentation and monitoring - ---- - -## Success Criteria Validation - -### Technical Requirements Status -- [ ] **OpenRouter-only LLM Integration**: Not started -- [ ] **Vector Embeddings with pgvectorscale**: Not started -- [ ] **APScheduler Job Execution**: Not started -- [ ] **Test Coverage >85%**: Baseline established (needs monitoring) -- [ ] **Query Performance <100ms**: Not tested -- [ ] **Vector Search Performance <1s**: Not tested -- [ ] **Backward Compatibility**: Not validated - -### Functional Requirements Status -- [ ] **Sentiment Analysis Pipeline**: Not implemented -- [ ] **Embedding Generation Pipeline**: Not implemented -- [ ] **Scheduled News Collection**: Not implemented -- [ ] **CLI Job Management**: Not implemented -- [ ] **AgentToolkit Integration**: Not validated -- [ ] **Error Handling & Fallbacks**: Not implemented - -### Quality Requirements Status -- [ ] **TDD Implementation**: Process defined, not applied -- [ ] **Layered Architecture**: Pattern defined, not validated -- [ ] **Async Connection Pooling**: Not implemented -- [ ] **Production Monitoring**: Not implemented -- [ ] **Documentation Completeness**: Not updated - ---- - -## Current Blocking Issues - -### Critical Blockers -**None currently** - All dependencies are internal to this implementation - -### Potential Risk Areas -1. **OpenRouter API Access**: Requires valid API keys and model access -2. **Database Migration**: Need proper PostgreSQL permissions for schema changes -3. **Vector Extension**: pgvectorscale must be properly installed and configured -4. 
**Performance Testing**: Need realistic data volumes for benchmark validation - ---- - -## Weekly Progress Targets - -### Week 1 Target (Days 1-2) -- **Goal**: Complete Phase 1 & 2 (Foundation + Data Access) -- **Expected Completion**: T001, T002, T003, T004 -- **Target Progress**: 45% overall completion - -### Week 1 Target (Days 3-4) -- **Goal**: Complete Phase 3 & 4 (LLM Integration + Scheduling) -- **Expected Completion**: T005, T006, T007, T008, T009 -- **Target Progress**: 90% overall completion - -### Week 2 Target (Day 1) -- **Goal**: Complete Phase 5 (Validation) -- **Expected Completion**: T010, T011 -- **Target Progress**: 100% overall completion - ---- - -## Metrics Dashboard - -### Code Coverage -- **Current**: 95% (existing infrastructure) -- **Target**: >85% (including new functionality) -- **Status**: ⏳ Pending implementation - -### Performance Benchmarks -- **Query Performance**: Not measured (Target: <100ms) -- **Vector Search**: Not measured (Target: <1s) -- **Batch Processing**: Not measured (Target: TBD) -- **Status**: ⏳ Pending implementation - -### Test Execution -- **Unit Tests**: 0/11 tasks have tests -- **Integration Tests**: 0/11 tasks have integration tests -- **VCR Tests**: 0/3 API clients have VCR tests -- **Status**: ⏳ Pending implementation - ---- - -## Communication & Reporting - -### Daily Standup Format -``` -Yesterday: [Tasks completed with IDs] -Today: [Tasks planned with IDs] -Blockers: [Any issues requiring attention] -Help Needed: [Specific areas for collaboration] -``` - -### Weekly Status Report Format -``` -Completed: [Phase progress with task counts] -In Progress: [Current focus areas] -Upcoming: [Next phase priorities] -Risks: [Technical or timeline concerns] -Metrics: [Coverage, performance, test results] -``` - -### Milestone Checkpoints -- **Checkpoint 1** (End of Day 2): Foundation Complete (T001-T004) -- **Checkpoint 2** (End of Day 4): LLM Integration Complete (T005-T009) -- **Checkpoint 3** (End of Day 5): Full Implementation Complete (T001-T011) - ---- - -## Notes - -### Implementation Context -- Building on 95% complete news domain infrastructure -- Focus on OpenRouter-only LLM integration (no other providers) -- Maintaining backward compatibility with AgentToolkit -- Following established TDD and layered architecture patterns - -### Key Success Factors -1. **Incremental Progress**: Validate each layer before proceeding -2. **Comprehensive Testing**: Maintain test coverage throughout -3. **Performance Monitoring**: Validate benchmarks at each step -4. **Error Resilience**: Implement fallbacks for all LLM dependencies -5. **Documentation**: Keep implementation and usage docs current - -### Last Updated -**Date**: 2024-08-30 -**By**: System -**Next Review**: Daily during implementation - ---- - -*This status document will be updated as implementation progresses. 
Use this as a single source of truth for current progress and blocking issues.* \ No newline at end of file + 1→# News Domain Completion - Implementation Status + 2→ + 3→**Last Updated**: 2025-01-11 + 4→**Overall Progress**: 6.67% (1/15 tasks completed) + 5→**Architecture**: Dagster orchestration + OpenRouter LLM + RAG vector search + 6→ + 7→--- + 8→ + 9→## Current Phase + 10→ + 11→**Phase 1: Entity Layer** + 12→Status: In Progress + 13→Progress: 50% (1/2 tasks completed) + 14→Estimated Time Remaining: 1-2 hours + 15→ + 16→--- + 17→ + 18→## Task Status Summary + 19→ + 20→### Phase 1: Entity Layer (1/2 completed) + 21→ + 22→| Task | Status | Priority | Time | Assigned | Completion | Completed At | + 23→|------|--------|----------|------|----------|------------|--------------| + 24→| T001: Enhance NewsArticle Dataclass | ✅ Completed | Critical | 1-2h | - | 100% | 2025-01-11 | + 25→| T002: Database Migration - Sentiment Fields | ⬜ Not Started | Critical | 1h | - | 0% | - | + 26→ + 27→### Phase 2: Repository Layer (0/2 completed) + 28→ + 29→| Task | Status | Priority | Time | Assigned | Completion | + 30→|------|--------|----------|------|----------|------------| + 31→| T003: NewsRepository - Vector Similarity Search | ⬜ Not Started | Critical | 2-3h | - | 0% | + 32→| T004: NewsRepository - Batch Embedding Updates | ⬜ Not Started | Medium | 1h | - | 0% | + 33→ + 34→### Phase 3: LLM Integration (0/3 completed) + 35→ + 36→| Task | Status | Priority | Time | Assigned | Completion | + 37→|------|--------|----------|------|----------|------------| + 38→| T005: OpenRouter Sentiment Client | ⬜ Not Started | Critical | 2-3h | - | 0% | + 39→| T006: OpenRouter Embeddings Client | ⬜ Not Started | Critical | 1-2h | - | 0% | + 40→| T007: Enhance NewsService - LLM Integration | ⬜ Not Started | Critical | 2-3h | - | 0% | + 41→ + 42→### Phase 4: Dagster Orchestration (0/5 completed) + 43→ + 44→| Task | Status | Priority | Time | Assigned | Completion | + 45→|------|--------|----------|------|----------|------------| + 46→| T008: Dagster Directory Structure | ⬜ Not Started | High | 30min | - | 0% | + 47→| T009: Dagster Ops - News Collection | ⬜ Not Started | High | 2-3h | - | 0% | + 48→| T010: Dagster Job - Daily News Collection | ⬜ Not Started | High | 1-2h | - | 0% | + 49→| T011: Dagster Schedule - Daily Trigger | ⬜ Not Started | High | 1h | - | 0% | + 50→| T012: Dagster Sensor - Failure Alerting | ⬜ Not Started | Medium | 1h | - | 0% | + 51→ + 52→### Phase 5: Testing & Documentation (0/3 completed) + 53→ + 54→| Task | Status | Priority | Time | Assigned | Completion | + 55→|------|--------|----------|------|----------|------------| + 56→| T013: Integration Tests - End-to-End Workflow | ⬜ Not Started | High | 2-3h | - | 0% | + 57→| T014: Dagster Tests | ⬜ Not Started | Medium | 1h | - | 0% | + 58→| T015: Documentation Updates | ⬜ Not Started | Medium | 1-2h | - | 0% | + 59→ + 60→--- + 61→ + 62→## Dependency Graph + 63→ + 64→``` + 65→T001 ─┬─→ T002 ──→ T003 ─────────→ T007 ──→ T009 ──→ T010 ──→ T013 + 66→ │ ↑ ↑ ↑ ↑ + 67→ │ │ │ │ │ + 68→ └──→ T005 ────────────────────┘ │ │ │ + 69→ T006 ──────────────────────────────┘ │ │ + 70→ T008 ──────────────────────────────────────┘ │ + 71→ T011 ───────────────────────────────────────────────┘ + 72→ T014 ───────────────────────────────────────────────┘ + 73→``` + 74→ + 75→**Critical Path**: T001 → T002 → T003 → T007 → T009 → T010 → T013 + 76→ + 77→**Parallel Opportunities**: + 78→- T005 & T006 can be developed in parallel (LLM clients) + 79→- T009, T010, T011 can be 
developed in parallel after T008 (Dagster components) + 80→ + 81→--- + 82→ + 83→## Progress by Phase + 84→ + 85→### Phase 1: Entity Layer + 86→- **Status**: In Progress + 87→- **Progress**: 50% (1/2 tasks) + 88→- **Estimated Time**: 1-2 hours + 89→- **Blockers**: None + 90→- **Next Action**: Start T002 - Database Migration for Sentiment Fields + 91→ + 92→### Phase 2: Repository Layer + 93→- **Status**: Not Started + 94→- **Progress**: 0% (0/2 tasks) + 95→- **Estimated Time**: 2-3 hours + 96→- **Blockers**: T001, T002 must complete first + 97→- **Next Action**: Waiting for Phase 1 completion + 98→ + 99→### Phase 3: LLM Integration + 100→- **Status**: Not Started + 101→- **Progress**: 0% (0/3 tasks) + 102→- **Estimated Time**: 4-5 hours + 103→- **Blockers**: T001 must complete for client development + 104→- **Next Action**: Can start T005 & T006 in parallel after T001 + 105→ + 106→### Phase 4: Dagster Orchestration + 107→- **Status**: Not Started + 108→- **Progress**: 0% (0/5 tasks) + 109→- **Estimated Time**: 3-4 hours + 110→- **Blockers**: T007 must complete for ops/jobs, T008 has no dependencies + 111→- **Next Action**: Can start T008 anytime (directory structure) + 112→ + 113→### Phase 5: Testing & Documentation + 114→- **Status**: Not Started + 115→- **Progress**: 0% (0/3 tasks) + 116→- **Estimated Time**: 2-3 hours + 117→- **Blockers**: T007, T010 must complete for integration testing + 118→- **Next Action**: Waiting for earlier phases + 119→ + 120→--- + 121→ + 122→## Test Coverage Status + 123→ + 124→**Current Coverage**: Baseline (from 95% complete infrastructure) + 125→**Target Coverage**: ≥85% + 126→**New Code Coverage**: 0% (no new code yet) + 127→ + 128→### Coverage by Component + 129→ + 130→| Component | Coverage | Target | Status | + 131→|-----------|----------|--------|--------| + 132→| NewsArticle (Entity) | - | ≥85% | ⬜ Pending | + 133→| NewsRepository (RAG) | - | ≥85% | ⬜ Pending | + 134→| OpenRouter Sentiment Client | - | ≥85% | ⬜ Pending | + 135→| OpenRouter Embeddings Client | - | ≥85% | ⬜ Pending | + 136→| NewsService (LLM Integration) | - | ≥85% | ⬜ Pending | + 137→| Dagster Ops | - | ≥85% | ⬜ Pending | + 138→| Dagster Jobs | - | ≥85% | ⬜ Pending | + 139→ + 140→--- + 141→ + 142→## Performance Benchmarks + 143→ + 144→### Current Performance + 145→- **Query Time (30-day lookback)**: Not measured yet + 146→- **Vector Search (top-10)**: Not measured yet + 147→- **Batch Insert (50 articles)**: Not measured yet + 148→ + 149→### Target Performance + 150→- **Query Time**: < 2 seconds for 30-day lookback + 151→- **Vector Search**: < 1 second for top-10 results + 152→- **Batch Insert**: < 5 seconds for 50 articles + 153→ + 154→### Performance Test Status + 155→- [ ] Query performance baseline established + 156→- [ ] Vector search performance baseline established + 157→- [ ] Batch insert performance baseline established + 158→- [ ] All performance targets met + 159→ + 160→--- + 161→ + 162→## Risk Assessment + 163→ + 164→### High Risk Items + 165→1. **OpenRouter API Availability** - Mitigated with fallback strategies (keyword sentiment, zero vectors) + 166→2. **Vector Search Performance** - Mitigated with proper pgvectorscale indexes + 167→3. **Dagster Integration Complexity** - Mitigated with incremental testing approach + 168→ + 169→### Medium Risk Items + 170→1. **LLM API Costs** - Monitor usage during development + 171→2. **Database Performance at Scale** - Test with realistic data volumes + 172→3. 
**Test Coverage Maintenance** - Enforce ≥85% coverage requirement + 173→ + 174→### Low Risk Items + 175→1. **Code Quality** - Enforced through TDD approach + 176→2. **Documentation** - Tracked as explicit task (T015) + 177→3. **Error Handling** - Comprehensive fallback strategies + 178→ + 179→--- + 180→ + 181→## Known Issues + 182→ + 183→### Blocking Issues + 184→None currently + 185→ + 186→### Non-Blocking Issues + 187→None currently + 188→ + 189→### Technical Debt + 190→- Existing keyword-based sentiment analysis should be replaced with LLM sentiment (tracked as T005) + 191→- No automated vector embedding generation currently (tracked as T006) + 192→- No scheduled news collection (tracked as T008-T012) + 193→ + 194→--- + 195→ + 196→## Milestone Schedule + 197→ + 198→### Milestone 1: Entity & Repository Foundation + 199→**Target**: Day 1-2 + 200→**Tasks**: T001, T002, T003, T004 + 201→**Status**: In Progress + 202→**Deliverables**: + 203→- NewsArticle dataclass with sentiment fields + 204→- Database migration for sentiment columns + 205→- RAG vector similarity search functional + 206→- Batch embedding updates operational + 207→ + 208→### Milestone 2: LLM Integration + 209→**Target**: Day 2-3 + 210→**Tasks**: T005, T006, T007 + 211→**Status**: Not Started + 212→**Deliverables**: + 213→- OpenRouter sentiment client operational with fallbacks + 214→- OpenRouter embeddings client operational with fallbacks + 215→- NewsService enrichment pipeline functional + 216→- find_similar_news() RAG method operational + 217→ + 218→### Milestone 3: Dagster Orchestration + 219→**Target**: Day 3-4 + 220→**Tasks**: T008, T009, T010, T011, T012 + 221→**Status**: Not Started + 222→**Deliverables**: + 223→- Dagster directory structure created + 224→- News collection op functional + 225→- Daily collection job operational + 226→- Schedule configured for 6 AM UTC + 227→- Failure sensor monitoring job + 228→ + 229→### Milestone 4: Testing & Documentation + 230→**Target**: Day 4-5 + 231→**Tasks**: T013, T014, T015 + 232→**Status**: Not Started + 233→**Deliverables**: + 234→- End-to-end integration tests passing + 235→- Dagster component tests passing + 236→- Performance benchmarks met + 237→- Documentation updated + 238→ + 239→--- + 240→ + 241→## Next Actions + 242→ + 243→### Immediate Next Steps (Today) + 244→1. **T002**: Start database migration for sentiment fields + 245→2. **T008**: Create Dagster directory structure in parallel (no dependencies) + 246→ + 247→### This Week + 248→1. Complete Phase 1 (Entity Layer) + 249→2. Start Phase 2 (Repository Layer) + 250→3. Begin Phase 3 (LLM Integration) in parallel + 251→ + 252→### Next Week + 253→1. Complete Phase 3 & 4 (LLM + Dagster) + 254→2. Complete Phase 5 (Testing & Documentation) + 255→3. 
Deploy and monitor Dagster schedules + 256→ + 257→--- + 258→ + 259→## Team Notes + 260→ + 261→### Development Environment + 262→- PostgreSQL + TimescaleDB + pgvectorscale running locally + 263→- OpenRouter API key configured + 264→- Dagster installation complete + 265→- Python 3.13 with mise/uv + 266→ + 267→### Communication + 268→- Spec documents updated to reflect Dagster architecture (spec-lite.md, design.md, tasks.md) + 269→- APScheduler references removed from all specs + 270→- Architecture aligned with project roadmap + 271→ + 272→### Resources Needed + 273→- OpenRouter API access for development/testing + 274→- Test database with sample news articles + 275→- Dagster UI for monitoring during development + 276→ + 277→--- + 278→ + 279→## Success Criteria Checklist + 280→ + 281→**Technical Success**: + 282→- [ ] Test coverage ≥85% maintained + 283→- [ ] Query performance <2s for 30-day lookback + 284→- [ ] Vector search <1s for top-10 results + 285→- [ ] Zero breaking changes to AgentToolkit + 286→- [ ] Dagster jobs execute successfully + 287→ + 288→**Functional Success**: + 289→- [ ] OpenRouter sentiment analysis operational + 290→- [ ] Vector embeddings enable semantic search + 291→- [ ] Dagster schedules running daily + 292→- [ ] Agent context enriched with sentiment + 293→ + 294→**Quality Success**: + 295→- [x] 1/15 tasks completed + 296→- [ ] All acceptance criteria met + 297→- [ ] Comprehensive error handling + 298→- [ ] Production-ready monitoring + 299→- [ ] Complete documentation + 300→ + 301→--- + 302→ + 303→**Status Key**: + 304→- ⬜ Not Started + 305→- 🔄 In Progress + 306→- ✅ Completed + 307→- 🚫 Blocked + 308→- ⚠️ At Risk + 309→ + 310→**Last Status Update**: 2025-01-11 - T001 completed, updated progress tracking \ No newline at end of file diff --git a/docs/specs/news/tasks.md b/docs/specs/news/tasks.md index 19bfd021..fa543581 100644 --- a/docs/specs/news/tasks.md +++ b/docs/specs/news/tasks.md @@ -2,445 +2,405 @@ ## Overview -Complete the final 5% of the news domain by implementing OpenRouter-only LLM sentiment analysis, vector embeddings, and APScheduler job execution. This builds on 95% complete infrastructure with PostgreSQL + TimescaleDB + pgvectorscale stack. +Complete the final 5% of the news domain by implementing **Dagster orchestration**, **OpenRouter-powered LLM sentiment analysis**, **vector embeddings**, and **RAG-powered semantic search**. This builds on 95% complete infrastructure with PostgreSQL + TimescaleDB + pgvectorscale stack. 
-**Total Estimated Time**: 12-16 hours with AI assistance -**Target Completion**: 3-4 days -**Test Coverage Requirement**: Maintain >85% -**Architecture Pattern**: Database → Entity → Repository → Service → Scheduling +**Total Estimated Time**: 15-20 hours with AI assistance +**Target Completion**: 4-5 days +**Test Coverage Requirement**: Maintain >85% +**Architecture Pattern**: Entity → Repository → Service → Dagster Op → Dagster Job ## Implementation Phases -### Phase 1: Foundation (4-7 hours) +### Phase 1: Entity Layer (2-3 hours) Database and entity layer enhancements for LLM integration -### Phase 2: Data Access (2-3 hours) -Repository layer enhancements for vector and job operations +### Phase 2: Repository Layer (2-3 hours) +RAG-powered vector similarity search methods -### Phase 3: LLM Integration (5-8 hours) -OpenRouter clients and service integration +### Phase 3: LLM Integration (4-5 hours) +OpenRouter clients for sentiment and embeddings -### Phase 4: Scheduling (4-6 hours) -Job scheduling and CLI integration +### Phase 4: Service Enhancement (2-3 hours) +Integrate LLM clients into NewsService workflow -### Phase 5: Validation (3-5 hours) -Testing, documentation, and monitoring +### Phase 5: Dagster Orchestration (3-4 hours) +Jobs, ops, schedules, and sensors for automated collection + +### Phase 6: Testing & Documentation (2-3 hours) +Integration tests, performance validation, and documentation updates --- ## Task Breakdown -### Phase 1: Foundation +### Phase 1: Entity Layer -#### T001: Database Migration - NewsJobConfig Table +#### T001: Enhance NewsArticle Dataclass - Sentiment Fields **Priority**: Critical | **Duration**: 1-2 hours | **Dependencies**: None -**Description**: Create database migration for news job configurations table with proper indexes +**Description**: Add LLM sentiment fields to existing NewsArticle dataclass **Acceptance Criteria**: -- [ ] `news_job_configs` table created with UUID primary key -- [ ] JSONB fields for symbols and categories with validation -- [ ] Proper indexes for enabled/frequency queries -- [ ] Migration script tests with rollback capability - -**Implementation Details**: -```python -# Migration structure -def upgrade(): - op.create_table( - 'news_job_configs', - sa.Column('id', postgresql.UUID(), primary_key=True), - sa.Column('name', sa.String(255), nullable=False), - sa.Column('symbols', postgresql.JSONB(), nullable=False), - sa.Column('categories', postgresql.JSONB(), nullable=False), - sa.Column('frequency_cron', sa.String(100), nullable=False), - sa.Column('enabled', sa.Boolean(), default=True), - sa.Column('last_run', sa.DateTime(timezone=True)), - sa.Column('created_at', sa.DateTime(timezone=True), default=func.now()), - sa.Column('updated_at', sa.DateTime(timezone=True), default=func.now()) - ) - - # Indexes - op.create_index('idx_news_jobs_enabled_frequency', 'news_job_configs', - ['enabled', 'frequency_cron']) - op.create_index('idx_news_jobs_last_run', 'news_job_configs', - ['last_run'], postgresql_where=sa.text('enabled = true')) -``` - -**Files to Modify**: -- `/Users/martinrichards/code/TradingAgents/tradingagents/data/migrations/add_news_job_configs.py` - -**Test Requirements**: -- Migration up/down tests -- Index performance validation -- Constraint validation tests - ---- - -#### T002: Enhance NewsArticle Entity - Sentiment and Embeddings -**Priority**: Critical | **Duration**: 2-3 hours | **Dependencies**: T001 - -**Description**: Add LLM sentiment fields and embedding validation to NewsArticle entity - -**Acceptance 
Criteria**: -- [ ] Add `sentiment_score`, `sentiment_confidence`, `sentiment_label` fields -- [ ] Add `title_embedding` and `content_embedding` vector fields -- [ ] Enhanced `validate()` method with sentiment range checks -- [ ] Updated transformations for vector handling -- [ ] Embedding dimension validation (1536) +- [ ] Add `sentiment_confidence: Optional[float]` field (0.0-1.0 range) +- [ ] Add `sentiment_label: Optional[str]` field ("positive", "negative", "neutral") +- [ ] Update `to_entity()` method to include new sentiment fields +- [ ] Update `from_entity()` method to populate new sentiment fields +- [ ] Add `has_reliable_sentiment()` helper method (confidence >= 0.6) **Implementation Details**: ```python @dataclass class NewsArticle: # Existing fields... - - # LLM sentiment fields - sentiment_score: Optional[float] = None # [-1.0, 1.0] - sentiment_confidence: Optional[float] = None # [0.0, 1.0] + sentiment_score: Optional[float] = None # Already exists + + # New LLM sentiment fields + sentiment_confidence: Optional[float] = None # 0.0 to 1.0 sentiment_label: Optional[str] = None # "positive", "negative", "neutral" - - # Vector embedding fields - title_embedding: Optional[List[float]] = None # 1536 dimensions - content_embedding: Optional[List[float]] = None # 1536 dimensions - - def validate(self) -> Dict[str, List[str]]: - errors = super().validate() - - # Sentiment validation - if self.sentiment_score is not None: - if not -1.0 <= self.sentiment_score <= 1.0: - errors["sentiment_score"] = ["Must be between -1.0 and 1.0"] - - if self.sentiment_confidence is not None: - if not 0.0 <= self.sentiment_confidence <= 1.0: - errors["sentiment_confidence"] = ["Must be between 0.0 and 1.0"] - - # Vector dimension validation - for field, vector in [("title_embedding", self.title_embedding), - ("content_embedding", self.content_embedding)]: - if vector is not None and len(vector) != 1536: - errors[field] = ["Must be exactly 1536 dimensions"] - - return errors - - def to_record(self) -> Dict[str, Any]: - record = super().to_record() - # Convert vectors to pgvector format if present - if self.title_embedding: - record["title_embedding"] = self.title_embedding - if self.content_embedding: - record["content_embedding"] = self.content_embedding - return record + + # Vector fields already exist from 95% complete infrastructure + title_embedding: Optional[List[float]] = None + content_embedding: Optional[List[float]] = None + + def has_reliable_sentiment(self) -> bool: + """Check if sentiment analysis is reliable.""" + return bool( + self.sentiment_score is not None + and self.sentiment_confidence is not None + and self.sentiment_confidence >= 0.6 + ) ``` **Files to Modify**: -- `/Users/martinrichards/code/TradingAgents/tradingagents/domains/news/entities/news_article.py` +- `/Users/martinrichards/code/TradingAgents/tradingagents/domains/news/news_repository.py` (NewsArticle dataclass section) **Test Requirements**: -- Sentiment validation tests (range checks) -- Vector dimension validation tests -- Transformation method tests -- Business rule violation tests +- Dataclass instantiation with new fields +- `to_entity()` and `from_entity()` roundtrip conversion +- `has_reliable_sentiment()` validation logic +- Edge cases (None values, boundary conditions) --- -#### T003: Create NewsJobConfig Entity -**Priority**: Critical | **Duration**: 1-2 hours | **Dependencies**: T001 +#### T002: Database Migration - Sentiment Fields +**Priority**: Critical | **Duration**: 1 hour | **Dependencies**: T001 
-**Description**: Implement NewsJobConfig entity for scheduled job management +**Description**: Create Alembic migration to add sentiment fields to news_articles table **Acceptance Criteria**: -- [ ] NewsJobConfig dataclass with all required fields -- [ ] Business rule validation for job configuration -- [ ] Cron expression validation for frequency -- [ ] Symbol list validation -- [ ] JSON serialization for database storage +- [ ] Create Alembic migration script `add_sentiment_fields.py` +- [ ] Add `sentiment_confidence FLOAT` column (nullable) +- [ ] Add `sentiment_label VARCHAR(20)` column (nullable) +- [ ] Add index on `sentiment_label` for filtering +- [ ] Migration tested with upgrade and downgrade +- [ ] Rollback capability verified **Implementation Details**: ```python -@dataclass -class NewsJobConfig: - id: Optional[UUID] = None - name: str = "" - symbols: List[str] = field(default_factory=list) - categories: List[str] = field(default_factory=list) - frequency_cron: str = "" - enabled: bool = True - last_run: Optional[datetime] = None - created_at: Optional[datetime] = None - updated_at: Optional[datetime] = None - - def validate(self) -> Dict[str, List[str]]: - errors = {} - - # Name validation - if not self.name or len(self.name) > 255: - errors["name"] = ["Name required and must be <= 255 characters"] - - # Symbol validation - if not self.symbols: - errors["symbols"] = ["At least one symbol required"] - for symbol in self.symbols: - if not symbol.isupper() or not symbol.isalpha(): - errors["symbols"] = ["Symbols must be uppercase letters only"] - - # Cron validation - try: - from croniter import croniter - if not croniter.is_valid(self.frequency_cron): - errors["frequency_cron"] = ["Invalid cron expression"] - except ImportError: - # Fallback validation for simple intervals - if self.frequency_cron not in ["hourly", "daily", "weekly"]: - errors["frequency_cron"] = ["Invalid frequency"] - - return errors +# alembic/versions/20250111_add_sentiment_fields.py +def upgrade(): + op.add_column('news_articles', sa.Column('sentiment_confidence', sa.Float(), nullable=True)) + op.add_column('news_articles', sa.Column('sentiment_label', sa.String(20), nullable=True)) + op.create_index('idx_news_sentiment_label', 'news_articles', ['sentiment_label']) + +def downgrade(): + op.drop_index('idx_news_sentiment_label', table_name='news_articles') + op.drop_column('news_articles', 'sentiment_label') + op.drop_column('news_articles', 'sentiment_confidence') ``` **Files to Create**: -- `/Users/martinrichards/code/TradingAgents/tradingagents/domains/news/entities/news_job_config.py` +- `/Users/martinrichards/code/TradingAgents/alembic/versions/20250111_add_sentiment_fields.py` **Test Requirements**: -- Job configuration validation tests -- Schedule parsing tests -- Symbol validation tests -- Serialization/deserialization tests +- Migration upgrade succeeds +- Migration downgrade succeeds +- Index is created properly +- Existing data remains intact --- -### Phase 2: Data Access +### Phase 2: Repository Layer -#### T004: Enhance NewsRepository - Vector and Job Operations -**Priority**: Critical | **Duration**: 2-3 hours | **Dependencies**: T002, T003 +#### T003: NewsRepository - Vector Similarity Search +**Priority**: Critical | **Duration**: 2-3 hours | **Dependencies**: T001, T002 -**Description**: Add vector similarity search and NewsJobConfig CRUD operations +**Description**: Add RAG-powered vector similarity search using pgvectorscale **Acceptance Criteria**: -- [ ] Vector similarity search with 
cosine distance -- [ ] Batch embedding update operations -- [ ] NewsJobConfig CRUD methods -- [ ] Optimized query performance for vector operations -- [ ] Proper async connection handling +- [ ] Implement `find_similar_articles()` method with cosine distance +- [ ] Support similarity threshold filtering (0.0-1.0) +- [ ] Support optional symbol filtering +- [ ] Results ordered by similarity descending +- [ ] Proper async/await with session management +- [ ] Logging for debugging and monitoring **Implementation Details**: ```python -class NewsRepository: - # Existing methods... - - async def find_similar_articles(self, - embedding: List[float], - limit: int = 10, - threshold: float = 0.8) -> List[NewsArticle]: - """Find articles similar to given embedding using cosine distance""" - query = """ - SELECT *, 1 - (title_embedding <=> %s::vector) as similarity - FROM news_articles - WHERE title_embedding IS NOT NULL - AND 1 - (title_embedding <=> %s::vector) > %s - ORDER BY title_embedding <=> %s::vector - LIMIT %s - """ - - async with self._get_connection() as conn: - rows = await conn.fetch(query, embedding, embedding, threshold, embedding, limit) - return [NewsArticle.from_record(dict(row)) for row in rows] - - async def batch_update_embeddings(self, - articles: List[NewsArticle]) -> None: - """Efficiently update embeddings for multiple articles""" - if not articles: - return - - query = """ - UPDATE news_articles - SET title_embedding = %s, content_embedding = %s, updated_at = now() - WHERE id = %s - """ - - async with self._get_connection() as conn: - await conn.executemany(query, [ - (article.title_embedding, article.content_embedding, article.id) - for article in articles - if article.id and (article.title_embedding or article.content_embedding) - ]) - - # NewsJobConfig CRUD operations - async def create_job_config(self, config: NewsJobConfig) -> NewsJobConfig: - """Create new job configuration""" - query = """ - INSERT INTO news_job_configs (id, name, symbols, categories, frequency_cron, enabled) - VALUES (%s, %s, %s, %s, %s, %s) - RETURNING * - """ - - config.id = config.id or uuid4() - async with self._get_connection() as conn: - row = await conn.fetchrow(query, - config.id, config.name, json.dumps(config.symbols), - json.dumps(config.categories), config.frequency_cron, config.enabled) - return NewsJobConfig.from_record(dict(row)) - - async def get_active_job_configs(self) -> List[NewsJobConfig]: - """Get all enabled job configurations""" - query = "SELECT * FROM news_job_configs WHERE enabled = true" - async with self._get_connection() as conn: - rows = await conn.fetch(query) - return [NewsJobConfig.from_record(dict(row)) for row in rows] +async def find_similar_articles( + self, + embedding: List[float], + limit: int = 10, + threshold: float = 0.7, + symbol: Optional[str] = None +) -> List[NewsArticle]: + """ + Find articles similar to given embedding using pgvectorscale cosine distance. 
+ + pgvectorscale operator: <=> for cosine distance + Cosine similarity = 1 - cosine_distance + """ + async with self.db_manager.get_session() as session: + # Build query with vector similarity + query = select( + NewsArticleEntity, + (1 - NewsArticleEntity.title_embedding.cosine_distance(embedding)).label('similarity') + ).filter( + NewsArticleEntity.title_embedding.is_not(None) + ) + + # Optional symbol filter + if symbol: + query = query.filter(NewsArticleEntity.symbol == symbol) + + # Filter by similarity threshold and order by distance + query = query.filter( + (1 - NewsArticleEntity.title_embedding.cosine_distance(embedding)) >= threshold + ).order_by( + NewsArticleEntity.title_embedding.cosine_distance(embedding) + ).limit(limit) + + result = await session.execute(query) + rows = result.all() + + articles = [NewsArticle.from_entity(row[0]) for row in rows] + logger.info(f"Found {len(articles)} similar articles (threshold={threshold})") + return articles ``` **Files to Modify**: -- `/Users/martinrichards/code/TradingAgents/tradingagents/domains/news/repositories/news_repository.py` +- `/Users/martinrichards/code/TradingAgents/tradingagents/domains/news/news_repository.py` (add method to NewsRepository class) **Test Requirements**: -- Vector similarity search tests with mock data -- Batch operation performance tests -- Job config CRUD tests -- Database connection pooling tests +- Vector similarity returns correct results with test data +- Similarity threshold filtering works correctly +- Symbol filtering works correctly +- Empty result handling +- Performance test (<1s for typical queries) + +--- + +#### T004: NewsRepository - Batch Embedding Updates +**Priority**: Medium | **Duration**: 1 hour | **Dependencies**: T003 + +**Description**: Add efficient batch embedding update method + +**Acceptance Criteria**: +- [ ] Implement `batch_update_embeddings()` method +- [ ] Use PostgreSQL bulk update operations +- [ ] Support title and content embeddings +- [ ] Update timestamp on modification +- [ ] Return count of updated articles + +**Implementation Details**: +```python +async def batch_update_embeddings( + self, + article_embeddings: List[Tuple[UUID, List[float], List[float]]] +) -> int: + """Efficiently batch update embeddings for multiple articles.""" + if not article_embeddings: + return 0 + + async with self.db_manager.get_session() as session: + stmt = update(NewsArticleEntity).where( + NewsArticleEntity.id == bindparam('article_id') + ).values( + title_embedding=bindparam('title_emb'), + content_embedding=bindparam('content_emb'), + updated_at=func.now() + ) + + batch_data = [ + { + 'article_id': article_id, + 'title_emb': title_emb, + 'content_emb': content_emb + } + for article_id, title_emb, content_emb in article_embeddings + ] + + await session.execute(stmt, batch_data) + logger.info(f"Batch updated embeddings for {len(article_embeddings)} articles") + return len(article_embeddings) +``` + +**Files to Modify**: +- `/Users/martinrichards/code/TradingAgents/tradingagents/domains/news/news_repository.py` + +**Test Requirements**: +- Batch update modifies correct articles +- Performance test (sub-second for 50 articles) +- Empty list handling +- Database rollback on errors --- ### Phase 3: LLM Integration -#### T005: OpenRouter Client - Sentiment Analysis -**Priority**: Critical | **Duration**: 2-3 hours | **Dependencies**: T002 +#### T005: OpenRouter Sentiment Client +**Priority**: Critical | **Duration**: 2-3 hours | **Dependencies**: T001 **Description**: Implement OpenRouter 
client for LLM sentiment analysis **Acceptance Criteria**: -- [ ] OpenRouter API integration for sentiment analysis -- [ ] Structured prompts for financial news sentiment -- [ ] Response parsing with Pydantic models -- [ ] Error handling with graceful fallbacks -- [ ] Retry logic with exponential backoff +- [ ] OpenRouter API integration using `quick_think_llm` (claude-3.5-haiku) +- [ ] Structured JSON output: score, confidence, label, reasoning +- [ ] Financial news-focused prompts +- [ ] Exponential backoff retry logic (3 attempts) +- [ ] Keyword-based fallback on API failures +- [ ] Proper error handling and logging **Implementation Details**: ```python +@dataclass +class SentimentResult: + """Result from sentiment analysis.""" + score: float # -1.0 to 1.0 + confidence: float # 0.0 to 1.0 + label: str # "positive", "negative", "neutral" + reasoning: str + class OpenRouterSentimentClient: + """Client for sentiment analysis via OpenRouter.""" + def __init__(self, config: TradingAgentsConfig): self.api_key = config.openrouter_api_key - self.model = config.quick_think_llm - self.base_url = "https://openrouter.ai/api/v1" - + self.model = config.quick_think_llm # claude-3.5-haiku + self.base_url = "https://openrouter.ai/api/v1/chat/completions" + async def analyze_sentiment(self, title: str, content: str) -> SentimentResult: - """Analyze sentiment of news article""" - prompt = f""" - Analyze the sentiment of this financial news article: - - Title: {title} - Content: {content[:1000]}... - - Provide sentiment analysis as JSON: - {{ - "score": float between -1.0 (very negative) and 1.0 (very positive), - "confidence": float between 0.0 and 1.0, - "label": "positive" | "negative" | "neutral", - "reasoning": "brief explanation" - }} - """ - + """Analyze sentiment with fallback to keyword-based analysis.""" try: - async with aiohttp.ClientSession() as session: - response = await self._make_request(session, prompt) - return self._parse_sentiment_response(response) + prompt = self._build_sentiment_prompt(title, content) + response = await self._call_openrouter(prompt) + return self._parse_sentiment_response(response) except Exception as e: - logger.warning(f"LLM sentiment analysis failed: {e}") + logger.warning(f"OpenRouter sentiment failed: {e}, using fallback") return self._fallback_sentiment(title, content) - + def _fallback_sentiment(self, title: str, content: str) -> SentimentResult: - """Keyword-based fallback sentiment analysis""" - # Simple keyword-based sentiment as fallback - positive_words = ["gain", "profit", "up", "growth", "buy"] - negative_words = ["loss", "down", "decline", "sell", "drop"] - - text = (title + " " + content).lower() - pos_count = sum(word in text for word in positive_words) - neg_count = sum(word in text for word in negative_words) - + """Keyword-based fallback for sentiment analysis.""" + text = f"{title} {content}".lower() + positive_keywords = ['gain', 'up', 'rise', 'growth', 'profit', 'beat'] + negative_keywords = ['loss', 'down', 'fall', 'decline', 'miss', 'concern'] + + pos_count = sum(1 for kw in positive_keywords if kw in text) + neg_count = sum(1 for kw in negative_keywords if kw in text) + if pos_count > neg_count: - return SentimentResult(score=0.3, confidence=0.5, label="positive") + return SentimentResult(0.3, 0.5, "positive", "Keyword-based fallback") elif neg_count > pos_count: - return SentimentResult(score=-0.3, confidence=0.5, label="negative") + return SentimentResult(-0.3, 0.5, "negative", "Keyword-based fallback") else: - return 
SentimentResult(score=0.0, confidence=0.5, label="neutral") + return SentimentResult(0.0, 0.5, "neutral", "Keyword-based fallback") ``` **Files to Create**: - `/Users/martinrichards/code/TradingAgents/tradingagents/domains/news/clients/openrouter_sentiment_client.py` **Test Requirements**: -- Sentiment analysis API tests with VCR -- Error handling tests -- Response parsing tests +- API response parsing tests with VCR +- Retry logic tests - Fallback mechanism tests +- Error handling tests +- Integration test with real API (optional) --- -#### T006: OpenRouter Client - Vector Embeddings -**Priority**: Critical | **Duration**: 1-2 hours | **Dependencies**: T002 +#### T006: OpenRouter Embeddings Client +**Priority**: Critical | **Duration**: 1-2 hours | **Dependencies**: T001 **Description**: Implement OpenRouter client for vector embeddings generation **Acceptance Criteria**: -- [ ] OpenRouter embeddings API integration -- [ ] Text preprocessing for embedding generation -- [ ] Batch processing for multiple articles +- [ ] OpenRouter embeddings API integration (text-embedding-ada-002) +- [ ] Text preprocessing (8000 char limit) +- [ ] Batch processing support for multiple texts - [ ] 1536-dimensional vector validation -- [ ] Proper error handling and retries +- [ ] Zero-vector fallback on API failures +- [ ] Proper error handling and logging **Implementation Details**: ```python class OpenRouterEmbeddingsClient: + """Client for generating embeddings via OpenRouter.""" + def __init__(self, config: TradingAgentsConfig): self.api_key = config.openrouter_api_key self.model = "openai/text-embedding-ada-002" # Via OpenRouter - self.base_url = "https://openrouter.ai/api/v1" - + self.base_url = "https://openrouter.ai/api/v1/embeddings" + async def generate_embeddings(self, texts: List[str]) -> List[List[float]]: - """Generate embeddings for multiple texts""" + """Generate 1536-dim embeddings for multiple texts.""" if not texts: return [] - + try: + processed_texts = [self._preprocess_text(text) for text in texts] + + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json" + } + + payload = {"model": self.model, "input": processed_texts} + async with aiohttp.ClientSession() as session: - response = await self._make_embeddings_request(session, texts) - embeddings = self._parse_embeddings_response(response) - - # Validate dimensions - for i, embedding in enumerate(embeddings): - if len(embedding) != 1536: - raise ValueError(f"Invalid embedding dimension at index {i}: {len(embedding)}") - - return embeddings + async with session.post( + self.base_url, + headers=headers, + json=payload, + timeout=aiohttp.ClientTimeout(total=60) + ) as response: + response.raise_for_status() + data = await response.json() + embeddings = [item['embedding'] for item in data['data']] + + # Validate dimensions + for i, emb in enumerate(embeddings): + if len(emb) != 1536: + raise ValueError(f"Invalid embedding dimension: {len(emb)}") + + return embeddings + except Exception as e: - logger.error(f"Embeddings generation failed: {e}") - # Return zero vectors as fallback + logger.error(f"Embeddings generation failed: {e}, using zero vectors") return [[0.0] * 1536 for _ in texts] - - async def generate_article_embeddings(self, article: NewsArticle) -> Tuple[List[float], List[float]]: - """Generate embeddings for article title and content""" + + async def generate_article_embeddings( + self, + article: NewsArticle + ) -> Tuple[List[float], List[float]]: + """Generate embeddings for article title 
and content.""" texts = [] - - # Prepare texts for embedding - if article.title: - texts.append(self._preprocess_text(article.title)) + if article.headline: + texts.append(article.headline) if article.summary: - # Combine title and summary for comprehensive embedding - combined_text = f"{article.title} {article.summary}" - texts.append(self._preprocess_text(combined_text)) - + combined = f"{article.headline} {article.summary}" + texts.append(combined) + if not texts: return [0.0] * 1536, [0.0] * 1536 - + embeddings = await self.generate_embeddings(texts) title_embedding = embeddings[0] if len(embeddings) > 0 else [0.0] * 1536 content_embedding = embeddings[1] if len(embeddings) > 1 else [0.0] * 1536 - + return title_embedding, content_embedding - + def _preprocess_text(self, text: str) -> str: - """Preprocess text for optimal embedding generation""" - # Remove extra whitespace and limit length + """Preprocess text for optimal embedding generation.""" cleaned = " ".join(text.split()) return cleaned[:8000] # OpenAI embedding limit ``` @@ -449,510 +409,565 @@ class OpenRouterEmbeddingsClient: - `/Users/martinrichards/code/TradingAgents/tradingagents/domains/news/clients/openrouter_embeddings_client.py` **Test Requirements**: -- Embeddings API tests with VCR +- API response parsing tests with VCR - Batch processing tests - Vector dimension validation tests - Text preprocessing tests +- Zero-vector fallback tests --- -#### T007: Enhance NewsService - LLM Integration +#### T007: Enhance NewsService - LLM Integration **Priority**: Critical | **Duration**: 2-3 hours | **Dependencies**: T005, T006 **Description**: Integrate OpenRouter LLM clients into NewsService workflow **Acceptance Criteria**: -- [ ] Replace keyword sentiment with LLM analysis -- [ ] Add embedding generation to article processing -- [ ] End-to-end article processing pipeline -- [ ] Proper error handling and fallback strategies -- [ ] Integration with existing service methods +- [ ] Add LLM clients to NewsService `__init__()` +- [ ] Implement `_enrich_articles()` method for LLM processing +- [ ] Update `update_company_news()` to call enrichment +- [ ] Implement `find_similar_news()` for RAG queries +- [ ] Best-effort processing (failures don't block storage) +- [ ] Proper error handling and logging **Implementation Details**: ```python class NewsService: - def __init__(self, - repository: NewsRepository, - config: TradingAgentsConfig): + def __init__( + self, + google_client: GoogleNewsClient, + repository: NewsRepository, + article_scraper: ArticleScraperClient, + sentiment_client: OpenRouterSentimentClient, + embeddings_client: OpenRouterEmbeddingsClient, + ): + self.google_client = google_client self.repository = repository - self.config = config - self.sentiment_client = OpenRouterSentimentClient(config) - self.embeddings_client = OpenRouterEmbeddingsClient(config) - - async def process_articles_with_llm(self, articles: List[NewsArticle]) -> List[NewsArticle]: - """Process articles with LLM sentiment analysis and embeddings""" - processed_articles = [] - + self.article_scraper = article_scraper + self.sentiment_client = sentiment_client + self.embeddings_client = embeddings_client + + async def update_company_news(self, symbol: str) -> NewsUpdateResult: + """ + Update company news with full LLM enrichment pipeline. + + Flow: RSS → Scrape → LLM Sentiment → Embeddings → Store + """ + # 1. Get RSS feed + google_articles = self.google_client.get_company_news(symbol) + + # 2. 
Scrape content + scraped_articles = await self._scrape_articles(google_articles) + + # 3. Enrich with LLM (sentiment + embeddings) + enriched_articles = await self._enrich_articles(scraped_articles) + + # 4. Store in repository + stored_articles = await self.repository.upsert_batch(enriched_articles, symbol) + + return NewsUpdateResult(...) + + async def _enrich_articles( + self, + articles: List[NewsArticle] + ) -> List[NewsArticle]: + """Enrich articles with LLM sentiment and vector embeddings.""" + enriched = [] + for article in articles: try: - # Generate sentiment analysis + # Generate sentiment sentiment_result = await self.sentiment_client.analyze_sentiment( - article.title, article.summary or "" + article.headline, + article.summary or "" ) - - # Generate embeddings - title_embedding, content_embedding = await self.embeddings_client.generate_article_embeddings(article) - - # Update article with LLM results + article.sentiment_score = sentiment_result.score article.sentiment_confidence = sentiment_result.confidence article.sentiment_label = sentiment_result.label - article.title_embedding = title_embedding - article.content_embedding = content_embedding - - processed_articles.append(article) - + + # Generate embeddings + title_emb, content_emb = await self.embeddings_client.generate_article_embeddings(article) + article.title_embedding = title_emb + article.content_embedding = content_emb + + enriched.append(article) + except Exception as e: - logger.warning(f"Failed to process article {article.id}: {e}") - # Add article without LLM processing - processed_articles.append(article) - - return processed_articles - - async def collect_and_process_news(self, symbols: List[str]) -> List[NewsArticle]: - """Complete pipeline: collect → process → store with LLM analysis""" - # Collect raw articles (existing functionality) - raw_articles = await self.collect_news_articles(symbols) - - # Process with LLM - processed_articles = await self.process_articles_with_llm(raw_articles) - - # Store processed articles - stored_articles = [] - for article in processed_articles: - stored_article = await self.repository.create_article(article) - stored_articles.append(stored_article) - - # Batch update embeddings for efficiency - articles_with_embeddings = [a for a in stored_articles - if a.title_embedding or a.content_embedding] - if articles_with_embeddings: - await self.repository.batch_update_embeddings(articles_with_embeddings) - - return stored_articles + logger.warning(f"Failed to enrich article {article.url}: {e}") + enriched.append(article) # Store without enrichment + + return enriched + + async def find_similar_news( + self, + query_text: str, + symbol: Optional[str] = None, + limit: int = 5 + ) -> List[NewsArticle]: + """Find news articles similar to query text using RAG vector search.""" + # Generate embedding for query + query_embeddings = await self.embeddings_client.generate_embeddings([query_text]) + query_embedding = query_embeddings[0] + + # Search for similar articles + similar_articles = await self.repository.find_similar_articles( + embedding=query_embedding, + limit=limit, + threshold=0.7, + symbol=symbol + ) + + return similar_articles ``` **Files to Modify**: -- `/Users/martinrichards/code/TradingAgents/tradingagents/domains/news/services/news_service.py` +- `/Users/martinrichards/code/TradingAgents/tradingagents/domains/news/news_service.py` **Test Requirements**: -- Integration tests with mocked LLM clients -- Article processing pipeline tests +- Mock LLM clients for unit tests +- 
Integration test with real services - Error handling and fallback tests -- Performance tests for batch operations +- Performance test for batch enrichment --- -### Phase 4: Scheduling +### Phase 4: Dagster Orchestration -#### T008: APScheduler Integration - Job Scheduling -**Priority**: High | **Duration**: 3-4 hours | **Dependencies**: T003, T004, T007 +#### T008: Dagster Directory Structure +**Priority**: High | **Duration**: 30 minutes | **Dependencies**: None -**Description**: Implement scheduled news collection using APScheduler +**Description**: Create directory structure for Dagster jobs, ops, and schedules **Acceptance Criteria**: -- [ ] APScheduler setup with PostgreSQL job store -- [ ] Scheduled job execution with proper error handling -- [ ] Job configuration loading and validation -- [ ] Status monitoring and failure recovery -- [ ] CLI integration for job management +- [ ] Create `tradingagents/data/` directory +- [ ] Create subdirectories: `jobs/`, `ops/`, `schedules/`, `sensors/` +- [ ] Create `__init__.py` files for all directories +- [ ] Import structure allows clean imports **Implementation Details**: -```python -class ScheduledNewsCollector: - def __init__(self, - news_service: NewsService, - repository: NewsRepository, - config: TradingAgentsConfig): - self.news_service = news_service - self.repository = repository - self.config = config - self.scheduler = None - - async def initialize_scheduler(self): - """Initialize APScheduler with PostgreSQL job store""" - from apscheduler.schedulers.asyncio import AsyncIOScheduler - from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore - - jobstore = SQLAlchemyJobStore(url=self.config.database_url, - tablename='apscheduler_jobs') - - self.scheduler = AsyncIOScheduler() - self.scheduler.add_jobstore(jobstore, 'default') - - async def load_job_configurations(self): - """Load and schedule all active job configurations""" - job_configs = await self.repository.get_active_job_configs() - - for config in job_configs: - try: - await self._schedule_job(config) - except Exception as e: - logger.error(f"Failed to schedule job {config.name}: {e}") - - async def _schedule_job(self, job_config: NewsJobConfig): - """Schedule a single job configuration""" - job_id = f"news_collection_{job_config.id}" - - # Remove existing job if present - if self.scheduler.get_job(job_id): - self.scheduler.remove_job(job_id) - - # Add new job - from apscheduler.triggers.cron import CronTrigger - trigger = CronTrigger.from_crontab(job_config.frequency_cron) - - self.scheduler.add_job( - self._execute_news_collection, - trigger=trigger, - id=job_id, - args=[job_config], - name=f"News collection: {job_config.name}", - replace_existing=True - ) - - async def _execute_news_collection(self, job_config: NewsJobConfig): - """Execute news collection for a job configuration""" - try: - logger.info(f"Starting news collection job: {job_config.name}") - - # Collect and process news - articles = await self.news_service.collect_and_process_news(job_config.symbols) - - # Update job last run timestamp - job_config.last_run = datetime.now(timezone.utc) - await self.repository.update_job_config(job_config) - - logger.info(f"Completed news collection job: {job_config.name}, " - f"collected {len(articles)} articles") - - except Exception as e: - logger.error(f"News collection job failed: {job_config.name}, error: {e}") - # Could implement notification/alerting here - - async def start_scheduler(self): - """Start the scheduler""" - if not self.scheduler: - await 
self.initialize_scheduler()
-        
-        await self.load_job_configurations()
-        self.scheduler.start()
-        logger.info("News collection scheduler started")
-    
-    async def stop_scheduler(self):
-        """Stop the scheduler"""
-        if self.scheduler:
-            self.scheduler.shutdown(wait=True)
-            logger.info("News collection scheduler stopped")
+```
+tradingagents/data/
+├── __init__.py
+├── jobs/
+│   ├── __init__.py
+│   └── news_collection.py
+├── ops/
+│   ├── __init__.py
+│   └── news_ops.py
+├── schedules/
+│   ├── __init__.py
+│   └── news_schedules.py
+└── sensors/
+    ├── __init__.py
+    └── news_sensors.py
 ```

 **Files to Create**:
-- `/Users/martinrichards/code/TradingAgents/tradingagents/domains/news/services/scheduled_news_collector.py`
+- All directory and `__init__.py` files above

 **Test Requirements**:
-- Job scheduling tests with test scheduler
-- Job execution tests with mocked dependencies
-- Error handling and retry tests
-- Job configuration validation tests
+- Import tests for all modules
+- Directory structure validation

 ---

-#### T009: CLI Integration - Job Management Commands
-**Priority**: Medium | **Duration**: 1-2 hours | **Dependencies**: T008
+#### T009: Dagster Ops - News Collection
+**Priority**: High | **Duration**: 2-3 hours | **Dependencies**: T007, T008

-**Description**: Add CLI commands for news job management and manual execution
+**Description**: Implement Dagster op for news collection per symbol

 **Acceptance Criteria**:
-- [ ] CLI commands for job creation/management
-- [ ] Manual job execution commands
-- [ ] Job status and monitoring commands
-- [ ] Integration with existing CLI structure
-- [ ] Proper error handling and user feedback
+- [ ] `collect_news_for_symbol` op implemented
+- [ ] Proper resource management (database_manager)
+- [ ] Error handling and logging
+- [ ] Output metadata (articles_found, articles_scraped, etc.)
+- [ ] Retry policy configured
+- [ ] Op tested with build_op_context

 **Implementation Details**:
 ```python
-# Add to cli/commands/news_commands.py
-@click.group()
-def news():
-    """News domain management commands"""
-    pass
+# tradingagents/data/ops/news_ops.py
+from dagster import op, OpExecutionContext, Out, RetryPolicy, Backoff

-@news.group()
-def job():
-    """Job management commands"""
-    pass
+# Defined as an async op: Dagster awaits async ops natively, so the async
+# NewsService call below can be awaited directly.
+@op(
+    required_resource_keys={"database_manager"},
+    out=Out(dict),
+    tags={"kind": "news", "domain": "news"},
+    retry_policy=RetryPolicy(max_retries=3, delay=10, backoff=Backoff.EXPONENTIAL),
+)
+async def collect_news_for_symbol(context: OpExecutionContext, symbol: str) -> dict:
+    """
+    Collect and process news for a single stock symbol.
+    
+    Returns dict with collection statistics. 
+ """ + context.log.info(f"Starting news collection for {symbol}") -@job.command() -@click.option('--name', required=True, help='Job name') -@click.option('--symbols', required=True, help='Comma-separated stock symbols') -@click.option('--frequency', required=True, help='Cron expression or simple frequency') -@click.option('--categories', help='Comma-separated news categories') -async def create(name: str, symbols: str, frequency: str, categories: str): - """Create a new news collection job""" try: - symbol_list = [s.strip().upper() for s in symbols.split(',')] - category_list = [c.strip() for c in (categories or "").split(',')] if categories else [] - - config = NewsJobConfig( - name=name, - symbols=symbol_list, - categories=category_list, - frequency_cron=frequency, - enabled=True - ) - - # Validate configuration - errors = config.validate() - if errors: - click.echo(f"❌ Invalid configuration: {errors}") - return - - # Create job - repository = NewsRepository(get_database_config()) - created_config = await repository.create_job_config(config) - - click.echo(f"✅ Created job: {created_config.name} (ID: {created_config.id})") - - except Exception as e: - click.echo(f"❌ Failed to create job: {e}") + config = TradingAgentsConfig.from_env() + db_manager = context.resources.database_manager + news_service = NewsService.build(db_manager, config) -@job.command() -async def list(): - """List all job configurations""" - try: - repository = NewsRepository(get_database_config()) - configs = await repository.get_all_job_configs() - - if not configs: - click.echo("No jobs configured") - return - - click.echo("\n📋 News Collection Jobs:") - click.echo("=" * 60) - - for config in configs: - status = "🟢 Enabled" if config.enabled else "🔴 Disabled" - last_run = config.last_run.strftime("%Y-%m-%d %H:%M") if config.last_run else "Never" - - click.echo(f"{config.name}") - click.echo(f" Status: {status}") - click.echo(f" Symbols: {', '.join(config.symbols)}") - click.echo(f" Schedule: {config.frequency_cron}") - click.echo(f" Last Run: {last_run}") - click.echo() - - except Exception as e: - click.echo(f"❌ Failed to list jobs: {e}") + result = await news_service.update_company_news(symbol) + + context.log.info(f"Completed: {result.articles_scraped} articles for {symbol}") + + return { + "symbol": symbol, + "articles_found": result.articles_found, + "articles_scraped": result.articles_scraped, + "articles_failed": result.articles_failed, + "status": result.status, + } -@job.command() -@click.argument('job_id', type=str) -async def run(job_id: str): - """Manually execute a job""" - try: - repository = NewsRepository(get_database_config()) - config = await repository.get_job_config(UUID(job_id)) - - if not config: - click.echo(f"❌ Job not found: {job_id}") - return - - click.echo(f"🚀 Running job: {config.name}") - - # Execute job - service = NewsService(repository, get_trading_config()) - articles = await service.collect_and_process_news(config.symbols) - - click.echo(f"✅ Completed: collected {len(articles)} articles") - except Exception as e: - click.echo(f"❌ Job execution failed: {e}") + context.log.error(f"News collection failed for {symbol}: {e}") + raise ``` -**Files to Modify**: -- `/Users/martinrichards/code/TradingAgents/cli/commands/news_commands.py` +**Files to Create**: +- `/Users/martinrichards/code/TradingAgents/tradingagents/data/ops/news_ops.py` **Test Requirements**: -- CLI command tests with mocked services -- User input validation tests -- Output formatting tests +- Op execution tests with mock 
resources
+- Error handling tests
+- Retry logic tests
+- Metadata validation tests

 ---

-### Phase 5: Validation
+#### T010: Dagster Job - Daily News Collection
+**Priority**: High | **Duration**: 1-2 hours | **Dependencies**: T009

-#### T010: Integration Tests - End-to-End Workflow
-**Priority**: High | **Duration**: 2-3 hours | **Dependencies**: T007, T008
+**Description**: Implement Dagster job that orchestrates news collection across symbols
+
+**Acceptance Criteria**:
+- [ ] `news_collection_daily` job implemented
+- [ ] Dynamic op mapping for parallel symbol processing
+- [ ] Proper job tags and metadata
+- [ ] Configuration for symbol list
+- [ ] Job tested with execute_in_process
+
+**Implementation Details**:
+```python
+# tradingagents/data/jobs/news_collection.py
+from collections.abc import Generator
+
+from dagster import job, DynamicOut, DynamicOutput, Field, OpExecutionContext, op
+from tradingagents.data.ops.news_ops import collect_news_for_symbol
+
+@op(
+    out=DynamicOut(),
+    config_schema={"symbols": Field([str], default_value=["AAPL", "GOOGL", "MSFT", "TSLA"])},
+)
+def get_symbols_to_collect(context: OpExecutionContext) -> Generator[DynamicOutput, None, None]:
+    """Get list of symbols to collect news for from config."""
+    # Default symbol list comes from the config schema; schedules and tests
+    # can override it via run_config.
+    symbols = context.op_config["symbols"]
+    context.log.info(f"Collecting news for {len(symbols)} symbols: {symbols}")
+    
+    for symbol in symbols:
+        yield DynamicOutput(symbol, mapping_key=symbol)
+
+@job(tags={"dagster/priority": "high", "domain": "news"})
+def news_collection_daily():
+    """
+    Daily news collection job for all configured symbols.
+    
+    Workflow:
+    1. Get symbols to collect
+    2. Fan out: collect news for each symbol in parallel
+    3. Aggregate results
+    """
+    get_symbols_to_collect().map(collect_news_for_symbol)
+```
+
+**Files to Create**:
+- `/Users/martinrichards/code/TradingAgents/tradingagents/data/jobs/news_collection.py`
+
+**Test Requirements**:
+- Job execution tests
+- Dynamic mapping tests
+- Configuration tests
+- Parallel execution validation
+
+---
+
+#### T011: Dagster Schedule - Daily Trigger
+**Priority**: High | **Duration**: 1 hour | **Dependencies**: T010
+
+**Description**: Implement Dagster schedule for daily news collection at 6 AM UTC
+
+**Acceptance Criteria**:
+- [ ] `news_collection_daily_schedule` schedule implemented
+- [ ] Cron expression: `0 6 * * *` (daily at 6 AM UTC)
+- [ ] RunRequest configuration with symbol list
+- [ ] Proper tags and metadata
+- [ ] Schedule tested with evaluate_tick
+
+**Implementation Details**:
+```python
+# tradingagents/data/schedules/news_schedules.py
+from dagster import schedule, ScheduleEvaluationContext, RunRequest
+from tradingagents.data.jobs.news_collection import news_collection_daily
+
+@schedule(
+    job=news_collection_daily,
+    cron_schedule="0 6 * * *",  # Daily at 6 AM UTC
+    execution_timezone="UTC",
+)
+def news_collection_daily_schedule(context: ScheduleEvaluationContext):
+    """Schedule for daily news collection at 6 AM UTC."""
+    return RunRequest(
+        run_key=f"news_collection_{context.scheduled_execution_time.isoformat()}",
+        run_config={
+            "ops": {
+                "get_symbols_to_collect": {
+                    "config": {
+                        "symbols": ["AAPL", "GOOGL", "MSFT", "TSLA", "AMZN", "META", "NVDA"]
+                    }
+                }
+            }
+        },
+        tags={
+            "scheduled_time": context.scheduled_execution_time.isoformat(),
+            "job_type": "news_collection",
+        },
+    )
+```
+
+**Files to Create**:
+- `/Users/martinrichards/code/TradingAgents/tradingagents/data/schedules/news_schedules.py`
+
+**Test Requirements**:
+- Schedule evaluation tests
+- Cron schedule validation
+- RunRequest configuration tests
+- Timezone handling tests
+
+---
+
+#### T012: 
Dagster Sensor - Failure Alerting +**Priority**: Medium | **Duration**: 1 hour | **Dependencies**: T010 + +**Description**: Implement Dagster sensor for job failure alerting + +**Acceptance Criteria**: +- [ ] `news_collection_failure_sensor` run failure sensor implemented +- [ ] Monitors `news_collection_daily` job +- [ ] Logs failure details +- [ ] Placeholder for external alerting (Slack, PagerDuty, etc.) +- [ ] Sensor tested with run failure events + +**Implementation Details**: +```python +# tradingagents/data/sensors/news_sensors.py +from dagster import run_failure_sensor, RunFailureSensorContext +from tradingagents.data.jobs.news_collection import news_collection_daily + +@run_failure_sensor( + name="news_collection_failure_sensor", + monitored_jobs=[news_collection_daily], +) +def news_collection_failure_alert(context: RunFailureSensorContext): + """Alert when news collection job fails.""" + context.log.error( + f"News collection job failed!\n" + f"Run ID: {context.dagster_run.run_id}\n" + f"Failure: {context.failure_event.event_specific_data}" + ) + + # TODO: Implement external alerting + # send_slack_alert(...) + # send_pagerduty_alert(...) +``` + +**Files to Create**: +- `/Users/martinrichards/code/TradingAgents/tradingagents/data/sensors/news_sensors.py` + +**Test Requirements**: +- Sensor evaluation tests +- Failure detection tests +- Logging validation tests + +--- + +### Phase 5: Testing & Documentation + +#### T013: Integration Tests - End-to-End Workflow +**Priority**: High | **Duration**: 2-3 hours | **Dependencies**: T007, T010 **Description**: Comprehensive integration tests for complete news domain workflow **Acceptance Criteria**: -- [ ] End-to-end workflow tests from RSS to vector storage -- [ ] Agent integration tests via AgentToolkit -- [ ] Performance tests for daily collection volumes +- [ ] End-to-end workflow test: RSS → Scrape → LLM → Vector → Store +- [ ] RAG query test: Vector similarity search with semantic matching +- [ ] AgentToolkit integration test +- [ ] Performance tests (< 2s queries, < 1s vector search) - [ ] Error recovery and fallback tests - [ ] Test coverage maintained above 85% **Implementation Details**: ```python # tests/domains/news/integration/test_news_workflow.py -class TestNewsWorkflowIntegration: - - @pytest.mark.asyncio - async def test_complete_news_processing_pipeline(self, test_db, mock_openrouter): - """Test complete pipeline from RSS to vector storage""" - # Setup - config = TradingAgentsConfig.from_test_config() - repository = NewsRepository(test_db) - service = NewsService(repository, config) - - # Mock OpenRouter responses - mock_openrouter.sentiment_response = { - "score": 0.7, - "confidence": 0.85, - "label": "positive" - } - mock_openrouter.embeddings_response = [[0.1] * 1536] - - # Execute pipeline - articles = await service.collect_and_process_news(["AAPL"]) - - # Verify results - assert len(articles) > 0 - assert all(a.sentiment_score is not None for a in articles) - assert all(a.title_embedding is not None for a in articles) - - # Verify database storage - stored_articles = await repository.get_articles_by_symbol("AAPL") - assert len(stored_articles) == len(articles) - - # Test vector similarity search - similar = await repository.find_similar_articles( - articles[0].title_embedding, limit=5 - ) - assert len(similar) > 0 - - @pytest.mark.asyncio - async def test_agent_toolkit_integration(self, test_db): - """Test integration with AgentToolkit for RAG queries""" - from tradingagents.agents.libs.toolkit import AgentToolkit 
- - # Setup with real data - toolkit = AgentToolkit(test_db) - - # Test news context retrieval - context = await toolkit.get_news_context("AAPL", days=7) - assert "articles" in context - assert "sentiment_summary" in context - - # Test vector similarity for context - similar_context = await toolkit.get_similar_news( - "Apple earnings beat expectations", limit=5 - ) - assert len(similar_context) <= 5 - - @pytest.mark.asyncio - async def test_scheduler_integration(self, test_db): - """Test APScheduler integration with job management""" - config = TradingAgentsConfig.from_test_config() - repository = NewsRepository(test_db) - service = NewsService(repository, config) - scheduler = ScheduledNewsCollector(service, repository, config) - - # Create test job configuration - job_config = NewsJobConfig( - name="test_job", - symbols=["AAPL"], - frequency_cron="0 */6 * * *", # Every 6 hours - enabled=True - ) - await repository.create_job_config(job_config) - - # Test scheduler initialization - await scheduler.initialize_scheduler() - await scheduler.load_job_configurations() - - # Verify job was scheduled - assert scheduler.scheduler.get_job(f"news_collection_{job_config.id}") is not None - - # Test manual job execution - await scheduler._execute_news_collection(job_config) - - # Verify execution updated last_run - updated_config = await repository.get_job_config(job_config.id) - assert updated_config.last_run is not None - - @pytest.mark.asyncio - async def test_error_recovery_and_fallbacks(self, test_db): - """Test error handling and fallback mechanisms""" - config = TradingAgentsConfig.from_test_config() - repository = NewsRepository(test_db) - service = NewsService(repository, config) - - # Test with failing LLM client - with patch.object(service.sentiment_client, 'analyze_sentiment', side_effect=Exception("API Error")): - articles = await service.collect_and_process_news(["AAPL"]) - - # Should still process articles with fallback - assert len(articles) > 0 - # Should have fallback sentiment values - assert any(a.sentiment_score is not None for a in articles) - - @pytest.mark.asyncio - async def test_performance_benchmarks(self, test_db): - """Test performance meets requirements""" - config = TradingAgentsConfig.from_test_config() - repository = NewsRepository(test_db) - - # Create test articles with embeddings - test_articles = await self._create_test_articles_with_embeddings(repository, count=1000) - - # Test query performance (< 100ms requirement) - start_time = time.time() - articles = await repository.get_recent_articles_by_symbol("AAPL", days=30) - query_time = (time.time() - start_time) * 1000 - - assert query_time < 100, f"Query took {query_time}ms, should be < 100ms" - - # Test vector similarity performance (< 1s requirement) - start_time = time.time() - similar = await repository.find_similar_articles( - test_articles[0].title_embedding, limit=10 - ) - vector_time = (time.time() - start_time) * 1000 - - assert vector_time < 1000, f"Vector search took {vector_time}ms, should be < 1s" + +@pytest.mark.asyncio +async def test_complete_news_pipeline_end_to_end(test_db_manager): + """Test complete pipeline: RSS → Scrape → LLM → Vector → Store.""" + config = TradingAgentsConfig.from_test_env() + service = NewsService.build(test_db_manager, config) + + # Execute full pipeline + result = await service.update_company_news("AAPL") + + # Verify results + assert result.status == "completed" + assert result.articles_scraped > 0 + + # Verify database storage + articles = await 
service.repository.list_by_date_range(
+        symbol="AAPL",
+        start_date=date.today(),
+        end_date=date.today()
+    )
+    
+    assert len(articles) > 0
+    
+    # Verify LLM enrichment
+    for article in articles:
+        assert article.sentiment_score is not None
+        assert article.sentiment_confidence is not None
+        assert article.title_embedding is not None
+        assert len(article.title_embedding) == 1536
+
+@pytest.mark.asyncio
+async def test_rag_vector_similarity_search(test_db_manager):
+    """Test RAG vector similarity search functionality."""
+    service = NewsService.build(test_db_manager, TradingAgentsConfig.from_test_env())
+    
+    # Find similar articles
+    similar_articles = await service.find_similar_news(
+        query_text="Apple earnings beat expectations",
+        symbol="AAPL",
+        limit=5
+    )
+    
+    assert len(similar_articles) <= 5
+    # Verify articles are relevant (high similarity scores)
+
+@pytest.mark.asyncio
+async def test_performance_benchmarks(test_db_manager):
+    """Test performance meets requirements."""
+    repository = NewsRepository(test_db_manager)
+    
+    # Test query performance (< 2s requirement)
+    start_time = time.time()
+    articles = await repository.list_by_date_range(
+        symbol="AAPL",
+        start_date=date.today() - timedelta(days=30),
+        end_date=date.today()
+    )
+    query_time = time.time() - start_time
+    
+    assert query_time < 2.0, f"Query took {query_time}s, should be < 2s"
+    
+    # Test vector similarity performance (< 1s requirement)
+    test_embedding = [0.1] * 1536
+    start_time = time.time()
+    similar = await repository.find_similar_articles(test_embedding, limit=10)
+    vector_time = time.time() - start_time
+    
+    assert vector_time < 1.0, f"Vector search took {vector_time}s, should be < 1s"
 ```

 **Files to Create**:
 - `/Users/martinrichards/code/TradingAgents/tests/domains/news/integration/test_news_workflow.py`

 **Test Requirements**:
-- Full workflow integration tests
-- AgentToolkit integration tests
-- Performance benchmark tests
-- Error scenario tests
+- All integration tests pass
+- Performance benchmarks met
+- Test coverage > 85%

 ---

-#### T011: Documentation and Monitoring
-**Priority**: Medium | **Duration**: 1-2 hours | **Dependencies**: T010
+#### T014: Dagster Tests
+**Priority**: Medium | **Duration**: 1 hour | **Dependencies**: T010, T011

-**Description**: Update documentation and add monitoring for new functionality
+**Description**: Unit tests for Dagster ops, jobs, and schedules

 **Acceptance Criteria**:
-- [ ] Updated API documentation for new methods
-- [ ] Job scheduling configuration examples
-- [ ] Performance monitoring dashboard queries
+- [ ] Op execution tests with mocked resources
+- [ ] Job execution tests
+- [ ] Schedule evaluation tests
+- [ ] Error handling tests
+- [ ] All Dagster components tested
+
+**Implementation Details**:
+```python
+# tests/data/ops/test_news_ops.py
+import asyncio
+
+from dagster import build_op_context
+from tradingagents.data.ops.news_ops import collect_news_for_symbol
+
+def test_collect_news_for_symbol_op():
+    """Test Dagster op for news collection."""
+    # mock_database_manager is assumed to be a stub/fixture provided by the test suite
+    context = build_op_context(
+        resources={"database_manager": mock_database_manager}
+    )
+    
+    # The op is async, so direct invocation returns a coroutine to run to completion
+    result = asyncio.run(collect_news_for_symbol(context, "AAPL"))
+    
+    assert result["symbol"] == "AAPL"
+    assert result["status"] == "completed"
+    assert result["articles_found"] >= 0
+
+# tests/data/jobs/test_news_collection.py
+from tradingagents.data.jobs.news_collection import news_collection_daily
+
+def test_news_collection_daily_job():
+    """Test Dagster job execution."""
+    # execute_in_process is a method on the job definition, not a standalone import
+    result = news_collection_daily.execute_in_process(
+        run_config={
+            "ops": {
+                "get_symbols_to_collect": {
+                    "config": {"symbols": ["AAPL"]}
+                }
+            }
+        }
+    )
+    
+    assert result.success
+```
+
+**Files to Create**:
+- `/Users/martinrichards/code/TradingAgents/tests/data/ops/test_news_ops.py`
+- `/Users/martinrichards/code/TradingAgents/tests/data/jobs/test_news_collection.py`
+- `/Users/martinrichards/code/TradingAgents/tests/data/schedules/test_news_schedules.py`
+
+**Test Requirements**:
+- All Dagster tests pass
+- Coverage > 85% for Dagster code
+
+---
+
+#### T015: Documentation Updates
+**Priority**: Medium | **Duration**: 1-2 hours | **Dependencies**: T013, T014
+
+**Description**: Update documentation and monitoring for new functionality

 **Acceptance Criteria**:
+- [ ] Update API documentation for new methods
+- [ ] Dagster job configuration examples
+- [ ] Performance monitoring queries
 - [ ] Troubleshooting guide for common issues
-- [ ] Agent integration documentation
+- [ ] AgentToolkit integration documentation
+- [ ] README updates

 **Files to Modify**:
 - `/Users/martinrichards/code/TradingAgents/docs/domains/news.md`
 - `/Users/martinrichards/code/TradingAgents/docs/api-reference.md`
+- `/Users/martinrichards/code/TradingAgents/README.md`

 **Test Requirements**:
 - Documentation accuracy validation
 - Configuration example testing
+- Link validation

 ---

@@ -963,57 +978,44 @@ class TestNewsWorkflowIntegration:
 **Tasks T005 & T006** can be developed in parallel:
 - Both are independent OpenRouter client implementations
 - Different LLM capabilities (sentiment vs embeddings)
-- Can be tested independently with VCR cassettes
+- Can be tested independently with pytest-vcr

-**Phase 1 Tasks (T001, T002, T003)** have minimal dependencies:
-- T002 and T003 both depend on T001 but can be developed simultaneously
-- Entity layer changes are independent of each other
+**Tasks T009, T010, T011** can be developed in parallel after T008:
+- Ops, jobs, and schedules are independent components
+- Can be tested separately
+- Integration testing happens in T014

 ### Critical Path Analysis

-**Critical Path**: T001 → T002/T003 → T004 → T005/T006 → T007 → T008
+**Critical Path**: T001 → T002 → T003 → T007 → T009 → T010 → T013

-**Parallel Opportunities**:
-1. **Foundation Phase**: T002 + T003 (after T001)
-2. **LLM Integration**: T005 + T006 (after T002)
+**Parallel Branches**:
+1. **LLM Clients**: T005 + T006 (parallel with T003-T004)
+2. **Dagster Components**: T009 + T010 + T011 (after T008)
 3. 
**Testing**: Unit tests alongside implementation -### Risk Mitigation Strategies - -**LLM API Dependencies**: -- Implement comprehensive fallback strategies -- Use VCR for deterministic testing -- Mock clients for unit tests - -**Database Performance**: -- Test with realistic data volumes -- Monitor query performance during development -- Use proper indexes for vector operations - -**Integration Complexity**: -- Build incrementally with testing at each step -- Maintain backward compatibility -- Use feature flags for gradual rollout - --- ## Success Metrics **Technical Metrics**: - Test coverage >85% maintained -- Query performance <100ms -- Vector search performance <1s +- Query performance <2s for 30-day lookback +- Vector search performance <1s for top-10 results - Zero breaking changes to AgentToolkit +- Dagster jobs execute successfully **Functional Metrics**: -- Successful OpenRouter-only LLM integration -- Scheduled jobs executing reliably +- OpenRouter LLM sentiment analysis operational +- Vector embeddings enable semantic search +- Dagster schedules running daily without failures - Agent context enriched with sentiment and similarity **Quality Metrics**: -- All acceptance criteria met -- Comprehensive error handling -- Production-ready monitoring and documentation +- All acceptance criteria met for each task +- Comprehensive error handling and fallbacks +- Production-ready monitoring via Dagster UI +- Complete documentation for all new features --- @@ -1023,17 +1025,39 @@ class TestNewsWorkflowIntegration: **Every task follows**: Write test → Write code → Refactor ### Layered Architecture Pattern -**Strict adherence to**: Database → Entity → Repository → Service → Scheduling +**Strict adherence to**: Entity → Repository → Service → Dagster Op → Dagster Job ### Error Handling Strategy -**Graceful fallbacks** for all LLM API dependencies +**Graceful fallbacks** for all LLM API dependencies (keyword sentiment, zero vectors) -### Performance Requirements +### Performance Requirements **Async operations** with proper connection pooling throughout ### Testing Strategy -**Unit tests + Integration tests + VCR** for external API calls +**Unit tests + Integration tests + pytest-vcr** for external API calls --- -This comprehensive task breakdown provides clear implementation guidance for completing the final 5% of the news domain while maintaining architectural consistency and leveraging AI-assisted development patterns. \ No newline at end of file +## Risk Mitigation Strategies + +### LLM API Dependencies +- Implement comprehensive fallback strategies +- Use pytest-vcr for deterministic testing +- Mock clients for unit tests +- Monitor API costs and rate limits + +### Database Performance +- Test with realistic data volumes +- Monitor query performance during development +- Use proper indexes for vector operations +- Regular performance profiling + +### Dagster Integration +- Start with simple ops and jobs +- Test incrementally before full integration +- Use Dagster UI for debugging +- Implement comprehensive logging + +--- + +This comprehensive task breakdown provides clear implementation guidance for completing the final 5% of the news domain while maintaining architectural consistency with Dagster orchestration and leveraging AI-assisted development patterns. 
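+---
+
+## Appendix: Wiring the Dagster Components (Reference Sketch)
+
+T009-T012 define ops, a job, a schedule, and a sensor, but Dagster only loads what is registered in a `Definitions` object. The sketch below is one possible wiring, assuming the module paths from T008; the `definitions.py` module name and the stubbed `database_manager` resource are illustrative placeholders, not deliverables of the tasks above.
+
+```python
+# tradingagents/data/definitions.py (hypothetical wiring module, not created by any task above)
+from dagster import Definitions, ResourceDefinition
+
+from tradingagents.data.jobs.news_collection import news_collection_daily
+from tradingagents.data.schedules.news_schedules import news_collection_daily_schedule
+from tradingagents.data.sensors.news_sensors import news_collection_failure_alert
+
+# Placeholder resource: swap in the project's real DatabaseManager factory.
+database_manager = ResourceDefinition.hardcoded_resource(object())
+
+defs = Definitions(
+    jobs=[news_collection_daily],
+    schedules=[news_collection_daily_schedule],
+    sensors=[news_collection_failure_alert],
+    resources={"database_manager": database_manager},
+)
+```
+
+With a module like this in place, `dagster dev -m tradingagents.data.definitions` serves the job, schedule, and sensor in the local Dagster UI for manual runs and debugging.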
diff --git a/pyproject.toml b/pyproject.toml index 19f30081..7426ded1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,8 @@ dependencies = [ "alembic>=1.13.0", "pgvector>=0.4.1", "uuid-utils>=0.11.0", + "dagster>=1.8.0", + "dagster-postgres>=0.24.0", ] [project.optional-dependencies] @@ -60,6 +62,10 @@ tradingagents = "cli.main:app" requires = ["setuptools>=61.0", "wheel"] build-backend = "setuptools.build_meta" +[tool.setuptools.packages.find] +include = ["tradingagents*", "cli*"] +exclude = ["docker*", "assets*", "alembic*", "docs*", "tests*"] + [tool.ruff] line-length = 88 target-version = "py310" diff --git a/tests/domains/news/test_news_repository.py b/tests/domains/news/test_news_repository.py index d557c74f..44b35436 100644 --- a/tests/domains/news/test_news_repository.py +++ b/tests/domains/news/test_news_repository.py @@ -388,6 +388,172 @@ class TestNewsRepository: assert result == [] +class TestNewsArticleSentimentFields: + """Test suite for new sentiment fields in NewsArticle.""" + + def test_news_article_with_sentiment_fields(self): + """Test dataclass instantiation with new sentiment fields.""" + # Arrange & Act + article = NewsArticle( + headline="Test Article", + url="https://example.com/test", + source="Test Source", + published_date=date(2024, 1, 15), + sentiment_score=0.8, + sentiment_confidence=0.95, + sentiment_label="positive", + ) + + # Assert + assert article.sentiment_score == 0.8 + assert article.sentiment_confidence == 0.95 + assert article.sentiment_label == "positive" + + async def test_news_article_to_entity_includes_sentiment_fields( + self, test_db_manager + ): + """Test to_entity() maps new sentiment fields correctly.""" + # Arrange + article = NewsArticle( + headline="Test Article", + url="https://example.com/test", + source="Test Source", + published_date=date(2024, 1, 15), + sentiment_score=0.75, + sentiment_confidence=0.88, + sentiment_label="positive", + ) + + # Act + entity = article.to_entity(symbol="TEST") + + # Assert + assert entity.sentiment_score == 0.75 + assert entity.sentiment_confidence == 0.88 + assert entity.sentiment_label == "positive" + + async def test_news_article_from_entity_includes_sentiment_fields(self, repository): + """Test from_entity() populates new sentiment fields correctly.""" + # Arrange - Create an article with sentiment fields + article = NewsArticle( + headline="Test Article", + url="https://example.com/test-from-entity", + source="Test Source", + published_date=date(2024, 1, 15), + sentiment_score=0.65, + sentiment_confidence=0.92, + sentiment_label="negative", + ) + + # Act - Store and retrieve + await repository.upsert(article, symbol="TEST") + retrieved_articles = await repository.list("TEST", date(2024, 1, 15)) + + # Assert + assert len(retrieved_articles) == 1 + retrieved = retrieved_articles[0] + assert retrieved.sentiment_score == 0.65 + assert retrieved.sentiment_confidence == 0.92 + assert retrieved.sentiment_label == "negative" + + def test_has_reliable_sentiment_with_valid_confidence(self): + """Test has_reliable_sentiment() returns True when confidence >= 0.6.""" + # Arrange + article = NewsArticle( + headline="Test Article", + url="https://example.com/test", + source="Test Source", + published_date=date(2024, 1, 15), + sentiment_score=0.8, + sentiment_confidence=0.6, # Exactly at threshold + ) + + # Act & Assert + assert article.has_reliable_sentiment() is True + + # Test with higher confidence + article.sentiment_confidence = 0.95 + assert article.has_reliable_sentiment() is True + + def 
test_has_reliable_sentiment_with_low_confidence(self): + """Test has_reliable_sentiment() returns False when confidence < 0.6.""" + # Arrange + article = NewsArticle( + headline="Test Article", + url="https://example.com/test", + source="Test Source", + published_date=date(2024, 1, 15), + sentiment_score=0.8, + sentiment_confidence=0.59, # Just below threshold + ) + + # Act & Assert + assert article.has_reliable_sentiment() is False + + # Test with very low confidence + article.sentiment_confidence = 0.1 + assert article.has_reliable_sentiment() is False + + def test_has_reliable_sentiment_with_none_values(self): + """Test has_reliable_sentiment() returns False when fields are None.""" + # Arrange - Article with no sentiment data + article = NewsArticle( + headline="Test Article", + url="https://example.com/test", + source="Test Source", + published_date=date(2024, 1, 15), + ) + + # Act & Assert + assert article.has_reliable_sentiment() is False + + # Test with only sentiment_score + article.sentiment_score = 0.8 + assert article.has_reliable_sentiment() is False + + # Test with only sentiment_confidence + article.sentiment_score = None + article.sentiment_confidence = 0.9 + assert article.has_reliable_sentiment() is False + + async def test_news_article_roundtrip_conversion(self, repository): + """Test to_entity() → from_entity() preserves all fields including new sentiment fields.""" + # Arrange - Create article with all fields including new sentiment fields + original = NewsArticle( + headline="Roundtrip Test Article", + url="https://example.com/roundtrip-test", + source="Test Source", + published_date=date(2024, 1, 15), + summary="Test summary", + entities=["Entity1", "Entity2"], + sentiment_score=0.72, + sentiment_confidence=0.87, + sentiment_label="neutral", + author="Test Author", + category="test-category", + ) + + # Act - Store and retrieve (full roundtrip) + await repository.upsert(original, symbol="TEST") + retrieved_articles = await repository.list("TEST", date(2024, 1, 15)) + + # Assert - All fields preserved + assert len(retrieved_articles) == 1 + retrieved = retrieved_articles[0] + + assert retrieved.headline == original.headline + assert retrieved.url == original.url + assert retrieved.source == original.source + assert retrieved.published_date == original.published_date + assert retrieved.summary == original.summary + assert retrieved.entities == original.entities + assert retrieved.sentiment_score == original.sentiment_score + assert retrieved.sentiment_confidence == original.sentiment_confidence + assert retrieved.sentiment_label == original.sentiment_label + assert retrieved.author == original.author + assert retrieved.category == original.category + + class TestDatabaseConnectionManagement: """Test database connection and session management.""" diff --git a/tradingagents/config.py b/tradingagents/config.py index a36aff6b..ec557ae4 100644 --- a/tradingagents/config.py +++ b/tradingagents/config.py @@ -46,6 +46,13 @@ class TradingAgentsConfig: default_lookback_days: int = 30 default_ta_lookback_days: int = 30 + # Database settings + database_url: str = field( + default_factory=lambda: os.getenv( + "DATABASE_URL", "postgresql://localhost:5432/tradingagents" + ) + ) + def __post_init__(self): """Set computed fields after initialization.""" self.data_cache_dir = os.path.join(self.project_dir, "dataflows/data_cache") @@ -85,6 +92,9 @@ class TradingAgentsConfig: online_tools=os.getenv("ONLINE_TOOLS", "true").lower() == "true", 
default_lookback_days=int(os.getenv("DEFAULT_LOOKBACK_DAYS", "30")), default_ta_lookback_days=int(os.getenv("DEFAULT_TA_LOOKBACK_DAYS", "30")), + database_url=os.getenv( + "DATABASE_URL", "postgresql://localhost:5432/tradingagents" + ), ) def to_dict(self) -> dict: @@ -104,6 +114,7 @@ class TradingAgentsConfig: "online_tools": self.online_tools, "default_lookback_days": self.default_lookback_days, "default_ta_lookback_days": self.default_ta_lookback_days, + "database_url": self.database_url, } def copy(self) -> "TradingAgentsConfig": @@ -122,6 +133,7 @@ class TradingAgentsConfig: online_tools=self.online_tools, default_lookback_days=self.default_lookback_days, default_ta_lookback_days=self.default_ta_lookback_days, + database_url=self.database_url, ) diff --git a/tradingagents/domains/news/news_repository.py b/tradingagents/domains/news/news_repository.py index 876ab2ea..c344289d 100644 --- a/tradingagents/domains/news/news_repository.py +++ b/tradingagents/domains/news/news_repository.py @@ -46,6 +46,8 @@ class NewsArticle: summary: str | None = None entities: list[str] = field(default_factory=list) sentiment_score: float | None = None + sentiment_confidence: float | None = None # New field + sentiment_label: str | None = None # New field author: str | None = None category: str | None = None @@ -59,6 +61,8 @@ class NewsArticle: summary=self.summary, entities=self.entities if self.entities else None, sentiment_score=self.sentiment_score, + sentiment_confidence=self.sentiment_confidence, + sentiment_label=self.sentiment_label, author=self.author, category=self.category, symbol=symbol, @@ -77,10 +81,28 @@ class NewsArticle: summary=cast("str | None", entity.summary), entities=cast("list[str] | None", entity.entities) or [], sentiment_score=cast("float | None", entity.sentiment_score), + sentiment_confidence=cast("float | None", entity.sentiment_confidence), + sentiment_label=cast("str | None", entity.sentiment_label), author=cast("str | None", entity.author), category=cast("str | None", entity.category), ) + def has_reliable_sentiment(self) -> bool: + """ + Check if the article has reliable sentiment data. 
+ + Returns True when sentiment_score is not None AND sentiment_confidence is not None + AND sentiment_confidence >= 0.6 + + Returns: + bool: True if sentiment data is reliable, False otherwise + """ + return bool( + self.sentiment_score is not None + and self.sentiment_confidence is not None + and self.sentiment_confidence >= 0.6 + ) + class NewsArticleEntity(Base): """SQLAlchemy model for news articles with vector embedding support.""" @@ -113,6 +135,12 @@ class NewsArticleEntity(Base): JSON, nullable=True ) # Store list[str] as JSON array sentiment_score: Mapped[float | None] = mapped_column(Float, nullable=True) + sentiment_confidence: Mapped[float | None] = mapped_column( + Float, nullable=True + ) # New field + sentiment_label: Mapped[str | None] = mapped_column( + String(50), nullable=True + ) # New field author: Mapped[str | None] = mapped_column(String(255), nullable=True) category: Mapped[str | None] = mapped_column(String(100), nullable=True) @@ -227,6 +255,8 @@ class NewsRepository: "summary": article.summary, "entities": article.entities if article.entities else None, "sentiment_score": article.sentiment_score, + "sentiment_confidence": article.sentiment_confidence, + "sentiment_label": article.sentiment_label, "author": article.author, "category": article.category, "symbol": symbol, @@ -243,6 +273,8 @@ class NewsRepository: "summary": stmt.excluded.summary, "entities": stmt.excluded.entities, "sentiment_score": stmt.excluded.sentiment_score, + "sentiment_confidence": stmt.excluded.sentiment_confidence, + "sentiment_label": stmt.excluded.sentiment_label, "author": stmt.excluded.author, "category": stmt.excluded.category, "symbol": stmt.excluded.symbol, @@ -370,6 +402,8 @@ class NewsRepository: "summary": article.summary, "entities": article.entities if article.entities else None, "sentiment_score": article.sentiment_score, + "sentiment_confidence": article.sentiment_confidence, + "sentiment_label": article.sentiment_label, "author": article.author, "category": article.category, "symbol": symbol, @@ -388,6 +422,8 @@ class NewsRepository: "summary": stmt.excluded.summary, "entities": stmt.excluded.entities, "sentiment_score": stmt.excluded.sentiment_score, + "sentiment_confidence": stmt.excluded.sentiment_confidence, + "sentiment_label": stmt.excluded.sentiment_label, "author": stmt.excluded.author, "category": stmt.excluded.category, "symbol": stmt.excluded.symbol, diff --git a/tradingagents/lib/database.py b/tradingagents/lib/database.py index 85e2e0c9..cdef8c29 100644 --- a/tradingagents/lib/database.py +++ b/tradingagents/lib/database.py @@ -156,8 +156,10 @@ class DatabaseManager: def create_test_database_manager() -> DatabaseManager: """Create a test database manager for tests.""" - # Use a test database URL with credentials - test_db_url = "postgresql://postgres:postgres@localhost:5432/tradingagents_test" + # Use a test database URL with credentials matching docker setup + test_db_url = ( + "postgresql://postgres:tradingagents@localhost:5432/tradingagents_test" + ) # Create a test-specific database manager with NullPool db_manager = DatabaseManager(test_db_url) diff --git a/uv.lock b/uv.lock index 27ad685d..ecc2d09e 100644 --- a/uv.lock +++ b/uv.lock @@ -111,6 +111,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/b9/6ffb48e82c5e97b03cecee872d134a6b6666c2767b2d32ed709f3a60a8fe/anthropic-0.54.0-py3-none-any.whl", hash = "sha256:c1062a0a905daeec17ca9c06c401e4b3f24cb0495841d29d752568a1d4018d56", size = 288774, upload-time = "2025-06-11T02:46:25.578Z" }, ] 
+[[package]] +name = "antlr4-python3-runtime" +version = "4.13.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/33/5f/2cdf6f7aca3b20d3f316e9f505292e1f256a32089bd702034c29ebde6242/antlr4_python3_runtime-4.13.2.tar.gz", hash = "sha256:909b647e1d2fc2b70180ac586df3933e38919c85f98ccc656a96cd3f25ef3916", size = 117467, upload-time = "2024-08-03T19:00:12.757Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/89/03/a851e84fcbb85214dc637b6378121ef9a0dd61b4c65264675d8a5c9b1ae7/antlr4_python3_runtime-4.13.2-py3-none-any.whl", hash = "sha256:fe3835eb8d33daece0e799090eda89719dbccee7aa39ef94eed3818cafa5a7e8", size = 144462, upload-time = "2024-08-03T19:00:11.134Z" }, +] + [[package]] name = "anyio" version = "4.9.0" @@ -456,14 +465,14 @@ wheels = [ [[package]] name = "coloredlogs" -version = "15.0.1" +version = "14.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "humanfriendly" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cc/c7/eed8f27100517e8c0e6b923d5f0845d0cb99763da6fdee00478f91db7325/coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0", size = 278520, upload-time = "2021-06-11T10:22:45.202Z" } +sdist = { url = "https://files.pythonhosted.org/packages/84/1b/1ecdd371fa68839cfbda15cc671d0f6c92d2c42688df995a9bf6e36f3511/coloredlogs-14.0.tar.gz", hash = "sha256:a1fab193d2053aa6c0a97608c4342d031f1f93a3d1218432c59322441d31a505", size = 275863, upload-time = "2020-02-16T20:51:12.172Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934", size = 46018, upload-time = "2021-06-11T10:22:42.561Z" }, + { url = "https://files.pythonhosted.org/packages/5c/2f/12747be360d6dea432e7b5dfae3419132cb008535cfe614af73b9ce2643b/coloredlogs-14.0-py2.py3-none-any.whl", hash = "sha256:346f58aad6afd48444c2468618623638dadab76e4e70d5e10822676f2d32226a", size = 43888, upload-time = "2020-02-16T20:51:09.712Z" }, ] [[package]] @@ -588,6 +597,85 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" }, ] +[[package]] +name = "dagster" +version = "1.12.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "alembic" }, + { name = "antlr4-python3-runtime" }, + { name = "click" }, + { name = "coloredlogs" }, + { name = "dagster-pipes" }, + { name = "dagster-shared" }, + { name = "docstring-parser" }, + { name = "filelock" }, + { name = "grpcio" }, + { name = "grpcio-health-checking" }, + { name = "jinja2" }, + { name = "protobuf" }, + { name = "psutil", marker = "sys_platform == 'win32'" }, + { name = "python-dotenv" }, + { name = "pytz" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "requests" }, + { name = "rich" }, + { name = "setuptools" }, + { name = "six" }, + { name = "sqlalchemy" }, + { name = "structlog" }, + { name = "tabulate" }, + { name = "tomli" }, + { name = "toposort" }, + { name = "tqdm" }, + { name = "tzdata" }, + { name = "universal-pathlib" }, + { name = "watchdog" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/db/59/d388dfbe5e7dfc0716a1f61f47014ec4373d37dbf175590a46bebf1a1b5a/dagster-1.12.2.tar.gz", hash = "sha256:cce66b20d5dd185b6d0415c495e1b40edc4e7f7f222e842e7706f67432d0c7ee", size = 1558635, upload-time = "2025-11-13T20:37:43.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/75/38/cb6461dfacfa2265a642db5adee86f3d022ca7094151154693911d356032/dagster-1.12.2-py3-none-any.whl", hash = "sha256:52b2b8873ba552d34bec0a5e31de646d8c56bef270fb020fc4a3729a0f0278a0", size = 1942153, upload-time = "2025-11-13T20:37:39.778Z" }, +] + +[[package]] +name = "dagster-pipes" +version = "1.12.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1e/14/8103379523a9f3ef479683f66ee4f5ae695466fd17f506bcaddece3b4a05/dagster_pipes-1.12.2.tar.gz", hash = "sha256:7fb4c42c2fb97acc2fcc04a2b69b18091f4fe6678665800f875f2deed17a7e21", size = 21056, upload-time = "2025-11-13T20:37:51.189Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/89/ce/2a603b4a448989c111a0b62fef922709ddef617b3678c4c0a76f620ada5a/dagster_pipes-1.12.2-py3-none-any.whl", hash = "sha256:a95ec64e1a6b91023a6c2fe4c0694b49afe1c8eda856c20dd712fb3160672ae1", size = 20829, upload-time = "2025-11-13T20:37:48.506Z" }, +] + +[[package]] +name = "dagster-postgres" +version = "0.28.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dagster" }, + { name = "psycopg2-binary" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2e/90/bfb950930f42bb70d69879ae3da213806d95d578fae95f3fb4d9cb5cc1b3/dagster_postgres-0.28.2.tar.gz", hash = "sha256:f9b6403f836f63a47d3a3af6a5b9f2c8298d706fe738884c57d818a16a0c8f2d", size = 16409, upload-time = "2025-11-13T20:43:07.407Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ac/63/c82afecf71746a231f38bc927574d4d117c2bc88c271a8a5968cf8b45fe1/dagster_postgres-0.28.2-py3-none-any.whl", hash = "sha256:536f1c2c282634392a993f017d4854fb2f49308483c34b082d9959d7f826ba70", size = 22940, upload-time = "2025-11-13T20:43:06.258Z" }, +] + +[[package]] +name = "dagster-shared" +version = "1.12.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, + { name = "platformdirs" }, + { name = "pydantic" }, + { name = "pyyaml" }, + { name = "tomlkit" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/db/1f/280c461ecc8a6664d92533a567e9f8fad1d1dd6b7da59f80efb88ecb662b/dagster_shared-1.12.2.tar.gz", hash = "sha256:4e20354bc15df717a7546a7561f36dd6e954bd1262010ccef2dabefcec4db908", size = 77846, upload-time = "2025-11-13T20:40:49.195Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/88/6da6fbbc81c0e9afe835eff60964ef516219f08b390c0424f731b7a677a7/dagster_shared-1.12.2-py3-none-any.whl", hash = "sha256:b006e78adc4be46818e5c05e9401203172a984f4921770c4a22b5cdcd8d61e45", size = 91000, upload-time = "2025-11-13T20:40:47.83Z" }, +] + [[package]] name = "dataclasses-json" version = "0.6.7" @@ -622,6 +710,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, ] +[[package]] +name = "docstring-parser" +version = "0.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442, upload-time = "2025-07-21T07:35:01.868Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" }, +] + [[package]] name = "durationpy" version = "0.10" @@ -913,6 +1010,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/35/347db7d2e7674b621afd21b12022e7f48c7b0861b5577134b4e939536141/grpcio-1.73.0-cp313-cp313-win_amd64.whl", hash = "sha256:38cf518cc54cd0c47c9539cefa8888549fcc067db0b0c66a46535ca8032020c4", size = 4335872, upload-time = "2025-06-09T10:04:29.032Z" }, ] +[[package]] +name = "grpcio-health-checking" +version = "1.71.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "grpcio" }, + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/53/86/20994347ef36b7626fb74539f13128100dd8b7eaac67efc063264e6cdc80/grpcio_health_checking-1.71.2.tar.gz", hash = "sha256:1c21ece88c641932f432b573ef504b20603bdf030ad4e1ec35dd7fdb4ea02637", size = 16770, upload-time = "2025-06-28T04:24:08.768Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1a/74/7bc6ab96bf1083cab2684f9c3ae434caa638de3d5c5574e8435e2c146598/grpcio_health_checking-1.71.2-py3-none-any.whl", hash = "sha256:f91db41410d6bd18a7828c5b6ac2bebd77a63483263cbe42bf3c0c9b86cece33", size = 18918, upload-time = "2025-06-28T04:23:56.923Z" }, +] + [[package]] name = "grpcio-status" version = "1.71.0" @@ -2636,6 +2746,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/12/18/35d1d947553d24909dca37e2ff11720eecb601360d1bac8d7a9a1bc7eb08/parsel-1.10.0-py2.py3-none-any.whl", hash = "sha256:6a0c28bd81f9df34ba665884c88efa0b18b8d2c44c81f64e27f2f0cb37d46169", size = 17266, upload-time = "2025-01-17T15:38:27.83Z" }, ] +[[package]] +name = "pathlib-abc" +version = "0.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/cb/448649d7f25d228bf0be3a04590ab7afa77f15e056f8fa976ed05ec9a78f/pathlib_abc-0.5.2.tar.gz", hash = "sha256:fcd56f147234645e2c59c7ae22808b34c364bb231f685ddd9f96885aed78a94c", size = 33342, upload-time = "2025-10-10T18:37:20.524Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/29/c028a0731e202035f0e2e0bfbf1a3e46ad6c628cbb17f6f1cc9eea5d9ff1/pathlib_abc-0.5.2-py3-none-any.whl", hash = "sha256:4c9d94cf1b23af417ce7c0417b43333b06a106c01000b286c99de230d95eefbb", size = 19070, upload-time = "2025-10-10T18:37:19.437Z" }, +] + [[package]] name = "peewee" version = "3.18.1" @@ -2824,6 +2943,50 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/cc/7e77861000a0691aeea8f4566e5d3aa716f2b1dece4a24439437e41d3d25/protobuf-5.29.5-py3-none-any.whl", hash = "sha256:6cf42630262c59b2d8de33954443d94b746c952b01434fc58a417fdbd2e84bd5", size = 172823, upload-time = "2025-05-28T23:51:58.157Z" }, ] +[[package]] +name = "psutil" +version = "7.1.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e1/88/bdd0a41e5857d5d703287598cbf08dad90aed56774ea52ae071bae9071b6/psutil-7.1.3.tar.gz", hash = 
"sha256:6c86281738d77335af7aec228328e944b30930899ea760ecf33a4dba66be5e74", size = 489059, upload-time = "2025-11-02T12:25:54.619Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a6/82/62d68066e13e46a5116df187d319d1724b3f437ddd0f958756fc052677f4/psutil-7.1.3-cp313-cp313t-win_amd64.whl", hash = "sha256:18349c5c24b06ac5612c0428ec2a0331c26443d259e2a0144a9b24b4395b58fa", size = 249642, upload-time = "2025-11-02T12:26:07.447Z" }, + { url = "https://files.pythonhosted.org/packages/df/ad/c1cd5fe965c14a0392112f68362cfceb5230819dbb5b1888950d18a11d9f/psutil-7.1.3-cp313-cp313t-win_arm64.whl", hash = "sha256:c525ffa774fe4496282fb0b1187725793de3e7c6b29e41562733cae9ada151ee", size = 245518, upload-time = "2025-11-02T12:26:09.719Z" }, + { url = "https://files.pythonhosted.org/packages/0f/1d/5774a91607035ee5078b8fd747686ebec28a962f178712de100d00b78a32/psutil-7.1.3-cp314-cp314t-win_amd64.whl", hash = "sha256:3792983e23b69843aea49c8f5b8f115572c5ab64c153bada5270086a2123c7e7", size = 250466, upload-time = "2025-11-02T12:26:21.183Z" }, + { url = "https://files.pythonhosted.org/packages/00/ca/e426584bacb43a5cb1ac91fae1937f478cd8fbe5e4ff96574e698a2c77cd/psutil-7.1.3-cp314-cp314t-win_arm64.whl", hash = "sha256:31d77fcedb7529f27bb3a0472bea9334349f9a04160e8e6e5020f22c59893264", size = 245756, upload-time = "2025-11-02T12:26:23.148Z" }, + { url = "https://files.pythonhosted.org/packages/55/4c/c3ed1a622b6ae2fd3c945a366e64eb35247a31e4db16cf5095e269e8eb3c/psutil-7.1.3-cp37-abi3-win_amd64.whl", hash = "sha256:f39c2c19fe824b47484b96f9692932248a54c43799a84282cfe58d05a6449efd", size = 247633, upload-time = "2025-11-02T12:26:33.887Z" }, + { url = "https://files.pythonhosted.org/packages/c9/ad/33b2ccec09bf96c2b2ef3f9a6f66baac8253d7565d8839e024a6b905d45d/psutil-7.1.3-cp37-abi3-win_arm64.whl", hash = "sha256:bd0d69cee829226a761e92f28140bec9a5ee9d5b4fb4b0cc589068dbfff559b1", size = 244608, upload-time = "2025-11-02T12:26:36.136Z" }, +] + +[[package]] +name = "psycopg2-binary" +version = "2.9.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ac/6c/8767aaa597ba424643dc87348c6f1754dd9f48e80fdc1b9f7ca5c3a7c213/psycopg2-binary-2.9.11.tar.gz", hash = "sha256:b6aed9e096bf63f9e75edf2581aa9a7e7186d97ab5c177aa6c87797cd591236c", size = 379620, upload-time = "2025-10-10T11:14:48.041Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ff/a8/a2709681b3ac11b0b1786def10006b8995125ba268c9a54bea6f5ae8bd3e/psycopg2_binary-2.9.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b8fb3db325435d34235b044b199e56cdf9ff41223a4b9752e8576465170bb38c", size = 3756572, upload-time = "2025-10-10T11:12:32.873Z" }, + { url = "https://files.pythonhosted.org/packages/62/e1/c2b38d256d0dafd32713e9f31982a5b028f4a3651f446be70785f484f472/psycopg2_binary-2.9.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:366df99e710a2acd90efed3764bb1e28df6c675d33a7fb40df9b7281694432ee", size = 3864529, upload-time = "2025-10-10T11:12:36.791Z" }, + { url = "https://files.pythonhosted.org/packages/11/32/b2ffe8f3853c181e88f0a157c5fb4e383102238d73c52ac6d93a5c8bffe6/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8c55b385daa2f92cb64b12ec4536c66954ac53654c7f15a203578da4e78105c0", size = 4411242, upload-time = "2025-10-10T11:12:42.388Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/04/6ca7477e6160ae258dc96f67c371157776564679aefd247b66f4661501a2/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c0377174bf1dd416993d16edc15357f6eb17ac998244cca19bc67cdc0e2e5766", size = 4468258, upload-time = "2025-10-10T11:12:48.654Z" }, + { url = "https://files.pythonhosted.org/packages/3c/7e/6a1a38f86412df101435809f225d57c1a021307dd0689f7a5e7fe83588b1/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5c6ff3335ce08c75afaed19e08699e8aacf95d4a260b495a4a8545244fe2ceb3", size = 4166295, upload-time = "2025-10-10T11:12:52.525Z" }, + { url = "https://files.pythonhosted.org/packages/f2/7d/c07374c501b45f3579a9eb761cbf2604ddef3d96ad48679112c2c5aa9c25/psycopg2_binary-2.9.11-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:84011ba3109e06ac412f95399b704d3d6950e386b7994475b231cf61eec2fc1f", size = 3983133, upload-time = "2025-10-30T02:55:24.329Z" }, + { url = "https://files.pythonhosted.org/packages/82/56/993b7104cb8345ad7d4516538ccf8f0d0ac640b1ebd8c754a7b024e76878/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ba34475ceb08cccbdd98f6b46916917ae6eeb92b5ae111df10b544c3a4621dc4", size = 3652383, upload-time = "2025-10-10T11:12:56.387Z" }, + { url = "https://files.pythonhosted.org/packages/2d/ac/eaeb6029362fd8d454a27374d84c6866c82c33bfc24587b4face5a8e43ef/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b31e90fdd0f968c2de3b26ab014314fe814225b6c324f770952f7d38abf17e3c", size = 3298168, upload-time = "2025-10-10T11:13:00.403Z" }, + { url = "https://files.pythonhosted.org/packages/2b/39/50c3facc66bded9ada5cbc0de867499a703dc6bca6be03070b4e3b65da6c/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:d526864e0f67f74937a8fce859bd56c979f5e2ec57ca7c627f5f1071ef7fee60", size = 3044712, upload-time = "2025-10-30T02:55:27.975Z" }, + { url = "https://files.pythonhosted.org/packages/9c/8e/b7de019a1f562f72ada81081a12823d3c1590bedc48d7d2559410a2763fe/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04195548662fa544626c8ea0f06561eb6203f1984ba5b4562764fbeb4c3d14b1", size = 3347549, upload-time = "2025-10-10T11:13:03.971Z" }, + { url = "https://files.pythonhosted.org/packages/80/2d/1bb683f64737bbb1f86c82b7359db1eb2be4e2c0c13b947f80efefa7d3e5/psycopg2_binary-2.9.11-cp313-cp313-win_amd64.whl", hash = "sha256:efff12b432179443f54e230fdf60de1f6cc726b6c832db8701227d089310e8aa", size = 2714215, upload-time = "2025-10-10T11:13:07.14Z" }, + { url = "https://files.pythonhosted.org/packages/64/12/93ef0098590cf51d9732b4f139533732565704f45bdc1ffa741b7c95fb54/psycopg2_binary-2.9.11-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:92e3b669236327083a2e33ccfa0d320dd01b9803b3e14dd986a4fc54aa00f4e1", size = 3756567, upload-time = "2025-10-10T11:13:11.885Z" }, + { url = "https://files.pythonhosted.org/packages/7c/a9/9d55c614a891288f15ca4b5209b09f0f01e3124056924e17b81b9fa054cc/psycopg2_binary-2.9.11-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e0deeb03da539fa3577fcb0b3f2554a97f7e5477c246098dbb18091a4a01c16f", size = 3864755, upload-time = "2025-10-10T11:13:17.727Z" }, + { url = "https://files.pythonhosted.org/packages/13/1e/98874ce72fd29cbde93209977b196a2edae03f8490d1bd8158e7f1daf3a0/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b52a3f9bb540a3e4ec0f6ba6d31339727b2950c9772850d6545b7eae0b9d7c5", 
size = 4411646, upload-time = "2025-10-10T11:13:24.432Z" }, + { url = "https://files.pythonhosted.org/packages/5a/bd/a335ce6645334fb8d758cc358810defca14a1d19ffbc8a10bd38a2328565/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:db4fd476874ccfdbb630a54426964959e58da4c61c9feba73e6094d51303d7d8", size = 4468701, upload-time = "2025-10-10T11:13:29.266Z" }, + { url = "https://files.pythonhosted.org/packages/44/d6/c8b4f53f34e295e45709b7568bf9b9407a612ea30387d35eb9fa84f269b4/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:47f212c1d3be608a12937cc131bd85502954398aaa1320cb4c14421a0ffccf4c", size = 4166293, upload-time = "2025-10-10T11:13:33.336Z" }, + { url = "https://files.pythonhosted.org/packages/4b/e0/f8cc36eadd1b716ab36bb290618a3292e009867e5c97ce4aba908cb99644/psycopg2_binary-2.9.11-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e35b7abae2b0adab776add56111df1735ccc71406e56203515e228a8dc07089f", size = 3983184, upload-time = "2025-10-30T02:55:32.483Z" }, + { url = "https://files.pythonhosted.org/packages/53/3e/2a8fe18a4e61cfb3417da67b6318e12691772c0696d79434184a511906dc/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fcf21be3ce5f5659daefd2b3b3b6e4727b028221ddc94e6c1523425579664747", size = 3652650, upload-time = "2025-10-10T11:13:38.181Z" }, + { url = "https://files.pythonhosted.org/packages/76/36/03801461b31b29fe58d228c24388f999fe814dfc302856e0d17f97d7c54d/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:9bd81e64e8de111237737b29d68039b9c813bdf520156af36d26819c9a979e5f", size = 3298663, upload-time = "2025-10-10T11:13:44.878Z" }, + { url = "https://files.pythonhosted.org/packages/97/77/21b0ea2e1a73aa5fa9222b2a6b8ba325c43c3a8d54272839c991f2345656/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:32770a4d666fbdafab017086655bcddab791d7cb260a16679cc5a7338b64343b", size = 3044737, upload-time = "2025-10-30T02:55:35.69Z" }, + { url = "https://files.pythonhosted.org/packages/67/69/f36abe5f118c1dca6d3726ceae164b9356985805480731ac6712a63f24f0/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c3cb3a676873d7506825221045bd70e0427c905b9c8ee8d6acd70cfcbd6e576d", size = 3347643, upload-time = "2025-10-10T11:13:53.499Z" }, + { url = "https://files.pythonhosted.org/packages/e1/36/9c0c326fe3a4227953dfb29f5d0c8ae3b8eb8c1cd2967aa569f50cb3c61f/psycopg2_binary-2.9.11-cp314-cp314-win_amd64.whl", hash = "sha256:4012c9c954dfaccd28f94e84ab9f94e12df76b4afb22331b1f0d3154893a6316", size = 2803913, upload-time = "2025-10-10T11:13:57.058Z" }, +] + [[package]] name = "pyasn1" version = "0.6.1" @@ -3120,6 +3283,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, ] +[[package]] +name = "pywin32" +version = "311" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700, upload-time = "2025-07-14T20:13:26.471Z" }, + { url = 
"https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700, upload-time = "2025-07-14T20:13:28.243Z" }, + { url = "https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318, upload-time = "2025-07-14T20:13:30.348Z" }, + { url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714, upload-time = "2025-07-14T20:13:32.449Z" }, + { url = "https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800, upload-time = "2025-07-14T20:13:34.312Z" }, + { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" }, +] + [[package]] name = "pyyaml" version = "6.0.2" @@ -3493,6 +3669,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bd/09/15a60adddee87fb0c9d1a2ed2ba0362a80451b107a77cfc87fbe72b9aac7/stockstats-0.6.5-py2.py3-none-any.whl", hash = "sha256:89a42808a8b0f94f7fa537cee8a097ae61790b3773051a889586d51a1e8c9392", size = 31727, upload-time = "2025-05-18T08:18:51.172Z" }, ] +[[package]] +name = "structlog" +version = "25.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ef/52/9ba0f43b686e7f3ddfeaa78ac3af750292662284b3661e91ad5494f21dbc/structlog-25.5.0.tar.gz", hash = "sha256:098522a3bebed9153d4570c6d0288abf80a031dfdb2048d59a49e9dc2190fc98", size = 1460830, upload-time = "2025-10-27T08:28:23.028Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/45/a132b9074aa18e799b891b91ad72133c98d8042c70f6240e4c5f9dabee2f/structlog-25.5.0-py3-none-any.whl", hash = "sha256:a8453e9b9e636ec59bd9e79bbd4a72f025981b3ba0f5837aebf48f02f37a7f9f", size = 72510, upload-time = "2025-10-27T08:28:21.535Z" }, +] + [[package]] name = "sympy" version = "1.14.0" @@ -3521,6 +3706,15 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/ba/97/a49816dd468a18ee080cf3a04640772a9f6321790d4049cece2490c4b7ad/ta_lib-0.6.4.tar.gz", hash = "sha256:08f55bc5771a6d1ceb1a2b713aad7b05f04eb0061e980c9113571c532d32e9cb", size = 381774, upload-time = "2025-06-08T15:28:15.452Z" } +[[package]] +name = "tabulate" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", size = 81090, upload-time = "2022-10-06T17:21:48.54Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = 
"sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, +] + [[package]] name = "tenacity" version = "9.1.2" @@ -3607,6 +3801,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257, upload-time = "2024-11-27T22:38:35.385Z" }, ] +[[package]] +name = "tomlkit" +version = "0.13.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cc/18/0bbf3884e9eaa38819ebe46a7bd25dcd56b67434402b66a58c4b8e552575/tomlkit-0.13.3.tar.gz", hash = "sha256:430cf247ee57df2b94ee3fbe588e71d362a941ebb545dec29b53961d61add2a1", size = 185207, upload-time = "2025-06-05T07:13:44.947Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/75/8539d011f6be8e29f339c42e633aae3cb73bffa95dd0f9adec09b9c58e85/tomlkit-0.13.3-py3-none-any.whl", hash = "sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0", size = 38901, upload-time = "2025-06-05T07:13:43.546Z" }, +] + +[[package]] +name = "toposort" +version = "1.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/69/19/8e955d90985ecbd3b9adb2a759753a6840da2dff3c569d412b2c9217678b/toposort-1.10.tar.gz", hash = "sha256:bfbb479c53d0a696ea7402601f4e693c97b0367837c8898bc6471adfca37a6bd", size = 11132, upload-time = "2023-02-27T13:59:51.834Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/17/57b444fd314d5e1593350b9a31d000e7411ba8e17ce12dc7ad54ca76b810/toposort-1.10-py3-none-any.whl", hash = "sha256:cbdbc0d0bee4d2695ab2ceec97fe0679e9c10eab4b2a87a9372b929e70563a87", size = 8500, upload-time = "2023-02-25T20:07:06.538Z" }, +] + [[package]] name = "tqdm" version = "4.67.1" @@ -3684,6 +3896,8 @@ dependencies = [ { name = "backtrader" }, { name = "chainlit" }, { name = "chromadb" }, + { name = "dagster" }, + { name = "dagster-postgres" }, { name = "eodhd" }, { name = "feedparser" }, { name = "finnhub-python" }, @@ -3744,6 +3958,8 @@ requires-dist = [ { name = "backtrader", specifier = ">=1.9.78.123" }, { name = "chainlit", specifier = ">=2.5.5" }, { name = "chromadb", specifier = ">=1.0.12" }, + { name = "dagster", specifier = ">=1.8.0" }, + { name = "dagster-postgres", specifier = ">=0.24.0" }, { name = "eodhd", specifier = ">=1.0.32" }, { name = "feedparser", specifier = ">=6.0.11" }, { name = "finnhub-python", specifier = ">=2.4.23" }, @@ -3870,6 +4086,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" }, ] +[[package]] +name = "universal-pathlib" +version = "0.3.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fsspec" }, + { name = "pathlib-abc" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/76/db/6874223d251a2e146dae57a27ca8cb1f71e7e135aa51ad394173ffe18fc0/universal_pathlib-0.3.6.tar.gz", hash = "sha256:d8640454ff08305fc639f7980e8bad4a7d38e82f6389ff993fb0e7b2a4969de9", size = 249113, upload-time = "2025-11-13T17:05:29.882Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/47/5d/fc1f5478eb486a59549e0dbea5827633bbba01139b549968d4936154b756/universal_pathlib-0.3.6-py3-none-any.whl", hash = "sha256:ff10a86e5340ad986b6f04847bb64ba397dff7467450234ffa2ab5ff135641d8", size = 78715, upload-time = "2025-11-13T17:05:28.101Z" }, +] + [[package]] name = "update-checker" version = "0.18.0" @@ -4009,6 +4238,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/58/dd/56f0d8af71e475ed194d702f8b4cf9cea812c95e82ad823d239023c6558c/w3lib-2.3.1-py3-none-any.whl", hash = "sha256:9ccd2ae10c8c41c7279cd8ad4fe65f834be894fe7bfdd7304b991fd69325847b", size = 21751, upload-time = "2025-01-27T14:22:09.421Z" }, ] +[[package]] +name = "watchdog" +version = "6.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/db/7d/7f3d619e951c88ed75c6037b246ddcf2d322812ee8ea189be89511721d54/watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282", size = 131220, upload-time = "2024-11-01T14:07:13.037Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/68/98/b0345cabdce2041a01293ba483333582891a3bd5769b08eceb0d406056ef/watchdog-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:490ab2ef84f11129844c23fb14ecf30ef3d8a6abafd3754a6f75ca1e6654136c", size = 96480, upload-time = "2024-11-01T14:06:42.952Z" }, + { url = "https://files.pythonhosted.org/packages/85/83/cdf13902c626b28eedef7ec4f10745c52aad8a8fe7eb04ed7b1f111ca20e/watchdog-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:76aae96b00ae814b181bb25b1b98076d5fc84e8a53cd8885a318b42b6d3a5134", size = 88451, upload-time = "2024-11-01T14:06:45.084Z" }, + { url = "https://files.pythonhosted.org/packages/fe/c4/225c87bae08c8b9ec99030cd48ae9c4eca050a59bf5c2255853e18c87b50/watchdog-6.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a175f755fc2279e0b7312c0035d52e27211a5bc39719dd529625b1930917345b", size = 89057, upload-time = "2024-11-01T14:06:47.324Z" }, + { url = "https://files.pythonhosted.org/packages/a9/c7/ca4bf3e518cb57a686b2feb4f55a1892fd9a3dd13f470fca14e00f80ea36/watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13", size = 79079, upload-time = "2024-11-01T14:06:59.472Z" }, + { url = "https://files.pythonhosted.org/packages/5c/51/d46dc9332f9a647593c947b4b88e2381c8dfc0942d15b8edc0310fa4abb1/watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379", size = 79078, upload-time = "2024-11-01T14:07:01.431Z" }, + { url = "https://files.pythonhosted.org/packages/d4/57/04edbf5e169cd318d5f07b4766fee38e825d64b6913ca157ca32d1a42267/watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e", size = 79076, upload-time = "2024-11-01T14:07:02.568Z" }, + { url = "https://files.pythonhosted.org/packages/ab/cc/da8422b300e13cb187d2203f20b9253e91058aaf7db65b74142013478e66/watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f", size = 79077, upload-time = "2024-11-01T14:07:03.893Z" }, + { url = "https://files.pythonhosted.org/packages/2c/3b/b8964e04ae1a025c44ba8e4291f86e97fac443bca31de8bd98d3263d2fcf/watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26", size = 79078, upload-time = "2024-11-01T14:07:05.189Z" 
}, + { url = "https://files.pythonhosted.org/packages/62/ae/a696eb424bedff7407801c257d4b1afda455fe40821a2be430e173660e81/watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c", size = 79077, upload-time = "2024-11-01T14:07:06.376Z" }, + { url = "https://files.pythonhosted.org/packages/b5/e8/dbf020b4d98251a9860752a094d09a65e1b436ad181faf929983f697048f/watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2", size = 79078, upload-time = "2024-11-01T14:07:07.547Z" }, + { url = "https://files.pythonhosted.org/packages/07/f6/d0e5b343768e8bcb4cda79f0f2f55051bf26177ecd5651f84c07567461cf/watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a", size = 79065, upload-time = "2024-11-01T14:07:09.525Z" }, + { url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070, upload-time = "2024-11-01T14:07:10.686Z" }, + { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload-time = "2024-11-01T14:07:11.845Z" }, +] + [[package]] name = "watchfiles" version = "0.20.0"