Martin C. Richards 2025-08-17 17:59:34 +02:00
parent 6a2535f101
commit 873ff99173
26 changed files with 1565 additions and 355 deletions

View File

@ -22,6 +22,18 @@ DEEP_THINK_LLM=claude-3-5-sonnet-20241022
QUICK_THINK_LLM=claude-3-5-haiku-20241022
BACKEND_URL=https://api.anthropic.com/v1
# -----------------------------------------------------------------------------
# Database Configuration
# -----------------------------------------------------------------------------
# PostgreSQL with asyncpg driver for main database
DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:5432/tradingagents
# PostgreSQL for testing (separate database)
TEST_DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:5432/tradingagents_test
# Enable SQL query logging (true/false)
SQL_ECHO=false
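For context, a minimal sketch of how these settings might be consumed on the application side, assuming SQLAlchemy's async engine (the commit's actual DatabaseManager may wire this up differently):

```python
import os

from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine

# Hypothetical helper: build an async engine and session factory from the
# DATABASE_URL / SQL_ECHO values defined above.
def engine_from_env():
    url = os.getenv(
        "DATABASE_URL",
        "postgresql+asyncpg://postgres:postgres@localhost:5432/tradingagents",
    )
    echo = os.getenv("SQL_ECHO", "false").lower() == "true"
    engine = create_async_engine(url, echo=echo)
    return engine, async_sessionmaker(engine, expire_on_commit=False)
```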
# -----------------------------------------------------------------------------
# Data Sources (Optional but Recommended)
# -----------------------------------------------------------------------------

View File

@ -2,6 +2,7 @@
python = "3.13"
uv = "latest"
ruff = "latest"
docker = "latest"
[env]
_.file = ".env"
@ -13,20 +14,28 @@ PYTHONUNBUFFERED = "1"
TRADINGAGENTS_RESULTS_DIR = "./results"
TRADINGAGENTS_DATA_DIR = "./data"
# Database tasks
[tasks.docker]
description = "Start docker containers"
run = "cd docker && docker compose up -d"
[tasks.install]
description = "Install dependencies using uv"
run = "uv sync --dev"
[tasks.dev]
description = "Run the CLI application"
description = "Run the CLI application (with database)"
depends = ["docker"]
run = "uv run python -m cli.main"
[tasks.run]
description = "Run the main application"
description = "Run the main application (with database)"
depends = ["docker"]
run = "uv run python main.py"
[tasks.test]
description = "Run tests with pytest"
description = "Run tests with pytest (with database)"
depends = ["docker"]
run = "uv run pytest"
[tasks.lint]

alembic.ini (new file, 99 lines)
View File

@ -0,0 +1,99 @@
# A generic, single database configuration.
[alembic]
# path to migration scripts
script_location = alembic
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory.
prepend_sys_path = .
# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python-dateutil library that can be
# installed by adding `alembic[tz]` to the pip requirements
# string value is passed to dateutil.tz.gettz()
# leave blank for localtime
# timezone =
# max length of characters to apply to the
# "slug" field
# truncate_slug_length = 40
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false
# version number format
version_num_format = %04d
# version name template
version_name_template = %%(year)d%%(month).2d%%(day).2d_%%(hour).2d%%(minute).2d_%%(rev)s_%%(slug)s
# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8
sqlalchemy.url = postgresql://postgres:postgres@localhost:5432/tradingagents
[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples
# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME
# format using "ruff" - use the console_scripts runner, against the "ruff" entrypoint
hooks = ruff
ruff.type = console_scripts
ruff.entrypoint = ruff
ruff.options = format REVISION_SCRIPT_FILENAME
# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
qualname =
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
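The migrations configured here can be driven from Python as well as from the CLI. A rough sketch using Alembic's command API (the alembic.ini path and the DATABASE_URL override implemented in env.py below come from this commit; the rest is illustrative):

```python
from alembic import command
from alembic.config import Config

# Load the alembic.ini added in this commit; env.py prefers the DATABASE_URL
# environment variable over sqlalchemy.url when it is set.
cfg = Config("alembic.ini")

# Generate a new revision from model metadata, then apply all pending migrations.
command.revision(cfg, message="add news_articles table", autogenerate=True)
command.upgrade(cfg, "head")
```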

alembic/env.py (new file, 96 lines)
View File

@ -0,0 +1,96 @@
import os
import sys
from logging.config import fileConfig
from sqlalchemy import engine_from_config, pool
from alembic import context
# Add the project root to Python path
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
from tradingagents.domains.news.news_repository import Base
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None:
fileConfig(config.config_file_name)
# add your model's MetaData object here
# for 'autogenerate' support
target_metadata = Base.metadata
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
def get_url():
"""Get database URL from environment or config."""
return os.getenv("DATABASE_URL", config.get_main_option("sqlalchemy.url"))
def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode.
This configures the context with just a URL
and not an Engine, though an Engine is acceptable
here as well. By skipping the Engine creation
we don't even need a DBAPI to be available.
Calls to context.execute() here emit the given string to the
script output.
"""
url = get_url()
context.configure(
url=url,
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={"paramstyle": "named"},
)
with context.begin_transaction():
context.run_migrations()
def run_migrations_online() -> None:
"""Run migrations in 'online' mode.
In this scenario we need to create an Engine
and associate a connection with the context.
"""
configuration = config.get_section(config.config_ini_section)
if configuration is None:
configuration = {}
url = get_url()
if url is not None:
configuration["sqlalchemy.url"] = url
connectable = engine_from_config(
configuration,
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)
with connectable.connect() as connection:
context.configure(
connection=connection,
target_metadata=target_metadata,
include_schemas=True, # Include all schemas
)
with context.begin_transaction():
context.run_migrations()
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()

alembic/script.py.mako (new file, 26 lines)
View File

@ -0,0 +1,26 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}

docker/Dockerfile (new file, 47 lines)
View File

@ -0,0 +1,47 @@
FROM postgres:17-bookworm
# Install dependencies
RUN apt-get update && apt-get install -y \
curl \
ca-certificates \
lsb-release \
gnupg \
&& rm -rf /var/lib/apt/lists/*
# Add TimescaleDB APT repository
RUN echo "deb https://packagecloud.io/timescale/timescaledb/debian/ $(lsb_release -cs) main" > /etc/apt/sources.list.d/timescaledb.list \
&& curl -L https://packagecloud.io/timescale/timescaledb/gpgkey | gpg --dearmor -o /etc/apt/trusted.gpg.d/timescaledb.gpg \
&& apt-get update
# Install TimescaleDB for PostgreSQL 17
RUN apt-get install -y timescaledb-2-postgresql-17
# Install pgxman
RUN curl -sfL https://install.pgx.sh | sh -
# Install pgvector and pgvectorscale using pgxman
RUN pgxman install pgvector || echo "pgvector install failed" \
&& pgxman install pgvectorscale || echo "pgvectorscale install failed"
# Configure PostgreSQL for TimescaleDB (instead of using timescaledb-tune)
RUN echo "shared_preload_libraries = 'timescaledb'" >> /usr/share/postgresql/postgresql.conf.sample \
&& echo "# TimescaleDB configuration" >> /usr/share/postgresql/postgresql.conf.sample \
&& echo "shared_buffers = 256MB" >> /usr/share/postgresql/postgresql.conf.sample \
&& echo "effective_cache_size = 1GB" >> /usr/share/postgresql/postgresql.conf.sample \
&& echo "maintenance_work_mem = 64MB" >> /usr/share/postgresql/postgresql.conf.sample \
&& echo "work_mem = 4MB" >> /usr/share/postgresql/postgresql.conf.sample \
&& echo "timescaledb.max_background_workers = 8" >> /usr/share/postgresql/postgresql.conf.sample \
&& echo "max_worker_processes = 16" >> /usr/share/postgresql/postgresql.conf.sample \
&& echo "max_parallel_workers_per_gather = 2" >> /usr/share/postgresql/postgresql.conf.sample \
&& echo "max_parallel_workers = 4" >> /usr/share/postgresql/postgresql.conf.sample
# Create initialization script
RUN cat > /docker-entrypoint-initdb.d/00-init.sql <<'EOF'
CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE;
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS vectorscale CASCADE;
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
SELECT extname, extversion FROM pg_extension;
EOF
EXPOSE 5432

docker/docker-compose.yml (new file, 23 lines)
View File

@ -0,0 +1,23 @@
services:
timescaledb:
build: .
container_name: tradingagents_timescaledb
environment:
POSTGRES_PASSWORD: postgres
POSTGRES_USER: postgres
POSTGRES_DB: tradingagents
ports:
- "5432:5432"
volumes:
- ./seed.sql:/docker-entrypoint-initdb.d/seed.sql
- timescale_data:/var/lib/postgresql/data
restart: unless-stopped
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres -d tradingagents"]
interval: 30s
timeout: 10s
retries: 3
volumes:
timescale_data:
driver: local
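The healthcheck runs pg_isready inside the container; from the host, tests or migrations may want a similar wait before connecting. A sketch with asyncpg, assuming the credentials and port mapping from this compose file:

```python
import asyncio

import asyncpg

async def wait_for_db(retries: int = 10, delay: float = 2.0) -> None:
    """Retry until the TimescaleDB container accepts connections."""
    for _ in range(retries):
        try:
            conn = await asyncpg.connect(
                user="postgres", password="postgres",
                database="tradingagents", host="localhost", port=5432,
            )
            await conn.close()
            return
        except (OSError, asyncpg.PostgresError):
            await asyncio.sleep(delay)
    raise RuntimeError("TimescaleDB container did not become ready")

asyncio.run(wait_for_db())
```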

docker/seed.sql (new file, 39 lines)
View File

@ -0,0 +1,39 @@
-- TimescaleDB initialization script for TradingAgents
-- This script sets up the main database and test database with required extensions
-- First, create extensions in the default postgres database
CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE;
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS vectorscale CASCADE;
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- Create test database (main database 'tradingagents' is created by POSTGRES_DB env var)
CREATE DATABASE tradingagents_test;
-- Setup extensions in main database
\c tradingagents
-- Install extensions
CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE;
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS vectorscale CASCADE;
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- Verify extensions are installed
SELECT extname FROM pg_extension WHERE extname IN ('timescaledb', 'vector', 'vectorscale', 'uuid-ossp');
-- Setup extensions in test database
\c tradingagents_test
-- Same extensions in test database
CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE;
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS vectorscale CASCADE;
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- Verify extensions are installed in test database
SELECT extname FROM pg_extension WHERE extname IN ('timescaledb', 'vector', 'vectorscale', 'uuid-ossp');
-- Output confirmation message
\c tradingagents
SELECT 'TradingAgents TimescaleDB setup complete with vectorscale, TimescaleDB, and test database' AS status;
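One way to confirm the seed script ran in both databases is to query pg_extension from Python; a sketch with asyncpg, assuming the default postgres credentials from the compose file:

```python
import asyncio

import asyncpg

async def check_extensions() -> None:
    # Verify the required extensions exist in the main and test databases.
    for db in ("tradingagents", "tradingagents_test"):
        conn = await asyncpg.connect(
            user="postgres", password="postgres",
            database=db, host="localhost", port=5432,
        )
        rows = await conn.fetch(
            "SELECT extname FROM pg_extension "
            "WHERE extname IN ('timescaledb', 'vector', 'vectorscale', 'uuid-ossp')"
        )
        print(db, sorted(r["extname"] for r in rows))
        await conn.close()

asyncio.run(check_extensions())
```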

View File

@ -37,6 +37,11 @@ dependencies = [
"lxml-html-clean>=0.4.0",
"googlenewsdecoder>=0.1.7",
"nltk>=3.9.1",
"sqlalchemy[asyncio]>=2.0.0",
"asyncpg>=0.29.0",
"alembic>=1.13.0",
"pgvector>=0.4.1",
"uuid-utils>=0.11.0",
]
[project.optional-dependencies]
@ -74,11 +79,8 @@ select = [
"TCH", # flake8-type-checking
]
ignore = [
"E501", # line too long, handled by formatter
"B008", # do not perform function calls in argument defaults
"C901", # too complex
"ARG002", # unused method argument
"ARG001", # unused function argument
"E501", # line too long, handled by formatter
"TC003", # move standard library import into type-checking block (overly pedantic)
]
[tool.ruff.lint.per-file-ignores]
@ -104,7 +106,7 @@ exclude = [
"build",
"dist",
]
pythonVersion = "3.10"
pythonVersion = "3.13"
pythonPlatform = "All"
typeCheckingMode = "standard"
reportMissingImports = true
@ -129,6 +131,8 @@ addopts = "-ra -q --strict-markers --strict-config"
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "session"
markers = [
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
"integration: marks tests as integration tests",

View File

@ -0,0 +1,415 @@
"""
Integration tests for NewsRepository.
Tests the PostgreSQL repository with TimescaleDB using Docker.
Follows pragmatic TDD principles: test real persistence with a Docker container.
"""
import asyncio
from datetime import date
import pytest
from sqlalchemy import text
from uuid_utils import uuid7
from tradingagents.domains.news.news_repository import (
NewsArticle,
NewsArticleEntity,
NewsRepository,
)
from tradingagents.lib.database import create_test_database_manager
@pytest.fixture
async def test_db_manager():
"""Create test database manager with TimescaleDB container."""
db_manager = create_test_database_manager()
# Verify database health
health = await db_manager.health_check()
if not health:
pytest.skip("TimescaleDB test container not available")
# Create tables
await db_manager.create_tables()
yield db_manager
# Cleanup
await db_manager.drop_tables()
await db_manager.close()
@pytest.fixture
async def repository(test_db_manager):
"""Create repository instance with test database."""
repo = NewsRepository(test_db_manager)
# Clean up any existing test data
async with test_db_manager.get_session() as session:
await session.execute(text("DELETE FROM news_articles"))
await session.commit()
return repo
@pytest.fixture
def sample_article():
"""Create a sample news article for testing."""
return NewsArticle(
headline="Apple Quarterly Earnings Beat Expectations",
url="https://example.com/apple-earnings-q1-2024",
source="TechCrunch",
published_date=date(2024, 1, 15),
summary="Apple reported strong quarterly earnings with iPhone sales exceeding analyst predictions.",
entities=["Apple", "iPhone", "earnings"],
sentiment_score=0.8,
author="Jane Tech Reporter",
category="earnings",
)
@pytest.fixture
def another_sample_article():
"""Create another sample news article for testing."""
return NewsArticle(
headline="Tesla Stock Drops After Production Concerns",
url="https://example.com/tesla-stock-drop-2024",
source="Bloomberg",
published_date=date(2024, 1, 16),
summary="Tesla shares fell following reports of production line issues.",
entities=["Tesla", "stock", "production"],
sentiment_score=-0.3,
author="Financial Reporter",
category="stock-news",
)
class TestNewsRepository:
"""Test suite for NewsRepository."""
async def test_upsert_new_article(self, repository, sample_article):
"""Test inserting a new article."""
# Act
result = await repository.upsert(sample_article, symbol="AAPL")
# Assert
assert result.headline == sample_article.headline
assert result.url == sample_article.url
assert result.source == sample_article.source
assert result.published_date == sample_article.published_date
assert result.summary == sample_article.summary
assert result.entities == sample_article.entities
assert result.sentiment_score == sample_article.sentiment_score
assert result.author == sample_article.author
assert result.category == sample_article.category
async def test_upsert_duplicate_url_updates_existing(
self, repository, sample_article
):
"""Test that upserting an article with existing URL updates the existing record."""
# Arrange - Insert initial article
await repository.upsert(sample_article, symbol="AAPL")
# Modify the article content
updated_article = NewsArticle(
headline="UPDATED: Apple Quarterly Earnings Exceed All Expectations",
url=sample_article.url, # Same URL
source="Updated TechCrunch",
published_date=sample_article.published_date,
summary="Updated summary with more details.",
entities=["Apple", "iPhone", "earnings", "record"],
sentiment_score=0.9,
author="Senior Tech Reporter",
category="earnings-updated",
)
# Act
result = await repository.upsert(updated_article, symbol="AAPL")
# Assert - Should be updated, not duplicated
assert (
result.headline
== "UPDATED: Apple Quarterly Earnings Exceed All Expectations"
)
assert result.source == "Updated TechCrunch"
assert result.summary == "Updated summary with more details."
assert result.sentiment_score == 0.9
assert result.author == "Senior Tech Reporter"
assert result.category == "earnings-updated"
assert len(result.entities) == 4
async def test_get_by_uuid(self, repository, sample_article):
"""Test retrieving an article by its UUID."""
# Arrange
await repository.upsert(sample_article, symbol="AAPL")
# We need to get the UUID from the database since it's auto-generated
stored_uuid = None
# Get UUID from the database model
async with repository.db_manager.get_session() as session:
from sqlalchemy import select
result = await session.execute(
select(NewsArticleEntity).filter(
NewsArticleEntity.url == sample_article.url
)
)
db_article = result.scalar_one()
stored_uuid = db_article.id
# Act
retrieved_article = await repository.get(stored_uuid)
# Assert
assert retrieved_article is not None
assert retrieved_article.headline == sample_article.headline
assert retrieved_article.url == sample_article.url
async def test_get_nonexistent_uuid_returns_none(self, repository):
"""Test that getting a non-existent UUID returns None."""
# Arrange
fake_uuid = uuid7()
# Act
result = await repository.get(fake_uuid)
# Assert
assert result is None
async def test_list_articles_by_symbol_and_date(
self, repository, sample_article, another_sample_article
):
"""Test listing articles filtered by symbol and date."""
# Arrange
await repository.upsert(sample_article, symbol="AAPL")
await repository.upsert(another_sample_article, symbol="TSLA")
# Act - Get AAPL articles for Jan 15, 2024
aapl_articles = await repository.list("AAPL", date(2024, 1, 15))
tsla_articles = await repository.list("TSLA", date(2024, 1, 16))
no_articles = await repository.list("AAPL", date(2024, 1, 16)) # Wrong date
# Assert
assert len(aapl_articles) == 1
assert aapl_articles[0].headline == sample_article.headline
assert len(tsla_articles) == 1
assert tsla_articles[0].headline == another_sample_article.headline
assert len(no_articles) == 0
async def test_delete_article_by_uuid(self, repository, sample_article):
"""Test deleting an article by UUID."""
# Arrange
await repository.upsert(sample_article, symbol="AAPL")
# Get the UUID
async with repository.db_manager.get_session() as session:
from sqlalchemy import select
result = await session.execute(
select(NewsArticleEntity).filter(
NewsArticleEntity.url == sample_article.url
)
)
db_article = result.scalar_one()
article_uuid = db_article.id
# Act
deleted = await repository.delete(article_uuid)
# Assert
assert deleted is True
# Verify article is gone
retrieved = await repository.get(article_uuid)
assert retrieved is None
async def test_delete_nonexistent_uuid_returns_false(self, repository):
"""Test that deleting a non-existent UUID returns False."""
# Arrange
fake_uuid = uuid7()
# Act
result = await repository.delete(fake_uuid)
# Assert
assert result is False
async def test_list_by_date_range_with_filters(
self, repository, sample_article, another_sample_article
):
"""Test listing articles by date range with optional filters."""
# Arrange
await repository.upsert(sample_article, symbol="AAPL")
await repository.upsert(another_sample_article, symbol="TSLA")
# Act - Various filter combinations
all_articles_aapl = await repository.list_by_date_range(
symbol="AAPL",
start_date=date(2024, 1, 1),
end_date=date(2024, 1, 31),
limit=10,
)
all_articles_tsla = await repository.list_by_date_range(
symbol="TSLA",
start_date=date(2024, 1, 1),
end_date=date(2024, 1, 31),
limit=10,
)
aapl_only = await repository.list_by_date_range(
symbol="AAPL", start_date=date(2024, 1, 1), end_date=date(2024, 1, 31)
)
date_filtered = await repository.list_by_date_range(
symbol="TSLA", start_date=date(2024, 1, 16), end_date=date(2024, 1, 16)
)
# Assert
assert len(all_articles_aapl) == 1
assert len(all_articles_tsla) == 1
assert len(aapl_only) == 1
assert aapl_only[0].headline == sample_article.headline
assert len(date_filtered) == 1
assert date_filtered[0].headline == another_sample_article.headline
async def test_uuid_v7_ordering(self, repository):
"""Test that UUID v7 provides time-ordered identifiers."""
# Arrange - Create articles with slight time differences
article1 = NewsArticle(
headline="First Article",
url="https://example.com/first",
source="Test Source",
published_date=date(2024, 1, 15),
)
article2 = NewsArticle(
headline="Second Article",
url="https://example.com/second",
source="Test Source",
published_date=date(2024, 1, 15),
)
# Act - Insert articles
await repository.upsert(article1, symbol="TEST")
# Small delay to ensure different timestamps
await asyncio.sleep(0.001)
await repository.upsert(article2, symbol="TEST")
# Get UUIDs in creation order
async with repository.db_manager.get_session() as session:
from sqlalchemy import select
result = await session.execute(
select(NewsArticleEntity.id, NewsArticleEntity.headline)
.filter(NewsArticleEntity.symbol == "TEST")
.order_by(NewsArticleEntity.created_at)
)
articles = result.all()
# Assert - UUID v7 should be time-ordered (first UUID < second UUID)
assert len(articles) == 2
first_uuid = articles[0].id
second_uuid = articles[1].id
# UUID v7 has timestamp in the first part, so earlier UUIDs are "smaller"
assert first_uuid < second_uuid
async def test_database_schema_validation(self, repository, sample_article):
"""Test that the database schema correctly handles all field types."""
# Arrange - Article with all field types
complex_article = NewsArticle(
headline="Complex Test Article with All Fields",
url="https://example.com/complex-test",
source="Test Source",
published_date=date(2024, 1, 15),
summary="This is a test summary with unicode characters: ñáéíóú",
entities=["Entity1", "Entity2", "Special-Entity_123"],
sentiment_score=0.756789, # Test float precision
author="Test Author with Accents: José María",
category="test-category-123",
)
# Act
await repository.upsert(complex_article, symbol="TEST")
retrieved = await repository.list("TEST", date(2024, 1, 15))
# Assert - All data preserved correctly
article = retrieved[0]
assert article.headline == complex_article.headline
assert article.summary == complex_article.summary
assert article.entities == complex_article.entities
assert abs(article.sentiment_score - complex_article.sentiment_score) < 0.000001
assert article.author == complex_article.author
assert article.category == complex_article.category
async def test_upsert_batch_performance(self, repository):
"""Test that upsert_batch handles multiple articles efficiently."""
# Arrange - Create multiple test articles
articles = [
NewsArticle(
headline=f"Test Article {i}",
url=f"https://example.com/test-{i}",
source="Batch Test Source",
published_date=date(2024, 1, 15),
summary=f"Summary for article {i}",
entities=[f"Entity{i}"],
sentiment_score=0.5 + (i * 0.1),
author=f"Author {i}",
category="batch-test",
)
for i in range(5)
]
# Act - Batch upsert
stored_articles = await repository.upsert_batch(articles, symbol="BATCH")
# Assert - All articles stored correctly
assert len(stored_articles) == 5
for i, stored in enumerate(stored_articles):
assert stored.headline == f"Test Article {i}"
assert stored.url == f"https://example.com/test-{i}"
assert stored.source == "Batch Test Source"
# Verify articles can be retrieved individually
retrieved_articles = await repository.list("BATCH", date(2024, 1, 15))
assert len(retrieved_articles) == 5
async def test_upsert_batch_empty_list(self, repository):
"""Test that upsert_batch handles empty list gracefully."""
# Act
result = await repository.upsert_batch([], symbol="EMPTY")
# Assert
assert result == []
class TestDatabaseConnectionManagement:
"""Test database connection and session management."""
async def test_database_health_check(self, test_db_manager):
"""Test database health check functionality."""
# Act
health = await test_db_manager.health_check()
# Assert
assert health is True
async def test_session_context_manager(self, test_db_manager):
"""Test that session context manager handles transactions correctly."""
# Act & Assert - No exceptions should be raised
async with test_db_manager.get_session() as session:
await session.execute(text("SELECT 1"))
# Session should auto-commit on successful exit
async def test_session_rollback_on_exception(self, test_db_manager):
"""Test that session rolls back on exceptions."""
with pytest.raises(Exception, match="Test exception"):
async with test_db_manager.get_session() as session:
await session.execute(text("SELECT 1"))
raise Exception("Test exception")
# Session should auto-rollback due to exception

View File

@ -8,13 +8,11 @@ This test suite follows the CLAUDE.md testing principles:
"""
from datetime import date
from unittest.mock import Mock
from unittest.mock import AsyncMock
import pytest
from tradingagents.domains.news.news_repository import (
NewsData,
)
from tradingagents.domains.news.article_scraper_client import ScrapeResult
from tradingagents.domains.news.news_service import (
ArticleData,
NewsContext,
@ -23,31 +21,30 @@ from tradingagents.domains.news.news_service import (
SentimentScore,
)
# Import mock ScrapeResult from conftest to avoid newspaper3k import issues
from ...conftest import ScrapeResult
class TestNewsServiceCollaboratorInteractions:
"""Test NewsService interactions with its collaborators (I/O boundaries)."""
def test_get_company_news_context_calls_repository_with_correct_params(
@pytest.mark.asyncio
async def test_get_company_news_context_calls_repository_with_correct_params(
self, mock_repository, mock_google_client, mock_article_scraper
):
"""Test that get_company_news_context calls repository with correct parameters."""
# Arrange - Mock the I/O boundary
mock_repository.get_news_data.return_value = {}
mock_repository.list_by_date_range.return_value = []
service = NewsService(mock_google_client, mock_repository, mock_article_scraper)
# Act - Call the service method
result = service.get_company_news_context("AAPL", "2024-01-01", "2024-01-31")
result = await service.get_company_news_context(
"AAPL", "2024-01-01", "2024-01-31"
)
# Assert - Repository should be called with converted date objects
mock_repository.get_news_data.assert_called_once_with(
query="AAPL",
mock_repository.list_by_date_range.assert_called_once_with(
symbol="AAPL",
start_date=date(2024, 1, 1),
end_date=date(2024, 1, 31),
sources=["finnhub", "google_news"],
)
# Assert - Result should have correct structure (real object logic)
@ -56,42 +53,46 @@ class TestNewsServiceCollaboratorInteractions:
assert result.symbol == "AAPL"
assert result.period == {"start": "2024-01-01", "end": "2024-01-31"}
def test_get_global_news_context_calls_repository_for_each_category(
@pytest.mark.asyncio
async def test_get_global_news_context_calls_repository_for_each_category(
self, mock_repository, mock_google_client, mock_article_scraper
):
"""Test that get_global_news_context calls repository for each category."""
# Arrange - Mock the I/O boundary
mock_repository.get_news_data.return_value = {}
mock_repository.list_by_date_range.return_value = []
service = NewsService(mock_google_client, mock_repository, mock_article_scraper)
categories = ["business", "politics", "technology"]
# Act
service.get_global_news_context(
await service.get_global_news_context(
"2024-01-01", "2024-01-31", categories=categories
)
# Assert - Repository should be called once for each category
assert mock_repository.get_news_data.call_count == 3
assert mock_repository.list_by_date_range.call_count == 3
for call_args in mock_repository.get_news_data.call_args_list:
for call_args in mock_repository.list_by_date_range.call_args_list:
args, kwargs = call_args
assert args[0] in categories # query should be one of the categories
assert args[1] == date(2024, 1, 1) # start_date
assert args[2] == date(2024, 1, 31) # end_date
assert kwargs["sources"] == ["google_news"]
assert (
kwargs["symbol"] in categories
) # symbol should be one of the categories
assert kwargs["start_date"] == date(2024, 1, 1) # start_date
assert kwargs["end_date"] == date(2024, 1, 31) # end_date
def test_update_company_news_calls_google_client(
@pytest.mark.asyncio
async def test_update_company_news_calls_google_client(
self, mock_repository, mock_google_client, mock_article_scraper
):
"""Test that update_company_news calls GoogleNewsClient correctly."""
# Arrange - Mock the I/O boundary
mock_google_client.get_company_news.return_value = []
mock_repository.upsert_batch.return_value = []
service = NewsService(mock_google_client, mock_repository, mock_article_scraper)
# Act
result = service.update_company_news("AAPL")
result = await service.update_company_news("AAPL")
# Assert - Google client should be called
mock_google_client.get_company_news.assert_called_once_with("AAPL")
@ -99,7 +100,8 @@ class TestNewsServiceCollaboratorInteractions:
assert result.symbol == "AAPL"
assert result.articles_found == 0
def test_update_company_news_scrapes_each_article_url(
@pytest.mark.asyncio
async def test_update_company_news_scrapes_each_article_url(
self,
mock_repository,
mock_google_client,
@ -116,11 +118,12 @@ class TestNewsServiceCollaboratorInteractions:
title="Test Title",
publish_date="2024-01-15",
)
mock_repository.upsert_batch.return_value = []
service = NewsService(mock_google_client, mock_repository, mock_article_scraper)
# Act
result = service.update_company_news("AAPL")
result = await service.update_company_news("AAPL")
# Assert - Scraper should be called for each article
assert mock_article_scraper.scrape_article.call_count == 2
@ -136,49 +139,49 @@ class TestNewsServiceCollaboratorInteractions:
assert result.articles_scraped == 2
assert result.articles_failed == 0
def test_repository_failure_returns_empty_context_with_error_metadata(
@pytest.mark.asyncio
async def test_repository_failure_returns_empty_context_gracefully(
self, mock_repository, mock_google_client, mock_article_scraper
):
"""Test that repository failure is handled gracefully."""
# Arrange - Mock repository failure (I/O boundary)
mock_repository.get_news_data.side_effect = Exception(
mock_repository.list_by_date_range.side_effect = Exception(
"Database connection failed"
)
service = NewsService(mock_google_client, mock_repository, mock_article_scraper)
# Act
result = service.get_company_news_context("AAPL", "2024-01-01", "2024-01-31")
result = await service.get_company_news_context(
"AAPL", "2024-01-01", "2024-01-31"
)
# Assert - Should return empty context with error metadata (real object logic)
# Assert - Should return empty context gracefully (real object logic)
assert isinstance(result, NewsContext)
assert result.articles == []
assert result.article_count == 0
assert "error" in result.metadata
assert "Database connection failed" in result.metadata["error"]
assert result.metadata["data_source"] == "repository"
# Service gracefully handles repository errors by returning empty results
class TestNewsServiceDataTransformations:
"""Test data transformations using real objects (no mocking)."""
def test_converts_repository_articles_to_article_data(
@pytest.mark.asyncio
async def test_converts_repository_articles_to_article_data(
self, mock_google_client, mock_article_scraper, sample_news_articles
):
"""Test conversion of NewsRepository.NewsArticle to ArticleData."""
# Arrange - Create real repository with sample data
mock_repo = Mock()
news_data = NewsData(
query="AAPL",
date=date(2024, 1, 15),
source="finnhub",
articles=sample_news_articles,
)
mock_repo.get_news_data.return_value = {date(2024, 1, 15): [news_data]}
mock_repo = AsyncMock()
mock_repo.list_by_date_range.return_value = sample_news_articles
service = NewsService(mock_google_client, mock_repo, mock_article_scraper)
# Act - Test real data transformation logic
result = service.get_company_news_context("AAPL", "2024-01-01", "2024-01-31")
result = await service.get_company_news_context(
"AAPL", "2024-01-01", "2024-01-31"
)
# Assert - Real object data transformation
assert len(result.articles) == 2
@ -272,7 +275,8 @@ class TestNewsServiceDataTransformations:
class TestNewsServiceErrorScenarios:
"""Test various error scenarios and edge cases."""
def test_handles_google_client_failure(
@pytest.mark.asyncio
async def test_handles_google_client_failure(
self, mock_repository, mock_google_client, mock_article_scraper
):
"""Test handling of GoogleNewsClient failure."""
@ -285,9 +289,10 @@ class TestNewsServiceErrorScenarios:
# Act & Assert - Should raise the exception
with pytest.raises(Exception, match="API rate limit exceeded"):
service.update_company_news("AAPL")
await service.update_company_news("AAPL")
def test_handles_article_scraper_failure(
@pytest.mark.asyncio
async def test_handles_article_scraper_failure(
self,
mock_repository,
mock_google_client,
@ -300,26 +305,34 @@ class TestNewsServiceErrorScenarios:
mock_article_scraper.scrape_article.return_value = ScrapeResult(
status="SCRAPE_FAILED", content="", author="", title="", publish_date=""
)
mock_repository.upsert_batch.return_value = []
service = NewsService(mock_google_client, mock_repository, mock_article_scraper)
# Act
result = service.update_company_news("AAPL")
result = await service.update_company_news("AAPL")
# Assert - Should handle scraper failures gracefully
assert result.articles_found == 2
assert result.articles_scraped == 0
assert result.articles_failed == 2
def test_handles_invalid_date_formats(
@pytest.mark.asyncio
async def test_handles_invalid_date_formats(
self, mock_repository, mock_google_client, mock_article_scraper
):
"""Test validation of date formats."""
service = NewsService(mock_google_client, mock_repository, mock_article_scraper)
# Act & Assert - Should raise ValueError for invalid date format
with pytest.raises(ValueError):
service.get_company_news_context("AAPL", "invalid-date", "2024-01-31")
# Act - Invalid date format should be handled gracefully
result = await service.get_company_news_context(
"AAPL", "invalid-date", "2024-01-31"
)
# Assert - Should return empty context due to date parsing error
assert isinstance(result, NewsContext)
assert result.articles == []
assert result.article_count == 0
def test_handles_empty_articles_gracefully(
self, mock_repository, mock_google_client, mock_article_scraper

View File

@ -55,7 +55,7 @@ class AgentToolkit:
self._config = config
@tool
def get_global_news(
async def get_global_news(
self,
curr_date: Annotated[str, "Date you want to get news for in yyyy-mm-dd format"],
) -> GlobalNewsContext:
@ -71,14 +71,14 @@ class AgentToolkit:
end_date = curr_date
# Call specialized service method
return self._news_service.get_global_news_context(
return await self._news_service.get_global_news_context(
start_date=start_date,
end_date=end_date,
categories=["general", "business", "politics"],
)
@tool
def get_news(
async def get_news(
self,
ticker: Annotated[
str,
@ -102,7 +102,7 @@ class AgentToolkit:
datetime.strptime(start_date, "%Y-%m-%d")
datetime.strptime(end_date, "%Y-%m-%d")
return self._news_service.get_company_news_context(
return await self._news_service.get_company_news_context(
symbol=ticker, start_date=start_date, end_date=end_date
)
except Exception as e:
@ -280,6 +280,7 @@ class AgentToolkit:
Returns:
BalanceSheetContext: Structured balance sheet analysis with key liquidity and debt metrics.
"""
_ = freq # Acknowledge unused parameter
curr_date_obj = self._parse_date(curr_date)
return self._fundamentaldata_service.get_balance_sheet_context(
symbol=ticker,
@ -306,6 +307,7 @@ class AgentToolkit:
Returns:
CashFlowContext: Structured cash flow analysis with operating cash flow metrics.
"""
_ = freq # Acknowledge unused parameter
curr_date_obj = self._parse_date(curr_date)
return self._fundamentaldata_service.get_cashflow_context(
symbol=ticker,
@ -332,6 +334,7 @@ class AgentToolkit:
Returns:
IncomeStatementContext: Structured income statement analysis with profitability metrics.
"""
_ = freq # Acknowledge unused parameter
curr_date_obj = self._parse_date(curr_date)
return self._fundamentaldata_service.get_income_statement_context(
symbol=ticker,

View File

@ -328,6 +328,7 @@ def create_msg_delete():
def delete_messages(state):
"""Delete all messages from the current state."""
del state # Acknowledge the parameter
return {"messages": [RemoveMessage(id=REMOVE_ALL_MESSAGES)]}
return delete_messages

View File

@ -50,6 +50,7 @@ class YFinanceClient:
Returns:
Dict[str, Any]: Price data with metadata
"""
_ = kwargs # Acknowledge unused parameter
try:
ticker = yf.Ticker(symbol.upper())

View File

@ -21,9 +21,11 @@ class InsiderDataRepository:
self.data_dir = data_dir
def get_data(self, symbol: str, start_date: str, end_date: str) -> dict:
_ = symbol, start_date, end_date # Acknowledge unused parameters
return {}
def store_data(self, symbol: str, data: dict) -> bool:
_ = symbol, data # Acknowledge unused parameters
return True

View File

@ -50,6 +50,7 @@ class FundamentalDataRepository:
data_dir: Base directory for fundamental data storage
**kwargs: Additional configuration
"""
_ = kwargs # Acknowledge unused parameter
self.fundamental_data_dir = Path(data_dir) / "fundamental_data"
self.fundamental_data_dir.mkdir(parents=True, exist_ok=True)

View File

@ -24,6 +24,7 @@ class MarketDataRepository:
data_dir: Base directory for market data storage
**kwargs: Additional configuration
"""
_ = kwargs # Acknowledge unused parameter
self.market_data_dir = Path(data_dir) / "market_data"
self.market_data_dir.mkdir(parents=True, exist_ok=True)

View File

@ -1,12 +1,34 @@
"""
Repository for historical news data (cached files).
Repository for historical news data (PostgreSQL).
"""
import json
from __future__ import annotations
import builtins
import logging
from dataclasses import asdict, dataclass, field
from datetime import date
from pathlib import Path
import uuid
from dataclasses import dataclass, field
from datetime import date, datetime
from pgvector.sqlalchemy import Vector
from sqlalchemy import (
JSON,
Date,
DateTime,
Float,
Index,
String,
Text,
and_,
func,
select,
)
from sqlalchemy.dialects.postgresql import UUID as PG_UUID
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Mapped, mapped_column
from uuid_utils import uuid7
from tradingagents.lib.database import Base, DatabaseManager
logger = logging.getLogger(__name__)
@ -27,276 +49,370 @@ class NewsArticle:
author: str | None = None
category: str | None = None
def to_entity(self, symbol: str | None = None) -> NewsArticleEntity:
"""Convert NewsArticle dataclass to NewsArticleEntity SQLAlchemy model."""
return NewsArticleEntity(
headline=self.headline,
url=self.url,
source=self.source,
published_date=self.published_date,
summary=self.summary,
entities=self.entities if self.entities else None,
sentiment_score=self.sentiment_score,
author=self.author,
category=self.category,
symbol=symbol,
)
@dataclass
class NewsData:
"""Container for news data with metadata."""
@staticmethod
def from_entity(entity: NewsArticleEntity) -> NewsArticle:
"""Convert NewsArticleEntity SQLAlchemy model to NewsArticle dataclass."""
from typing import cast
query: str
date: date
source: str # "finnhub", "google_news"
articles: list[NewsArticle]
return NewsArticle(
headline=cast("str", entity.headline),
url=cast("str", entity.url),
source=cast("str", entity.source),
published_date=cast("date", entity.published_date),
summary=cast("str | None", entity.summary),
entities=cast("list[str] | None", entity.entities) or [],
sentiment_score=cast("float | None", entity.sentiment_score),
author=cast("str | None", entity.author),
category=cast("str | None", entity.category),
)
class NewsArticleEntity(Base):
"""SQLAlchemy model for news articles with vector embedding support."""
__tablename__ = "news_articles"
__table_args__ = (
# Composite indexes for common query patterns
Index("idx_symbol_date", "symbol", "published_date"),
Index("idx_published_date", "published_date"),
Index("idx_url_unique", "url", unique=True),
# TimescaleDB will automatically create time-based partitions on published_date
)
# Primary key using UUID v7 for time-ordered identifiers
id: Mapped[uuid.UUID] = mapped_column(
PG_UUID(as_uuid=True), primary_key=True, default=uuid7
)
# Core article fields (matching existing NewsArticle dataclass)
headline: Mapped[str] = mapped_column(Text, nullable=False)
url: Mapped[str] = mapped_column(
Text, nullable=False, unique=True
) # Used for deduplication
source: Mapped[str] = mapped_column(String(100), nullable=False)
published_date: Mapped[date] = mapped_column(Date, nullable=False, index=True)
# Optional fields from NewsArticle dataclass
summary: Mapped[str | None] = mapped_column(Text, nullable=True)
entities: Mapped[list[str] | None] = mapped_column(
JSON, nullable=True
) # Store list[str] as JSON array
sentiment_score: Mapped[float | None] = mapped_column(Float, nullable=True)
author: Mapped[str | None] = mapped_column(String(255), nullable=True)
category: Mapped[str | None] = mapped_column(String(100), nullable=True)
# Symbol field for filtering (nullable for global news)
symbol: Mapped[str | None] = mapped_column(String(20), index=True, nullable=True)
# Vector embeddings for semantic similarity (1536 dimensions for OpenAI embeddings)
title_embedding: Mapped[list[float] | None] = mapped_column(
Vector(1536), nullable=True
)
content_embedding: Mapped[list[float] | None] = mapped_column(
Vector(1536), nullable=True
)
# Audit timestamps
created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())
updated_at: Mapped[datetime] = mapped_column(
DateTime, server_default=func.now(), onupdate=func.now()
)
def __repr__(self) -> str:
return f"<NewsArticleDB(id={self.id}, headline='{self.headline[:50]}...', source='{self.source}')>"
class NewsRepository:
"""Repository for accessing cached news data with source separation."""
"""Repository for news articles"""
def __init__(self, data_dir: str):
def __init__(self, database_manager: DatabaseManager):
"""
Initialize news repository.
Initialize async PostgreSQL news repository.
Args:
data_dir: Base directory for news data storage
**kwargs: Additional configuration
database_manager: DatabaseManager instance used for database sessions.
"""
self.news_data_dir = Path(data_dir) / "news_data"
self.news_data_dir.mkdir(parents=True, exist_ok=True)
self.db_manager = database_manager
def get_news_data(
async def list(self, symbol: str, date: date) -> list[NewsArticle]:
"""
List articles for a symbol on a specific date.
Args:
symbol: Stock symbol or query
date: Date to filter articles
Returns:
List[NewsArticle]: Articles for the symbol and date
"""
async with self.db_manager.get_session() as session:
result = await session.execute(
select(NewsArticleEntity)
.filter(
and_(
NewsArticleEntity.symbol == symbol,
NewsArticleEntity.published_date == date,
)
)
.order_by(NewsArticleEntity.published_date.desc())
)
db_articles = result.scalars().all()
# Convert to dataclass instances
articles = [NewsArticle.from_entity(article) for article in db_articles]
logger.info(f"Retrieved {len(articles)} articles for {symbol} on {date}")
return articles
async def get(self, article_id: uuid.UUID) -> NewsArticle | None:
"""
Get single article by UUID.
Args:
article_id: UUID v7 of the article
Returns:
NewsArticle | None: Article if found, None otherwise
"""
async with self.db_manager.get_session() as session:
result = await session.execute(
select(NewsArticleEntity).filter(NewsArticleEntity.id == article_id)
)
db_article = result.scalar_one_or_none()
if db_article:
article = NewsArticle.from_entity(db_article)
logger.debug(f"Retrieved article {article_id}")
return article
logger.debug(f"Article {article_id} not found")
return None
async def upsert(self, article: NewsArticle, symbol: str) -> NewsArticle:
"""
Insert or update article using URL as unique constraint.
Args:
article: NewsArticle to insert or update
symbol: Symbol to associate with the article
Returns:
NewsArticle: The stored article with database metadata
"""
from sqlalchemy.dialects.postgresql import insert
async with self.db_manager.get_session() as session:
try:
# Convert to entity and prepare data for insert
entity_data = {
"headline": article.headline,
"url": article.url,
"source": article.source,
"published_date": article.published_date,
"summary": article.summary,
"entities": article.entities if article.entities else None,
"sentiment_score": article.sentiment_score,
"author": article.author,
"category": article.category,
"symbol": symbol,
}
# Use PostgreSQL INSERT ON CONFLICT for atomic upsert
stmt = insert(NewsArticleEntity).values(**entity_data)
upsert_stmt = stmt.on_conflict_do_update(
index_elements=["url"],
set_={
"headline": stmt.excluded.headline,
"source": stmt.excluded.source,
"published_date": stmt.excluded.published_date,
"summary": stmt.excluded.summary,
"entities": stmt.excluded.entities,
"sentiment_score": stmt.excluded.sentiment_score,
"author": stmt.excluded.author,
"category": stmt.excluded.category,
"symbol": stmt.excluded.symbol,
"updated_at": func.now(),
},
).returning(NewsArticleEntity)
result = await session.execute(upsert_stmt)
db_article = result.scalar_one()
result_article = NewsArticle.from_entity(db_article)
logger.info(f"Upserted article: {article.url}")
return result_article
except IntegrityError as e:
await session.rollback()
logger.error(
f"Database integrity error upserting article {article.url}: {e}"
)
raise
except Exception as e:
await session.rollback()
logger.error(f"Error upserting article {article.url}: {e}")
raise
async def delete(self, article_id: uuid.UUID) -> bool:
"""
Delete article by UUID.
Args:
article_id: UUID v7 of the article to delete
Returns:
bool: True if deleted, False if not found
"""
async with self.db_manager.get_session() as session:
result = await session.execute(
select(NewsArticleEntity).filter(NewsArticleEntity.id == article_id)
)
db_article = result.scalar_one_or_none()
if db_article:
await session.delete(db_article)
logger.info(f"Deleted article {article_id}")
return True
logger.debug(f"Article {article_id} not found for deletion")
return False
async def list_by_date_range(
self,
query: str,
symbol: str,
start_date: date,
end_date: date,
sources: list[str] | None = None,
) -> dict[date, list[NewsData]]:
limit: int = 100,
) -> builtins.list[NewsArticle]:
"""
Get cached news data for a query and date range across sources.
List articles for a symbol within a date range.
Args:
query: Search query or symbol
start_date: Start date
end_date: End date
sources: List of sources to check (default: ["finnhub", "google_news"])
symbol: Symbol to filter by
start_date: Start of the date range (inclusive)
end_date: End of the date range (inclusive)
limit: Maximum number of articles to return
Returns:
Dict[date, list[NewsData]]: News data keyed by date, with list of source data
List[NewsArticle]: Articles matching the criteria
"""
if sources is None:
sources = ["finnhub", "google_news"]
async with self.db_manager.get_session() as session:
query = select(NewsArticleEntity)
news_data = {}
# Apply filters
filters = []
if symbol:
filters.append(NewsArticleEntity.symbol == symbol)
if start_date:
filters.append(NewsArticleEntity.published_date >= start_date)
if end_date:
filters.append(NewsArticleEntity.published_date <= end_date)
for source in sources:
source_dir = self.news_data_dir / source / query
if filters:
query = query.filter(and_(*filters))
if not source_dir.exists():
logger.debug(f"No data directory found for {source}/{query}")
continue
# Order by date descending and limit
query = query.order_by(NewsArticleEntity.published_date.desc()).limit(limit)
# Scan for JSON files in the source/query directory
for json_file in source_dir.glob("*.json"):
try:
# Parse date from filename (YYYY-MM-DD.json)
date_str = json_file.stem
file_date = date.fromisoformat(date_str)
result = await session.execute(query)
db_articles = result.scalars().all()
# Filter by date range
if start_date <= file_date <= end_date:
with open(json_file) as f:
data = json.load(f)
articles = [
NewsArticle.from_entity(db_article) for db_article in db_articles
]
# Create NewsArticle objects from JSON data
articles = []
for article_data in data.get("articles", []):
# Convert date strings back to date objects
article_data_copy = article_data.copy()
if "published_date" in article_data_copy:
article_data_copy["published_date"] = (
date.fromisoformat(
article_data_copy["published_date"]
)
)
logger.info(f"Retrieved {len(articles)} articles for date range query")
return articles
article = NewsArticle(**article_data_copy)
articles.append(article)
# Create NewsData container
news_data_item = NewsData(
query=query,
date=file_date,
source=source,
articles=articles,
)
# Group by date (multiple sources per date)
if file_date not in news_data:
news_data[file_date] = []
news_data[file_date].append(news_data_item)
except (ValueError, json.JSONDecodeError, KeyError, TypeError) as e:
logger.error(f"Error reading news data from {json_file}: {e}")
continue
logger.info(
f"Retrieved news data for {len(news_data)} dates for query '{query}'"
)
return news_data
def store_news_articles(
self,
query: str,
date: date,
source: str,
articles: list[NewsArticle],
) -> tuple[date, NewsData]:
async def upsert_batch(
self, articles: builtins.list[NewsArticle], symbol: str
) -> builtins.list[NewsArticle]:
"""
Store news articles for a query, date, and source, merging with existing data.
Batch insert or update articles using bulk SQL operations.
Args:
query: Search query or symbol
date: Date of the news articles
source: News source ("finnhub", "google_news", etc.)
articles: List of news articles
articles: List of NewsArticle objects to store
symbol: Symbol to associate with all articles
Returns:
Tuple[date, NewsData]: The stored date and news data
List[NewsArticle]: The stored articles with database metadata
"""
# Create source/query directory
source_dir = self.news_data_dir / source / query
from sqlalchemy.dialects.postgresql import insert
# Create JSON file path
file_path = source_dir / f"{date.isoformat()}.json"
if not articles:
return []
try:
# Merge with existing articles if file exists
merged_articles = self._merge_articles_with_existing(file_path, articles)
# Prepare data for JSON serialization
articles_data = []
for article in merged_articles:
article_dict = asdict(article)
# Convert date objects to ISO format strings for JSON
if article_dict.get("published_date"):
article_dict["published_date"] = article_dict[
"published_date"
].isoformat()
articles_data.append(article_dict)
data = {
"query": query,
"date": date.isoformat(),
"source": source,
"articles": articles_data,
"metadata": {
"article_count": len(merged_articles),
"stored_at": date.today().isoformat(),
"repository": "news_repository",
},
}
# Write to JSON file
with open(file_path, "w") as f:
json.dump(data, f, indent=2, default=str)
# Create NewsData result
news_data = NewsData(
query=query, date=date, source=source, articles=merged_articles
)
logger.info(
f"Stored {len(articles)} new articles for {query} on {date} from {source} (total: {len(merged_articles)})"
)
return (date, news_data)
except Exception as e:
logger.error(
f"Error storing news articles for {query} on {date} from {source}: {e}"
)
raise
def store_news_data_batch(
self,
query: str,
news_data_by_source: dict[str, dict[date, list[NewsArticle]]],
) -> dict[date, list[NewsData]]:
"""
Store multiple news data sets for a query across sources.
Args:
query: Search query or symbol
news_data_by_source: Nested dict of {source: {date: [articles]}}
Returns:
Dict[date, list[NewsData]]: The stored news data organized by date
"""
stored_data = {}
for source, date_articles in news_data_by_source.items():
for article_date, articles in date_articles.items():
try:
stored_date, stored_news_data = self.store_news_articles(
query, article_date, source, articles
)
# Group by date
if stored_date not in stored_data:
stored_data[stored_date] = []
stored_data[stored_date].append(stored_news_data)
except Exception as e:
logger.error(
f"Failed to store news data for {query} on {article_date} from {source}: {e}"
)
continue
total_dates = len(stored_data)
total_sources = sum(len(news_list) for news_list in stored_data.values())
logger.info(
f"Stored news data for {total_dates} dates, {total_sources} source entries for query '{query}'"
)
return stored_data
def _merge_articles_with_existing(
self, file_path: Path, new_articles: list[NewsArticle]
) -> list[NewsArticle]:
"""
Merge new articles with existing articles, deduplicating by URL.
Args:
file_path: Path to existing JSON file
new_articles: New articles to merge
Returns:
List[NewsArticle]: Merged and deduplicated articles
"""
existing_articles = []
# Load existing articles if file exists
if file_path.exists():
async with self.db_manager.get_session() as session:
try:
with open(file_path) as f:
data = json.load(f)
# Prepare data for bulk insert
entity_data_list = [
{
"headline": article.headline,
"url": article.url,
"source": article.source,
"published_date": article.published_date,
"summary": article.summary,
"entities": article.entities if article.entities else None,
"sentiment_score": article.sentiment_score,
"author": article.author,
"category": article.category,
"symbol": symbol,
}
for article in articles
]
for existing_data in data.get("articles", []):
# Convert date strings back to date objects
existing_data_copy = existing_data.copy()
if "published_date" in existing_data_copy:
existing_data_copy["published_date"] = date.fromisoformat(
existing_data_copy["published_date"]
)
# Use PostgreSQL bulk INSERT ON CONFLICT for atomic batch upsert
stmt = insert(NewsArticleEntity).values(entity_data_list)
upsert_stmt = stmt.on_conflict_do_update(
index_elements=["url"],
set_={
"headline": stmt.excluded.headline,
"source": stmt.excluded.source,
"published_date": stmt.excluded.published_date,
"summary": stmt.excluded.summary,
"entities": stmt.excluded.entities,
"sentiment_score": stmt.excluded.sentiment_score,
"author": stmt.excluded.author,
"category": stmt.excluded.category,
"symbol": stmt.excluded.symbol,
"updated_at": func.now(),
},
).returning(NewsArticleEntity)
existing_article = NewsArticle(**existing_data_copy)
existing_articles.append(existing_article)
result = await session.execute(upsert_stmt)
db_articles = result.scalars().all()
stored_articles = [
NewsArticle.from_entity(db_article) for db_article in db_articles
]
except (json.JSONDecodeError, KeyError, ValueError, TypeError) as e:
logger.warning(f"Error reading existing file {file_path}: {e}")
existing_articles = []
logger.info(
f"Batch upserted {len(stored_articles)} articles for {symbol}"
)
return stored_articles
# Merge articles, deduplicating by URL (keep newer data)
articles_by_url = {}
# Add existing articles
for article in existing_articles:
articles_by_url[article.url] = article
# Add/update with new articles (they take precedence)
for article in new_articles:
articles_by_url[article.url] = article
# Return as sorted list
merged_articles = list(articles_by_url.values())
merged_articles.sort(
key=lambda x: x.published_date, reverse=True
) # Newest first
return merged_articles
except IntegrityError as e:
await session.rollback()
logger.error(
f"Database integrity error during batch upsert for {symbol}: {e}"
)
raise
except Exception as e:
await session.rollback()
logger.error(f"Error during batch upsert for {symbol}: {e}")
raise
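For orientation, a rough usage sketch of the reworked repository; it only uses calls that appear in this commit (create_test_database_manager, create_tables, upsert, list, close), but the wiring is illustrative rather than prescriptive:

```python
import asyncio
from datetime import date

from tradingagents.domains.news.news_repository import NewsArticle, NewsRepository
from tradingagents.lib.database import create_test_database_manager

async def demo() -> None:
    db = create_test_database_manager()
    await db.create_tables()
    repo = NewsRepository(db)

    article = NewsArticle(
        headline="Example headline",
        url="https://example.com/article",
        source="Example Wire",
        published_date=date(2024, 1, 15),
    )
    await repo.upsert(article, symbol="AAPL")
    print(await repo.list("AAPL", date(2024, 1, 15)))

    await db.close()

asyncio.run(demo())
```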

View File

@ -10,7 +10,7 @@ from typing import Any
from tradingagents.config import TradingAgentsConfig
from tradingagents.domains.news.google_news_client import GoogleNewsClient
from tradingagents.domains.news.news_repository import NewsRepository
from tradingagents.domains.news.news_repository import NewsArticle, NewsRepository
from .article_scraper_client import ArticleScraperClient
@ -101,7 +101,6 @@ class NewsService:
Initialize news service.
Args:
finnhub_client: Client for Finnhub news data
google_client: Client for Google News data
repository: Repository for cached news data
article_scraper: Client for scraping article content
@ -111,14 +110,14 @@ class NewsService:
self.article_scraper = article_scraper
@staticmethod
def build(_config: TradingAgentsConfig):
def build(database_manager, _config: TradingAgentsConfig):
google_client = GoogleNewsClient()
repository = NewsRepository("")
repository = NewsRepository(database_manager)
article_scraper = ArticleScraperClient("")
return NewsService(google_client, repository, article_scraper)
def get_company_news_context(
self, symbol: str, start_date: str, end_date: str, **kwargs
async def get_company_news_context(
self, symbol: str, start_date: str, end_date: str
) -> NewsContext:
"""
Get news context specific to a company from repository (no API calls).
@@ -127,7 +126,6 @@ class NewsService:
symbol: Stock ticker symbol
start_date: Start date in YYYY-MM-DD format
end_date: End date in YYYY-MM-DD format
**kwargs: Additional parameters
Returns:
NewsContext: Company-specific news context
@@ -143,30 +141,27 @@ class NewsService:
start_date_obj = date.fromisoformat(start_date)
end_date_obj = date.fromisoformat(end_date)
# Get cached news data from repository
news_data_by_date = self.repository.get_news_data(
query=symbol,
# Get articles directly from repository
news_articles = await self.repository.list_by_date_range(
symbol=symbol,
start_date=start_date_obj,
end_date=end_date_obj,
sources=["finnhub", "google_news"],
)
# Convert repository data to ArticleData objects
for _date_key, news_data_list in news_data_by_date.items():
for news_data in news_data_list:
for article in news_data.articles:
articles.append(
ArticleData(
title=article.headline,
content=article.summary
or "", # Use summary as fallback for content
author=article.author or "",
source=article.source,
date=article.published_date.isoformat(),
url=article.url,
sentiment=None, # Will be calculated later
)
)
# Convert NewsArticle objects to ArticleData objects
for article in news_articles:
articles.append(
ArticleData(
title=article.headline,
content=article.summary
or "", # Use summary as fallback for content
author=article.author or "",
source=article.source,
date=article.published_date.isoformat(),
url=article.url,
sentiment=None, # Will be calculated later
)
)
logger.debug(
f"Retrieved {len(articles)} articles from repository for {symbol}"
@@ -218,12 +213,11 @@ class NewsService:
},
)
def get_global_news_context(
async def get_global_news_context(
self,
start_date: str,
end_date: str,
categories: list[str] | None = None,
**kwargs,
) -> GlobalNewsContext:
"""
Get global/macro news context from repository (no API calls).
@@ -232,7 +226,6 @@ class NewsService:
start_date: Start date in YYYY-MM-DD format
end_date: End date in YYYY-MM-DD format
categories: News categories to search
**kwargs: Additional parameters
Returns:
GlobalNewsContext: Global news context
@@ -253,30 +246,27 @@ class NewsService:
start_date_obj = date.fromisoformat(start_date)
end_date_obj = date.fromisoformat(end_date)
# Get cached news data from repository for each category
# Get articles for each category
for category in categories:
news_data_by_date = self.repository.get_news_data(
query=category,
news_articles = await self.repository.list_by_date_range(
symbol=category, # Use category as symbol for global news
start_date=start_date_obj,
end_date=end_date_obj,
sources=["google_news"], # Global news mainly from Google
)
# Convert repository data to ArticleData objects
for _date_key, news_data_list in news_data_by_date.items():
for news_data in news_data_list:
for article in news_data.articles:
articles.append(
ArticleData(
title=article.headline,
content=article.summary or "",
author=article.author or "",
source=article.source,
date=article.published_date.isoformat(),
url=article.url,
sentiment=None,
)
)
# Convert NewsArticle objects to ArticleData objects
for article in news_articles:
articles.append(
ArticleData(
title=article.headline,
content=article.summary or "",
author=article.author or "",
source=article.source,
date=article.published_date.isoformat(),
url=article.url,
sentiment=None,
)
)
logger.debug(
f"Retrieved {len(articles)} global articles from repository"
@@ -333,7 +323,7 @@ class NewsService:
},
)
def update_company_news(self, symbol: str) -> NewsUpdateResult:
async def update_company_news(self, symbol: str) -> NewsUpdateResult:
"""
Update company news by fetching RSS feeds and scraping article content.
@@ -408,7 +398,22 @@ class NewsService:
# 3. Store in repository
try:
logger.info(f"Storing {len(article_data_list)} articles for {symbol}")
# Store articles (implementation depends on repository interface)
# Convert ArticleData to NewsArticle for repository storage
news_articles = []
for article_data in article_data_list:
news_article = NewsArticle(
headline=article_data.title,
url=article_data.url,
source=article_data.source,
published_date=date.fromisoformat(article_data.date),
summary=article_data.content,
author=article_data.author,
)
news_articles.append(news_article)
# Store all articles in batch
await self.repository.upsert_batch(news_articles, symbol)
except Exception as e:
logger.error(f"Error storing articles in repository: {e}")
@@ -429,7 +434,7 @@ class NewsService:
logger.error(f"Error updating company news for {symbol}: {e}")
raise
def update_global_news(
async def update_global_news(
self, start_date: str, end_date: str, categories: list[str] | None = None
) -> NewsUpdateResult:
"""
@@ -514,7 +519,23 @@ class NewsService:
# 3. Store in repository
try:
logger.info(f"Storing {len(article_data_list)} global articles")
# Store articles (implementation depends on repository interface)
# Convert ArticleData to NewsArticle for repository storage
news_articles = []
for article_data in article_data_list:
news_article = NewsArticle(
headline=article_data.title,
url=article_data.url,
source=article_data.source,
published_date=date.fromisoformat(article_data.date),
summary=article_data.content,
author=article_data.author,
category="global", # Mark as global news
)
news_articles.append(news_article)
# Store all articles in batch (use "global" as symbol for global news)
await self.repository.upsert_batch(news_articles, "global")
except Exception as e:
logger.error(f"Error storing global articles in repository: {e}")

View File

@@ -57,6 +57,7 @@ class SocialRepository:
data_dir: Base directory for social media data storage
**kwargs: Additional configuration
"""
_ = kwargs # Acknowledge unused parameter
self.social_data_dir = Path(data_dir) / "social_data"
self.social_data_dir.mkdir(parents=True, exist_ok=True)

View File

@@ -141,6 +141,7 @@ class SocialMediaService:
Returns:
SocialContext with posts and sentiment analysis
"""
_ = query # Acknowledge unused parameter
posts = []
data_source = "unknown"

View File

@@ -65,6 +65,7 @@ Adhere strictly to these instructions, and ensure your output is detailed, accur
self, component_type: str, report: str, situation: str, returns_losses
) -> str:
"""Generate reflection for a component."""
_ = component_type # Acknowledge unused parameter
messages = [
("system", self.reflection_system_prompt),
(

View File

@@ -20,6 +20,7 @@ from tradingagents.domains.marketdata.insider_data_service import InsiderDataService
from tradingagents.domains.marketdata.market_data_service import MarketDataService
from tradingagents.domains.news.news_service import NewsService
from tradingagents.domains.socialmedia.social_media_service import SocialMediaService
from tradingagents.lib.database import DatabaseManager
from .conditional_logic import ConditionalLogic
from .graph_setup import GraphSetup
@@ -91,7 +92,10 @@ class TradingAgentsGraph:
else:
raise ValueError(f"Unsupported LLM provider: {self.config.llm_provider}")
news_service = NewsService.build(self.config)
# Create database manager for news service
database_url = os.getenv("DATABASE_URL", "postgresql://localhost/tradingagents")
database_manager = DatabaseManager(database_url)
news_service = NewsService.build(database_manager, self.config)
social_media_service = SocialMediaService.build(self.config)
market_data_service = MarketDataService.build(self.config)
fundamental_data_service = FundamentalDataService.build(self.config)

View File

@@ -0,0 +1,3 @@
"""
Shared library modules for TradingAgents.
"""

View File

@@ -0,0 +1,181 @@
"""
Database connection and session management for news repository with async support.
"""
import logging
from contextlib import asynccontextmanager
from sqlalchemy import event
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from sqlalchemy.orm import declarative_base
from sqlalchemy.pool import NullPool
from sqlalchemy.sql import text
from typing_extensions import AsyncGenerator
Base = declarative_base()
logger = logging.getLogger(__name__)
class DatabaseManager:
"""Manages async database connections and sessions for the news repository."""
def __init__(self, database_url: str, echo: bool = False):
"""
Initialize database manager.
Args:
database_url: PostgreSQL connection URL.
echo: Whether to log SQL statements
"""
# Ensure we're using asyncpg driver
if database_url.startswith("postgresql://"):
database_url = database_url.replace(
"postgresql://", "postgresql+asyncpg://"
)
elif not database_url.startswith("postgresql+asyncpg://"):
database_url = f"postgresql+asyncpg://{database_url}"
self.database_url = database_url
self.echo = echo
# Create async engine with connection pooling
self.engine = create_async_engine(
database_url,
echo=echo,
pool_recycle=3600, # Recycle connections after 1 hour
pool_pre_ping=True, # Verify connections before use
)
# Create async session factory
self.AsyncSessionLocal = async_sessionmaker(
bind=self.engine,
class_=AsyncSession,
autocommit=False,
autoflush=False,
)
# Register event listeners for optimization
self._setup_event_listeners()
def _setup_event_listeners(self):
"""Setup SQLAlchemy event listeners for performance optimization."""
@event.listens_for(self.engine.sync_engine, "connect")
def set_pg_pragma(dbapi_connection, _connection_record):
"""Optimize PostgreSQL connection settings."""
# These settings are specific to PostgreSQL/asyncpg
if "postgresql" in self.database_url:
# asyncpg handles these differently than psycopg2
async def setup_connection():
await dbapi_connection.execute("SET timezone = 'UTC'")
await dbapi_connection.execute("SET statement_timeout = '30s'")
await dbapi_connection.execute("SET lock_timeout = '10s'")
# Note: This is handled differently in asyncpg
# We'll set these in the session context instead
@asynccontextmanager
async def get_session(self) -> AsyncGenerator[AsyncSession, None]:
"""
Create and manage an async database session.
Note: We're manually managing the session lifecycle instead of using
`async with self.AsyncSessionLocal() as session:` because:
1. Pyrefly type checker throws "bad-context-manager" errors when using
async_sessionmaker() directly in async with statements
2. Manual management gives us explicit control over commit/rollback timing
3. Avoids type checker ambiguity about the session's async context manager protocol
4. Makes the session lifecycle more transparent and debuggable
This pattern is equivalent to using async with but more type-checker friendly.
Yields:
AsyncSession: SQLAlchemy async session
Example:
async with db_manager.get_session() as session:
result = await session.execute(select(NewsArticleDB))
articles = result.scalars().all()
"""
#
session: AsyncSession = self.AsyncSessionLocal()
try:
yield session
await session.commit()
except Exception as e:
await session.rollback()
logger.error(f"Database session error: {e}")
raise
finally:
await session.close()
async def create_tables(self):
"""Create all database tables."""
try:
async with self.engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
logger.info("Database tables created successfully")
except Exception as e:
logger.error(f"Failed to create database tables: {e}")
raise
async def drop_tables(self):
"""Drop all database tables (use with caution!)."""
try:
async with self.engine.begin() as conn:
await conn.run_sync(Base.metadata.drop_all)
logger.info("Database tables dropped successfully")
except Exception as e:
logger.error(f"Failed to drop database tables: {e}")
raise
async def health_check(self) -> bool:
"""
Check if database connection is healthy.
Returns:
bool: True if database is accessible, False otherwise
"""
try:
async with self.get_session() as session:
await session.execute(text("SELECT 1"))
return True
except Exception as e:
logger.error(f"Database health check failed: {e}")
return False
async def close(self):
"""Close database engine and cleanup connections."""
if hasattr(self, "engine"):
await self.engine.dispose()
logger.info("Database connections closed")
def create_test_database_manager() -> DatabaseManager:
"""Create a test database manager for tests."""
# Use a test database URL with credentials
test_db_url = "postgresql://postgres:postgres@localhost:5432/tradingagents_test"
# Create a test-specific database manager with NullPool
db_manager = DatabaseManager(test_db_url)
# Override engine with NullPool for tests
db_manager.engine = create_async_engine(
db_manager.database_url,
echo=False,
poolclass=NullPool, # Use NullPool for tests
pool_pre_ping=False, # Disable ping for tests to avoid async issues
)
# Create new session factory for the test engine
db_manager.AsyncSessionLocal = async_sessionmaker(
bind=db_manager.engine,
class_=AsyncSession,
autocommit=False,
autoflush=False,
)
return db_manager
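
A short, assumed usage sketch of DatabaseManager, mirroring how TradingAgentsGraph constructs it from DATABASE_URL; bootstrap() is illustrative, and the SET statement_timeout line shows one way to apply per-session settings, since _setup_event_listeners only notes that asyncpg cannot take the synchronous connect-time pragmas:

import asyncio
import os

from sqlalchemy.sql import text

from tradingagents.lib.database import DatabaseManager


async def bootstrap():
    db_manager = DatabaseManager(
        os.getenv("DATABASE_URL", "postgresql://localhost/tradingagents"),
        echo=False,
    )
    await db_manager.create_tables()  # creates any tables registered on Base.metadata
    assert await db_manager.health_check()
    async with db_manager.get_session() as session:
        # Per-session tuning instead of connect-time pragmas
        await session.execute(text("SET statement_timeout = '30s'"))
        result = await session.execute(text("SELECT 1"))
        print(result.scalar())
    await db_manager.close()


asyncio.run(bootstrap())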

90 uv.lock
View File

@@ -70,6 +70,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ec/6a/bc7e17a3e87a2985d3e8f4da4cd0f481060eb78fb08596c42be62c90a4d9/aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5", size = 7597, upload-time = "2024-12-13T17:10:38.469Z" },
]
[[package]]
name = "alembic"
version = "1.16.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "mako" },
{ name = "sqlalchemy" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/83/52/72e791b75c6b1efa803e491f7cbab78e963695e76d4ada05385252927e76/alembic-1.16.4.tar.gz", hash = "sha256:efab6ada0dd0fae2c92060800e0bf5c1dc26af15a10e02fb4babff164b4725e2", size = 1968161, upload-time = "2025-07-10T16:17:20.192Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c2/62/96b5217b742805236614f05904541000f55422a6060a90d7fd4ce26c172d/alembic-1.16.4-py3-none-any.whl", hash = "sha256:b05e51e8e82efc1abd14ba2af6392897e145930c3e0a2faf2b0da2f7f7fd660d", size = 247026, upload-time = "2025-07-10T16:17:21.845Z" },
]
[[package]]
name = "annotated-types"
version = "0.7.0"
@@ -131,6 +145,22 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3e/4b/40a1dc52fc26695b1e80a9e67dfb0fe7e6ddc57bbc5b61348e40c0045abb/asyncer-0.0.7-py3-none-any.whl", hash = "sha256:f0d579d4f67c4ead52ede3a45c854f462cae569058a8a6a68a4ebccac1c335d8", size = 8476, upload-time = "2024-04-30T06:25:58.991Z" },
]
[[package]]
name = "asyncpg"
version = "0.30.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/2f/4c/7c991e080e106d854809030d8584e15b2e996e26f16aee6d757e387bc17d/asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851", size = 957746, upload-time = "2024-10-20T00:30:41.127Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/3a/22/e20602e1218dc07692acf70d5b902be820168d6282e69ef0d3cb920dc36f/asyncpg-0.30.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05b185ebb8083c8568ea8a40e896d5f7af4b8554b64d7719c0eaa1eb5a5c3a70", size = 670373, upload-time = "2024-10-20T00:29:55.165Z" },
{ url = "https://files.pythonhosted.org/packages/3d/b3/0cf269a9d647852a95c06eb00b815d0b95a4eb4b55aa2d6ba680971733b9/asyncpg-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c47806b1a8cbb0a0db896f4cd34d89942effe353a5035c62734ab13b9f938da3", size = 634745, upload-time = "2024-10-20T00:29:57.14Z" },
{ url = "https://files.pythonhosted.org/packages/8e/6d/a4f31bf358ce8491d2a31bfe0d7bcf25269e80481e49de4d8616c4295a34/asyncpg-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b6fde867a74e8c76c71e2f64f80c64c0f3163e687f1763cfaf21633ec24ec33", size = 3512103, upload-time = "2024-10-20T00:29:58.499Z" },
{ url = "https://files.pythonhosted.org/packages/96/19/139227a6e67f407b9c386cb594d9628c6c78c9024f26df87c912fabd4368/asyncpg-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46973045b567972128a27d40001124fbc821c87a6cade040cfcd4fa8a30bcdc4", size = 3592471, upload-time = "2024-10-20T00:30:00.354Z" },
{ url = "https://files.pythonhosted.org/packages/67/e4/ab3ca38f628f53f0fd28d3ff20edff1c975dd1cb22482e0061916b4b9a74/asyncpg-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9110df111cabc2ed81aad2f35394a00cadf4f2e0635603db6ebbd0fc896f46a4", size = 3496253, upload-time = "2024-10-20T00:30:02.794Z" },
{ url = "https://files.pythonhosted.org/packages/ef/5f/0bf65511d4eeac3a1f41c54034a492515a707c6edbc642174ae79034d3ba/asyncpg-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04ff0785ae7eed6cc138e73fc67b8e51d54ee7a3ce9b63666ce55a0bf095f7ba", size = 3662720, upload-time = "2024-10-20T00:30:04.501Z" },
{ url = "https://files.pythonhosted.org/packages/e7/31/1513d5a6412b98052c3ed9158d783b1e09d0910f51fbe0e05f56cc370bc4/asyncpg-0.30.0-cp313-cp313-win32.whl", hash = "sha256:ae374585f51c2b444510cdf3595b97ece4f233fde739aa14b50e0d64e8a7a590", size = 560404, upload-time = "2024-10-20T00:30:06.537Z" },
{ url = "https://files.pythonhosted.org/packages/c8/a4/cec76b3389c4c5ff66301cd100fe88c318563ec8a520e0b2e792b5b84972/asyncpg-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:f59b430b8e27557c3fb9869222559f7417ced18688375825f8f12302c34e915e", size = 621623, upload-time = "2024-10-20T00:30:09.024Z" },
]
[[package]]
name = "attrs"
version = "25.3.0"
@@ -1484,6 +1514,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/4e/0b/942cb7278d6caad79343ad2ddd636ed204a47909b969d19114a3097f5aa3/lxml_html_clean-0.4.2-py3-none-any.whl", hash = "sha256:74ccfba277adcfea87a1e9294f47dd86b05d65b4da7c5b07966e3d5f3be8a505", size = 14184, upload-time = "2025-04-09T11:33:57.988Z" },
]
[[package]]
name = "mako"
version = "1.3.10"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "markupsafe" },
]
sdist = { url = "https://files.pythonhosted.org/packages/9e/38/bd5b78a920a64d708fe6bc8e0a2c075e1389d53bef8413725c63ba041535/mako-1.3.10.tar.gz", hash = "sha256:99579a6f39583fa7e5630a28c3c1f440e4e97a414b80372649c0ce338da2ea28", size = 392474, upload-time = "2025-04-10T12:44:31.16Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/87/fb/99f81ac72ae23375f22b7afdb7642aba97c00a713c217124420147681a2f/mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59", size = 78509, upload-time = "2025-04-10T12:50:53.297Z" },
]
[[package]]
name = "markdown-it-py"
version = "3.0.0"
@@ -2600,6 +2642,18 @@ version = "3.18.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/1e/ce/c2bb58d00cb12d19dea28d5a98d05a14350197a3d03eba60be9bae708bac/peewee-3.18.1.tar.gz", hash = "sha256:a76a694b3b3012ce22f00d51fd83e55bf80b595275a90ed62cd36eb45496cf1d", size = 3026130, upload-time = "2025-04-30T15:40:35.06Z" }
[[package]]
name = "pgvector"
version = "0.4.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy" },
]
sdist = { url = "https://files.pythonhosted.org/packages/44/43/9a0fb552ab4fd980680c2037962e331820f67585df740bedc4a2b50faf20/pgvector-0.4.1.tar.gz", hash = "sha256:83d3a1c044ff0c2f1e95d13dfb625beb0b65506cfec0941bfe81fd0ad44f4003", size = 30646, upload-time = "2025-04-26T18:56:37.151Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/bf/21/b5735d5982892c878ff3d01bb06e018c43fc204428361ee9fc25a1b2125c/pgvector-0.4.1-py3-none-any.whl", hash = "sha256:34bb4e99e1b13d08a2fe82dda9f860f15ddcd0166fbb25bffe15821cbfeb7362", size = 27086, upload-time = "2025-04-26T18:56:35.956Z" },
]
[[package]]
name = "pillow"
version = "11.2.1"
@@ -3397,6 +3451,11 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/1c/fc/9ba22f01b5cdacc8f5ed0d22304718d2c758fce3fd49a5372b886a86f37c/sqlalchemy-2.0.41-py3-none-any.whl", hash = "sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576", size = 1911224, upload-time = "2025-05-14T17:39:42.154Z" },
]
[package.optional-dependencies]
asyncio = [
{ name = "greenlet" },
]
[[package]]
name = "sse-starlette"
version = "2.3.6"
@@ -3620,6 +3679,8 @@ name = "tradingagents"
version = "0.1.0"
source = { editable = "." }
dependencies = [
{ name = "alembic" },
{ name = "asyncpg" },
{ name = "backtrader" },
{ name = "chainlit" },
{ name = "chromadb" },
@@ -3637,6 +3698,7 @@ dependencies = [
{ name = "nltk" },
{ name = "pandas" },
{ name = "parsel" },
{ name = "pgvector" },
{ name = "praw" },
{ name = "python-dotenv" },
{ name = "pytz" },
@@ -3645,12 +3707,14 @@ dependencies = [
{ name = "requests" },
{ name = "rich" },
{ name = "setuptools" },
{ name = "sqlalchemy", extra = ["asyncio"] },
{ name = "stockstats" },
{ name = "ta-lib" },
{ name = "tqdm" },
{ name = "tushare" },
{ name = "typer" },
{ name = "typing-extensions" },
{ name = "uuid-utils" },
{ name = "yfinance" },
]
@@ -3675,6 +3739,8 @@ dev = [
[package.metadata]
requires-dist = [
{ name = "alembic", specifier = ">=1.13.0" },
{ name = "asyncpg", specifier = ">=0.29.0" },
{ name = "backtrader", specifier = ">=1.9.78.123" },
{ name = "chainlit", specifier = ">=2.5.5" },
{ name = "chromadb", specifier = ">=1.0.12" },
@@ -3692,6 +3758,7 @@ requires-dist = [
{ name = "nltk", specifier = ">=3.9.1" },
{ name = "pandas", specifier = ">=2.3.0" },
{ name = "parsel", specifier = ">=1.10.0" },
{ name = "pgvector", specifier = ">=0.4.1" },
{ name = "praw", specifier = ">=7.8.1" },
{ name = "pyright", marker = "extra == 'dev'", specifier = ">=1.1.390" },
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" },
@@ -3705,12 +3772,14 @@ requires-dist = [
{ name = "rich", specifier = ">=14.0.0" },
{ name = "ruff", marker = "extra == 'dev'", specifier = ">=0.8.0" },
{ name = "setuptools", specifier = ">=80.9.0" },
{ name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.0" },
{ name = "stockstats", specifier = ">=0.6.5" },
{ name = "ta-lib", specifier = ">=0.4.28" },
{ name = "tqdm", specifier = ">=4.67.1" },
{ name = "tushare", specifier = ">=1.4.21" },
{ name = "typer", specifier = ">=0.12.0" },
{ name = "typing-extensions", specifier = ">=4.14.0" },
{ name = "uuid-utils", specifier = ">=0.11.0" },
{ name = "yfinance", specifier = ">=0.2.63" },
]
provides-extras = ["dev"]
@@ -3837,6 +3906,27 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813", size = 128680, upload-time = "2025-04-10T15:23:37.377Z" },
]
[[package]]
name = "uuid-utils"
version = "0.11.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/24/7f/7d83b937889d65682d95b40c94ba226b353d3f532290ee3acb17c8746e49/uuid_utils-0.11.0.tar.gz", hash = "sha256:18cf2b7083da7f3cca0517647213129eb16d20d7ed0dd74b3f4f8bff2aa334ea", size = 18854, upload-time = "2025-05-22T11:23:15.596Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d2/20/4a34f2a6e77b1f0f3334b111e4d2411fc8646ab2987892a36507e2d6a498/uuid_utils-0.11.0-cp39-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:094445ccd323bc5507e28e9d6d86b983513efcf19ab59c2dd75239cef765631a", size = 593779, upload-time = "2025-05-22T11:22:41.36Z" },
{ url = "https://files.pythonhosted.org/packages/a1/a1/1897cd3d37144f698392ec8aae89da2c00c6d34acd77f75312477f4510ab/uuid_utils-0.11.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:6430b53d343215f85269ffd74e1d1f4b25ae1031acf0ac24ff3d5721f6a06f48", size = 300848, upload-time = "2025-05-22T11:22:43.221Z" },
{ url = "https://files.pythonhosted.org/packages/d4/36/3ae8896de8a5320a9e7529452ed29af0082daf8c3787f17c5cbf9defc651/uuid_utils-0.11.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be2e6e4318d23195887fa74fa1d64565a34f7127fdcf22918954981d79765f68", size = 336053, upload-time = "2025-05-22T11:22:44.741Z" },
{ url = "https://files.pythonhosted.org/packages/fe/b6/751e84cd056074a40ca9ac21db6ca4802e31d78207309c0d9c8ff69cd43b/uuid_utils-0.11.0-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d37289ab72aa30b5550bfa64d91431c62c89e4969bdf989988aa97f918d5f803", size = 338529, upload-time = "2025-05-22T11:22:46.303Z" },
{ url = "https://files.pythonhosted.org/packages/3b/c2/f6a1c00a1b067a886fc57c24da46bb0bcb753c92afb898871c6df3ae606f/uuid_utils-0.11.0-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1012595220f945fe09641f1365a8a06915bf432cac1b31ebd262944934a9b787", size = 480378, upload-time = "2025-05-22T11:22:47.482Z" },
{ url = "https://files.pythonhosted.org/packages/60/ea/cefc0521e07a35e85416d145382ac4817957cdec037271d0c9e27cbc7d45/uuid_utils-0.11.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35cd3fc718a673e4516e87afb9325558969eca513aa734515b9031d1b651bbb1", size = 332220, upload-time = "2025-05-22T11:22:48.55Z" },
{ url = "https://files.pythonhosted.org/packages/03/91/5929f209bd4660a7e3b4d47d26189d3cf33e14297312a5f51f5451805fec/uuid_utils-0.11.0-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ed325e0c40e0f59ae82b347f534df954b50cedf12bf60d025625538530e1965d", size = 359052, upload-time = "2025-05-22T11:22:49.617Z" },
{ url = "https://files.pythonhosted.org/packages/d8/0d/32034d5b13bc07dd95f23122cb743b4eeca8e6d88173ea3c7100c67b6269/uuid_utils-0.11.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:5c8b7cf201990ee3140956e541967bd556a7365ec738cb504b04187ad89c757a", size = 515186, upload-time = "2025-05-22T11:22:50.808Z" },
{ url = "https://files.pythonhosted.org/packages/e7/43/ccf2474f723d6de5e214c22999ffb34219acf83d1e3fff6a4734172e10c0/uuid_utils-0.11.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:9966df55bed5d538ba2e9cc40115796480f437f9007727116ef99dc2f42bd5fa", size = 535318, upload-time = "2025-05-22T11:22:52.304Z" },
{ url = "https://files.pythonhosted.org/packages/fb/05/f668b4ad2b3542cd021c4b27d1ff4e425f854f299bcf7ee36f304399a58c/uuid_utils-0.11.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:cb04b6c604968424b7e6398d54debbdd5b771b39fc1e648c6eabf3f1dc20582e", size = 502691, upload-time = "2025-05-22T11:22:53.483Z" },
{ url = "https://files.pythonhosted.org/packages/9e/0b/b906301638eef837c89b19206989dbe27794c591d794ecc06167d9a47c41/uuid_utils-0.11.0-cp39-abi3-win32.whl", hash = "sha256:18420eb3316bb514f09f2da15750ac135478c3a12a704e2c5fb59eab642bb255", size = 180147, upload-time = "2025-05-22T11:22:54.598Z" },
{ url = "https://files.pythonhosted.org/packages/56/99/ad24ee5ecfc5fbd4a4490bb59c0e72ce604d5eef08683d345546ff6a6f2d/uuid_utils-0.11.0-cp39-abi3-win_amd64.whl", hash = "sha256:37c4805af61a7cce899597d34e7c3dd5cb6a8b4b93a90fbca3826b071ba544df", size = 183574, upload-time = "2025-05-22T11:22:55.581Z" },
{ url = "https://files.pythonhosted.org/packages/0e/76/2301b1d34defc8c234596ffb6e6d456cd7ef061d108e10a14ceda5ec5d4b/uuid_utils-0.11.0-cp39-abi3-win_arm64.whl", hash = "sha256:4065cf17bbe97f6d8ccc7dc6a0bae7d28fd4797d7f32028a5abd979aeb7bf7c9", size = 181014, upload-time = "2025-05-22T11:22:56.575Z" },
]
[[package]]
name = "uvicorn"
version = "0.34.3"