# TradingAgents/tests/test_anonymizer.py
#
# 250 lines
# 9.6 KiB
# Python

"""
Unit Tests for Ticker Anonymizer
Tests:
- Ticker anonymization (deterministic hashing)
- Text anonymization (company names, products)
- Price normalization with Adj Close
- Dividend/split handling
- Edge cases (empty data, invalid prices)
"""
import unittest
import pandas as pd
import numpy as np
from pathlib import Path
import tempfile
import sys
import os
# Prepend the directory one level above this file's folder to sys.path so the
# `tradingagents` package import that follows can be resolved.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from tradingagents.utils.anonymizer import TickerAnonymizer
class TestTickerAnonymizer(unittest.TestCase):
    """Test suite for TickerAnonymizer.

    Exercises ticker labelling, text anonymization, price normalization
    (with and without ``Adj Close``), input validation, mapping
    persistence, and CSV anonymization.
    """

    def setUp(self):
        """Create a fresh anonymizer with a fixed seed before every test."""
        self.anonymizer = TickerAnonymizer(seed="test_seed")

    # ------------------------------------------------------------------
    # Private helpers (names do not start with ``test`` so unittest does
    # not collect them).
    # ------------------------------------------------------------------

    @staticmethod
    def _remove_if_present(path):
        """Delete ``path``; do nothing if the file no longer exists."""
        try:
            path.unlink()
        except FileNotFoundError:
            # Already gone: cleanup must never mask the real test result.
            pass

    def _make_temp_file(self, suffix):
        """Create an empty temporary file and schedule its removal.

        The file is created on disk and closed immediately so the code
        under test can reopen it by path. Removal is registered with
        ``addCleanup`` right away, so every file is cleaned up
        independently, even when the test fails or another cleanup
        raises.

        Args:
            suffix: File name suffix, e.g. ``'.json'``.

        Returns:
            ``Path`` of the created file.
        """
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix=suffix) as handle:
            path = Path(handle.name)
        self.addCleanup(self._remove_if_present, path)
        return path

    # ------------------------------------------------------------------
    # Ticker anonymization
    # ------------------------------------------------------------------

    def test_ticker_anonymization_deterministic(self):
        """Anonymizing the same ticker twice yields the same ASSET_ label."""
        ticker = "AAPL"
        anon1 = self.anonymizer.anonymize_ticker(ticker)
        anon2 = self.anonymizer.anonymize_ticker(ticker)
        self.assertEqual(anon1, anon2, "Anonymization should be deterministic")
        self.assertTrue(anon1.startswith("ASSET_"), "Should start with ASSET_")
        self.assertNotEqual(anon1, ticker, "Should be different from original")

    def test_different_tickers_different_labels(self):
        """Two distinct tickers must not share an anonymous label."""
        anon_aapl = self.anonymizer.anonymize_ticker("AAPL")
        anon_msft = self.anonymizer.anonymize_ticker("MSFT")
        self.assertNotEqual(anon_aapl, anon_msft, "Different tickers should have different labels")

    # ------------------------------------------------------------------
    # Text anonymization
    # ------------------------------------------------------------------

    def test_text_anonymization_ticker(self):
        """The raw ticker is replaced by an ASSET_ label inside text."""
        ticker = "AAPL"
        text = "AAPL stock rose 5% today"
        anon_text = self.anonymizer.anonymize_text(text, ticker)
        self.assertNotIn("AAPL", anon_text, "Original ticker should be removed")
        self.assertIn("ASSET_", anon_text, "Should contain anonymous label")

    def test_text_anonymization_company_name(self):
        """A registered company name is replaced by 'Company ASSET_...'."""
        ticker = "AAPL"
        self.anonymizer.set_company_name(ticker, "Apple Inc.")
        text = "Apple Inc. reported strong earnings"
        anon_text = self.anonymizer.anonymize_text(text, ticker)
        self.assertNotIn("Apple Inc.", anon_text, "Company name should be removed")
        self.assertIn("Company ASSET_", anon_text, "Should contain anonymous company label")

    def test_text_anonymization_products(self):
        """A product name ('iPhone') is replaced by a generic product label."""
        ticker = "AAPL"
        text = "iPhone sales exceeded expectations"
        anon_text = self.anonymizer.anonymize_text(text, ticker)
        self.assertNotIn("iPhone", anon_text, "Product name should be removed")
        self.assertIn("Product A", anon_text, "Should contain anonymous product label")

    # ------------------------------------------------------------------
    # Price normalization
    # ------------------------------------------------------------------

    def test_price_normalization_basic(self):
        """Without Adj Close, the first Close becomes 100 and returns are kept."""
        df = pd.DataFrame({
            'Date': pd.date_range('2024-01-01', periods=5),
            'Open': [150.0, 152.0, 151.0, 153.0, 155.0],
            'High': [152.0, 154.0, 153.0, 155.0, 157.0],
            'Low': [149.0, 151.0, 150.0, 152.0, 154.0],
            'Close': [151.0, 153.0, 152.0, 154.0, 156.0],
            'Volume': [1000000] * 5,
        })
        df_normalized = self.anonymizer.normalize_price_series(df, base_value=100.0, use_adjusted=False)

        # First close should be 100.0
        self.assertAlmostEqual(df_normalized['Close'].iloc[0], 100.0, places=2)

        # Relative changes should be preserved: scaling by a constant must
        # not alter the first-to-last percentage move.
        original_pct_change = (df['Close'].iloc[-1] / df['Close'].iloc[0]) - 1
        normalized_pct_change = (df_normalized['Close'].iloc[-1] / df_normalized['Close'].iloc[0]) - 1
        self.assertAlmostEqual(original_pct_change, normalized_pct_change, places=6,
                               msg="Percentage changes should be preserved")

    def test_price_normalization_with_adj_close(self):
        """With use_adjusted=True the first Adj Close is the scaling baseline."""
        df = pd.DataFrame({
            'Date': pd.date_range('2024-01-01', periods=5),
            'Open': [150.0, 152.0, 151.0, 153.0, 155.0],
            'High': [152.0, 154.0, 153.0, 155.0, 157.0],
            'Low': [149.0, 151.0, 150.0, 152.0, 154.0],
            'Close': [151.0, 153.0, 152.0, 154.0, 156.0],
            'Adj Close': [150.5, 152.5, 151.5, 153.5, 155.5],  # Adjusted for dividends
            'Volume': [1000000] * 5,
        })
        df_normalized = self.anonymizer.normalize_price_series(df, base_value=100.0, use_adjusted=True)

        # Should use Adj Close as baseline, so the first Close is expected
        # at Close[0] / AdjClose[0] * 100 rather than exactly 100.
        baseline = df['Adj Close'].iloc[0]
        expected_first_close = (df['Close'].iloc[0] / baseline) * 100.0
        self.assertAlmostEqual(df_normalized['Close'].iloc[0], expected_first_close, places=2)

    def test_price_normalization_preserves_volume(self):
        """The Volume column passes through normalization unchanged."""
        df = pd.DataFrame({
            'Date': pd.date_range('2024-01-01', periods=3),
            'Close': [150.0, 153.0, 156.0],
            'Volume': [1000000, 1500000, 2000000],
        })
        df_normalized = self.anonymizer.normalize_price_series(df, use_adjusted=False)

        # Volume should remain unchanged
        pd.testing.assert_series_equal(df['Volume'], df_normalized['Volume'])

    def test_price_normalization_empty_dataframe(self):
        """An empty DataFrame is rejected with ValueError."""
        df = pd.DataFrame()
        with self.assertRaises(ValueError):
            self.anonymizer.normalize_price_series(df)

    def test_price_normalization_invalid_baseline(self):
        """A zero first Close (unusable baseline) is rejected with ValueError."""
        df = pd.DataFrame({
            'Close': [0.0, 10.0, 20.0],  # First value is zero
        })
        with self.assertRaises(ValueError):
            self.anonymizer.normalize_price_series(df, use_adjusted=False)

    def test_price_normalization_missing_close_column(self):
        """A DataFrame without a Close column is rejected with ValueError."""
        df = pd.DataFrame({
            'Open': [150.0, 152.0],
            'Volume': [1000000, 1500000],
        })
        with self.assertRaises(ValueError):
            self.anonymizer.normalize_price_series(df, use_adjusted=False)

    def test_normalize_single_value(self):
        """A single value is scaled to value / baseline * base_value."""
        value = 153.0
        baseline = 150.0
        normalized = self.anonymizer.normalize_price_value(value, baseline, base_value=100.0)
        expected = (153.0 / 150.0) * 100.0
        self.assertAlmostEqual(normalized, expected, places=2)

    def test_normalize_single_value_invalid_baseline(self):
        """A zero baseline for a single value is rejected with ValueError."""
        with self.assertRaises(ValueError):
            self.anonymizer.normalize_price_value(100.0, 0.0)

    # ------------------------------------------------------------------
    # Persistence and reverse mapping
    # ------------------------------------------------------------------

    def test_save_and_load_mapping(self):
        """Ticker and company-name mappings survive a save/load round trip."""
        # Create some mappings
        self.anonymizer.anonymize_ticker("AAPL")
        self.anonymizer.anonymize_ticker("MSFT")
        self.anonymizer.set_company_name("AAPL", "Apple Inc.")

        # Save to a temp file; its removal is already scheduled by the helper.
        temp_path = self._make_temp_file('.json')
        self.anonymizer.save_mapping(temp_path)

        # Load into new anonymizer
        new_anonymizer = TickerAnonymizer()
        new_anonymizer.load_mapping(temp_path)

        # Check mappings are preserved
        self.assertEqual(
            self.anonymizer.ticker_map,
            new_anonymizer.ticker_map,
            "Ticker mappings should be preserved"
        )
        self.assertEqual(
            self.anonymizer.company_names,
            new_anonymizer.company_names,
            "Company names should be preserved"
        )

    def test_deanonymize_ticker(self):
        """An anonymous label maps back to the ticker it was created from."""
        ticker = "AAPL"
        anon_ticker = self.anonymizer.anonymize_ticker(ticker)
        original = self.anonymizer.deanonymize_ticker(anon_ticker)
        self.assertEqual(original, ticker, "Should reverse map correctly")

    # ------------------------------------------------------------------
    # CSV anonymization
    # ------------------------------------------------------------------

    def test_anonymize_csv(self):
        """anonymize_csv writes an output CSV whose first Close is ~100."""
        # Create test CSV
        df = pd.DataFrame({
            'Date': pd.date_range('2024-01-01', periods=3),
            'Close': [150.0, 153.0, 156.0],
            'Adj Close': [150.0, 153.0, 156.0],
            'Volume': [1000000, 1500000, 2000000],
        })

        # Each file registers its own cleanup, so neither can leak if the
        # other's creation or removal fails.
        input_path = self._make_temp_file('.csv')
        output_path = self._make_temp_file('.csv')

        df.to_csv(input_path, index=False)
        self.anonymizer.anonymize_csv(input_path, output_path, "AAPL", normalize_prices=True)

        # Read output
        df_output = pd.read_csv(output_path)

        # Check normalization
        self.assertAlmostEqual(df_output['Close'].iloc[0], 100.0, places=1)
if __name__ == "__main__":
    # Executed directly (not imported): hand control to the unittest runner
    # with verbose output.
    unittest.main(verbosity=2)