"""
Unit Tests for Ticker Anonymizer

Tests:
- Ticker anonymization (deterministic hashing)
- Text anonymization (company names, products)
- Price normalization with Adj Close
- Dividend/split handling
- Edge cases (empty data, invalid prices)
"""

import unittest
import pandas as pd
import numpy as np
from pathlib import Path
import tempfile
import sys
import os

# Add parent directory to path so the project package resolves when this
# file is executed directly (must run before the project import below).
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from tradingagents.utils.anonymizer import TickerAnonymizer


class TestTickerAnonymizer(unittest.TestCase):
    """Test suite for TickerAnonymizer."""

    def setUp(self):
        """Create a fresh anonymizer with a fixed seed for every test."""
        self.anonymizer = TickerAnonymizer(seed="test_seed")

    # ------------------------------------------------------------------
    # Ticker anonymization
    # ------------------------------------------------------------------

    def test_ticker_anonymization_deterministic(self):
        """Anonymizing the same ticker twice yields the same label."""
        ticker = "AAPL"

        anon1 = self.anonymizer.anonymize_ticker(ticker)
        anon2 = self.anonymizer.anonymize_ticker(ticker)

        self.assertEqual(anon1, anon2, "Anonymization should be deterministic")
        self.assertTrue(anon1.startswith("ASSET_"), "Should start with ASSET_")
        self.assertNotEqual(anon1, ticker, "Should be different from original")

    def test_different_tickers_different_labels(self):
        """Different tickers get different labels."""
        anon_aapl = self.anonymizer.anonymize_ticker("AAPL")
        anon_msft = self.anonymizer.anonymize_ticker("MSFT")

        self.assertNotEqual(
            anon_aapl,
            anon_msft,
            "Different tickers should have different labels",
        )

    # ------------------------------------------------------------------
    # Text anonymization
    # ------------------------------------------------------------------

    def test_text_anonymization_ticker(self):
        """The ticker symbol is replaced inside free text."""
        ticker = "AAPL"
        text = "AAPL stock rose 5% today"

        anon_text = self.anonymizer.anonymize_text(text, ticker)

        self.assertNotIn("AAPL", anon_text, "Original ticker should be removed")
        self.assertIn("ASSET_", anon_text, "Should contain anonymous label")

    def test_text_anonymization_company_name(self):
        """A registered company name is replaced inside free text."""
        ticker = "AAPL"
        self.anonymizer.set_company_name(ticker, "Apple Inc.")
        text = "Apple Inc. reported strong earnings"

        anon_text = self.anonymizer.anonymize_text(text, ticker)

        self.assertNotIn("Apple Inc.", anon_text, "Company name should be removed")
        self.assertIn(
            "Company ASSET_",
            anon_text,
            "Should contain anonymous company label",
        )

    def test_text_anonymization_products(self):
        """A known product name is replaced inside free text."""
        ticker = "AAPL"
        text = "iPhone sales exceeded expectations"

        anon_text = self.anonymizer.anonymize_text(text, ticker)

        self.assertNotIn("iPhone", anon_text, "Product name should be removed")
        self.assertIn(
            "Product A",
            anon_text,
            "Should contain anonymous product label",
        )

    # ------------------------------------------------------------------
    # Price normalization
    # ------------------------------------------------------------------

    def test_price_normalization_basic(self):
        """Basic price normalization to base-100."""
        df = pd.DataFrame({
            'Date': pd.date_range('2024-01-01', periods=5),
            'Open': [150.0, 152.0, 151.0, 153.0, 155.0],
            'High': [152.0, 154.0, 153.0, 155.0, 157.0],
            'Low': [149.0, 151.0, 150.0, 152.0, 154.0],
            'Close': [151.0, 153.0, 152.0, 154.0, 156.0],
            'Volume': [1000000] * 5
        })

        df_normalized = self.anonymizer.normalize_price_series(
            df, base_value=100.0, use_adjusted=False
        )

        # First close should be 100.0
        self.assertAlmostEqual(df_normalized['Close'].iloc[0], 100.0, places=2)

        # Relative changes should be preserved
        original_pct_change = (df['Close'].iloc[-1] / df['Close'].iloc[0]) - 1
        normalized_pct_change = (
            df_normalized['Close'].iloc[-1] / df_normalized['Close'].iloc[0]
        ) - 1
        self.assertAlmostEqual(
            original_pct_change,
            normalized_pct_change,
            places=6,
            msg="Percentage changes should be preserved",
        )

    def test_price_normalization_with_adj_close(self):
        """Price normalization using Adj Close (handles dividends/splits)."""
        df = pd.DataFrame({
            'Date': pd.date_range('2024-01-01', periods=5),
            'Open': [150.0, 152.0, 151.0, 153.0, 155.0],
            'High': [152.0, 154.0, 153.0, 155.0, 157.0],
            'Low': [149.0, 151.0, 150.0, 152.0, 154.0],
            'Close': [151.0, 153.0, 152.0, 154.0, 156.0],
            'Adj Close': [150.5, 152.5, 151.5, 153.5, 155.5],  # Adjusted for dividends
            'Volume': [1000000] * 5
        })

        df_normalized = self.anonymizer.normalize_price_series(
            df, base_value=100.0, use_adjusted=True
        )

        # Should use Adj Close as baseline
        baseline = df['Adj Close'].iloc[0]
        expected_first_close = (df['Close'].iloc[0] / baseline) * 100.0
        self.assertAlmostEqual(
            df_normalized['Close'].iloc[0], expected_first_close, places=2
        )

    def test_price_normalization_preserves_volume(self):
        """Volume is not normalized."""
        df = pd.DataFrame({
            'Date': pd.date_range('2024-01-01', periods=3),
            'Close': [150.0, 153.0, 156.0],
            'Volume': [1000000, 1500000, 2000000]
        })

        df_normalized = self.anonymizer.normalize_price_series(df, use_adjusted=False)

        # Volume should remain unchanged
        pd.testing.assert_series_equal(df['Volume'], df_normalized['Volume'])

    def test_price_normalization_empty_dataframe(self):
        """An empty DataFrame raises ValueError."""
        df = pd.DataFrame()

        with self.assertRaises(ValueError):
            self.anonymizer.normalize_price_series(df)

    def test_price_normalization_invalid_baseline(self):
        """An invalid baseline (zero or negative) raises ValueError."""
        df = pd.DataFrame({
            'Close': [0.0, 10.0, 20.0]  # First value is zero
        })

        with self.assertRaises(ValueError):
            self.anonymizer.normalize_price_series(df, use_adjusted=False)

    def test_price_normalization_missing_close_column(self):
        """A frame without a Close column raises ValueError."""
        df = pd.DataFrame({
            'Open': [150.0, 152.0],
            'Volume': [1000000, 1500000]
        })

        with self.assertRaises(ValueError):
            self.anonymizer.normalize_price_series(df, use_adjusted=False)

    def test_normalize_single_value(self):
        """Normalizing a single price value against a baseline."""
        value = 153.0
        baseline = 150.0

        normalized = self.anonymizer.normalize_price_value(
            value, baseline, base_value=100.0
        )

        expected = (153.0 / 150.0) * 100.0
        self.assertAlmostEqual(normalized, expected, places=2)

    def test_normalize_single_value_invalid_baseline(self):
        """A zero baseline raises ValueError."""
        with self.assertRaises(ValueError):
            self.anonymizer.normalize_price_value(100.0, 0.0)

    # ------------------------------------------------------------------
    # Persistence and reverse mapping
    # ------------------------------------------------------------------

    def test_save_and_load_mapping(self):
        """Saved ticker mappings can be loaded into a new anonymizer."""
        # Create some mappings
        self.anonymizer.anonymize_ticker("AAPL")
        self.anonymizer.anonymize_ticker("MSFT")
        self.anonymizer.set_company_name("AAPL", "Apple Inc.")

        # A temporary directory is removed on exit even when an assertion
        # fails, so no manual unlink bookkeeping is needed.
        with tempfile.TemporaryDirectory() as tmp_dir:
            temp_path = Path(tmp_dir) / "mapping.json"
            self.anonymizer.save_mapping(temp_path)

            # Load into new anonymizer
            new_anonymizer = TickerAnonymizer()
            new_anonymizer.load_mapping(temp_path)

            # Check mappings are preserved
            self.assertEqual(
                self.anonymizer.ticker_map,
                new_anonymizer.ticker_map,
                "Ticker mappings should be preserved"
            )
            self.assertEqual(
                self.anonymizer.company_names,
                new_anonymizer.company_names,
                "Company names should be preserved"
            )

    def test_deanonymize_ticker(self):
        """Reverse mapping from anonymous label to original ticker."""
        ticker = "AAPL"
        anon_ticker = self.anonymizer.anonymize_ticker(ticker)

        original = self.anonymizer.deanonymize_ticker(anon_ticker)

        self.assertEqual(original, ticker, "Should reverse map correctly")

    # ------------------------------------------------------------------
    # CSV round trip
    # ------------------------------------------------------------------

    def test_anonymize_csv(self):
        """Anonymizing a CSV file normalizes its prices."""
        # Create test CSV
        df = pd.DataFrame({
            'Date': pd.date_range('2024-01-01', periods=3),
            'Close': [150.0, 153.0, 156.0],
            'Adj Close': [150.0, 153.0, 156.0],
            'Volume': [1000000, 1500000, 2000000]
        })

        # Both files live in one temporary directory so cleanup is a single,
        # exception-safe step; previously a failing unlink of the input file
        # would leak the output file.
        with tempfile.TemporaryDirectory() as tmp_dir:
            input_path = Path(tmp_dir) / "input.csv"
            output_path = Path(tmp_dir) / "output.csv"

            df.to_csv(input_path, index=False)
            self.anonymizer.anonymize_csv(
                input_path, output_path, "AAPL", normalize_prices=True
            )

            # Read output
            df_output = pd.read_csv(output_path)

            # Check normalization
            self.assertAlmostEqual(df_output['Close'].iloc[0], 100.0, places=1)


if __name__ == '__main__':
    # Run tests
    unittest.main(verbosity=2)