"""
Ticker Anonymizer - Production Implementation

Handles:
- Ticker masking (AAPL → ASSET_042)
- Company name anonymization
- Product name anonymization
- Price normalization to base-100 index
- CRITICAL: Uses Adj Close to handle dividends/splits correctly
"""

import hashlib
import re
import json
from pathlib import Path
from typing import Dict, List, Optional
import pandas as pd
import numpy as np


class TickerAnonymizer:
    """
    Anonymize tickers and normalize prices to prevent LLM identification.
    
    CRITICAL: Uses adjusted close prices to handle dividends and splits.
    """
    
    def __init__(self, seed: str = "blindfire_v1", auto_persist: bool = True):
        self.seed = seed
        self.ticker_map = {}
        self.reverse_map = {}
        self.company_names = {}
        self.baseline_prices = {}  # Store baseline for normalization
        self.auto_persist = auto_persist
        
        # Persistence path
        self.map_file = Path("ticker_map.json")
        if self.auto_persist:
            self._load_from_file()
        
        # Product name mappings
        self.product_map = {
            # Apple
            "iPhone": "Product A",
            "iPad": "Product B",
            "MacBook": "Product C",
            "Apple Watch": "Product D",
            "AirPods": "Product E",
            # Nvidia
            "GeForce": "Product X",
            "RTX": "Product Y",
            "H100": "Product Z",
            "A100": "Product W",
            # Microsoft
            "Windows": "Software Platform A",
            "Office": "Software Platform B",
            "Azure": "Cloud Platform A",
            # Meta
            "Facebook": "Social Platform A",
            "Instagram": "Social Platform B",
            "WhatsApp": "Messaging Platform A",
            # Google
            "Search": "Platform Service A",
            "YouTube": "Video Platform A",
            "Android": "Mobile OS A",
        }
        
    def _load_from_file(self):
        """Load mapping from disk if exists"""
        if self.map_file.exists():
            try:
                with open(self.map_file, 'r') as f:
                    data = json.load(f)
                    # Merge loaded data
                    self.ticker_map.update(data.get("ticker_map", {}))
                    self.reverse_map.update(data.get("reverse_map", {}))
                    self.company_names.update(data.get("company_names", {}))
            except Exception as e:
                print(f"Warning: Failed to load ticker map: {e}")

    def _save_to_file(self):
        """Save mapping to disk"""
        if not self.auto_persist:
            return
        
        data = {
            "ticker_map": self.ticker_map,
            "reverse_map": self.reverse_map,
            "company_names": self.company_names,
            "seed": self.seed
        }
        try:
            with open(self.map_file, 'w') as f:
                json.dump(data, f, indent=2)
        except Exception as e:
            print(f"Warning: Failed to save ticker map: {e}")
    
    def anonymize_ticker(self, ticker: str) -> str:
        """
        Map ticker to anonymous label using deterministic hash.
        
        Args:
            ticker: Original ticker symbol (e.g., "AAPL")
        
        Returns:
            Anonymous label (e.g., "ASSET_042")
        """
        if ticker not in self.ticker_map:
            hash_input = f"{self.seed}_{ticker}"
            hash_val = int(hashlib.md5(hash_input.encode()).hexdigest(), 16)
            anon_label = f"ASSET_{hash_val % 1000:03d}"
            self.ticker_map[ticker] = anon_label
            self.reverse_map[anon_label] = ticker
            self._save_to_file()  # Save on new mapping
            
        return self.ticker_map[ticker]
    
    def set_company_name(self, ticker: str, company_name: str):
        """Store company name for anonymization."""
        if ticker not in self.company_names or self.company_names[ticker] != company_name:
            self.company_names[ticker] = company_name
            self._save_to_file()
    
    def anonymize_text(self, text: str, ticker: str) -> str:
        """
        Replace all company-specific information in text.

        Three passes are applied in order: the registered company name (if
        any) becomes "Company <label>", the ticker symbol becomes its label,
        and every known product name becomes its generic placeholder. All
        matching is case-insensitive and restricted to whole tokens.

        Args:
            text: Text to anonymize
            ticker: Ticker symbol for context

        Returns:
            Anonymized text (falsy input is returned unchanged)
        """
        if not text:
            return text

        def whole_token(literal: str) -> str:
            # Lookarounds instead of \b: \b needs a word character on one
            # side, so it never matches after a name ending in punctuation
            # such as "Apple Inc." when a space or parenthesis follows.
            return rf'(?<!\w){re.escape(literal)}(?!\w)'

        anon_ticker = self.anonymize_ticker(ticker)

        # Replace company name FIRST (before ticker, to avoid partial replacements)
        if ticker in self.company_names:
            text = re.sub(
                whole_token(self.company_names[ticker]),
                f"Company {anon_ticker}",
                text,
                flags=re.IGNORECASE
            )

        # Replace ticker symbol; escaping keeps symbols such as "BRK.B"
        # from being treated as a regex wildcard.
        text = re.sub(whole_token(ticker), anon_ticker, text, flags=re.IGNORECASE)

        # Replace product names
        for product, anon_product in self.product_map.items():
            text = re.sub(
                whole_token(product),
                anon_product,
                text,
                flags=re.IGNORECASE
            )

        return text
    
    def normalize_price_series(
        self,
        df: pd.DataFrame,
        base_value: float = 100.0,
        use_adjusted: bool = True
    ) -> pd.DataFrame:
        """
        Normalize price series to base-100 index.
        
        CRITICAL: Uses Adj Close by default to handle dividends/splits correctly.
        
        Args:
            df: DataFrame with OHLCV columns
            base_value: Starting index value (default 100.0)
            use_adjusted: Use 'Adj Close' if available (default True)
        
        Returns:
            DataFrame with normalized prices
        
        Raises:
            ValueError: If required columns are missing
        """
        df_normalized = df.copy()
        
        # Determine which close column to use
        if use_adjusted and 'Adj Close' in df.columns:
            close_col = 'Adj Close'
        elif 'Close' in df.columns:
            close_col = 'Close'
        else:
            raise ValueError("DataFrame must have 'Close' or 'Adj Close' column")
        
        # Get baseline (first row)
        if len(df) == 0:
            raise ValueError("DataFrame is empty")
        
        baseline = df[close_col].iloc[0]
        if baseline <= 0 or np.isnan(baseline):
            raise ValueError(f"Invalid baseline price: {baseline}")
        
        # Normalize all price columns
        price_columns = ['Open', 'High', 'Low', 'Close']
        if 'Adj Close' in df.columns:
            price_columns.append('Adj Close')
        
        for col in price_columns:
            if col in df.columns:
                # Use the same baseline for all columns
                df_normalized[col] = (df[col] / baseline) * base_value
        
        # Volume stays absolute (less identifying than price)
        # Could normalize if needed, but keeping raw for now
        
        return df_normalized
    
    def normalize_price_value(
        self,
        value: float,
        baseline: float,
        base_value: float = 100.0
    ) -> float:
        """
        Normalize a single price value.

        Args:
            value: Current price
            baseline: Reference price; must be a positive number
            base_value: Target baseline (default 100.0)

        Returns:
            Normalized price, i.e. (value / baseline) * base_value

        Raises:
            ValueError: If baseline is zero, negative, or NaN
        """
        # "not baseline > 0" also rejects NaN (every comparison with NaN is
        # False), matching the baseline validation in normalize_price_series.
        if not baseline > 0:
            raise ValueError(f"Invalid baseline: {baseline}")
        return (value / baseline) * base_value
    
    def anonymize_csv(
        self,
        input_path: Path,
        output_path: Path,
        ticker: str,
        normalize_prices: bool = True
    ):
        """
        Anonymize a CSV file containing market data.
        
        Args:
            input_path: Path to input CSV
            output_path: Path to output CSV
            ticker: Ticker symbol
            normalize_prices: Whether to normalize prices to base-100
        """
        df = pd.read_csv(input_path)
        
        # Anonymize ticker in column names
        anon_ticker = self.anonymize_ticker(ticker)
        df.columns = [col.replace(ticker, anon_ticker) for col in df.columns]
        
        # Normalize prices if requested
        if normalize_prices:
            df = self.normalize_price_series(df, base_value=100.0)
        
        # Anonymize text columns
        for col in df.columns:
            if df[col].dtype == 'object':
                df[col] = df[col].apply(
                    lambda x: self.anonymize_text(str(x), ticker) if pd.notna(x) else x
                )
        
        df.to_csv(output_path, index=False)
        print(f"✅ Anonymized {input_path.name} → {output_path.name}")
    
    def save_mapping(self, output_path: Path):
        """Save ticker mapping for de-anonymization."""
        mapping = {
            "ticker_map": self.ticker_map,
            "reverse_map": self.reverse_map,
            "company_names": self.company_names,
            "seed": self.seed
        }
        with open(output_path, 'w') as f:
            json.dump(mapping, f, indent=2)
        print(f"✅ Saved mapping to {output_path}")
    
    def load_mapping(self, input_path: Path):
        """Load ticker mapping from file."""
        with open(input_path, 'r') as f:
            mapping = json.load(f)
        
        self.ticker_map = mapping["ticker_map"]
        self.reverse_map = mapping["reverse_map"]
        self.company_names = mapping["company_names"]
        self.seed = mapping.get("seed", self.seed)
        print(f"✅ Loaded mapping from {input_path}")
    
    def deanonymize_ticker(self, anon_ticker: str) -> Optional[str]:
        """Look up the real ticker behind an anonymous label, or None if unknown."""
        if anon_ticker in self.reverse_map:
            return self.reverse_map[anon_ticker]
        return None


    def deanonymize_text(self, text: str) -> str:
        """
        Restore original company information in text.

        Product placeholders are reversed first, then for every known label
        "Company <label>" is turned back into the stored company name (when
        one is registered) and any remaining bare label into the real ticker.
        Matching is case-insensitive.

        Args:
            text: Anonymized text

        Returns:
            Deanonymized text with real names and tickers (falsy input is
            returned unchanged).
        """
        if not text:
            return text

        # Replacements are supplied as callables so that the real names are
        # inserted literally; a plain string would be parsed as a replacement
        # template, where a backslash in a company name is an error.

        # 1. Reverse product map (Product A -> iPhone)
        for product, anon_product in self.product_map.items():
            text = re.sub(
                rf'\b{re.escape(anon_product)}\b',
                lambda _match, real=product: real,
                text,
                flags=re.IGNORECASE
            )

        # 2. Reverse ticker and company name for every known label
        for anon_ticker, real_ticker in self.reverse_map.items():
            label = re.escape(anon_ticker)

            # "Company ASSET_XXX" -> stored company name, handled before the
            # bare label so the "Company " prefix is consumed with it.
            if real_ticker in self.company_names:
                real_name = self.company_names[real_ticker]
                text = re.sub(
                    rf'Company {label}\b',
                    lambda _match, real=real_name: real,
                    text,
                    flags=re.IGNORECASE
                )

            # Remaining ASSET_XXX -> real ticker
            text = re.sub(
                rf'\b{label}\b',
                lambda _match, real=real_ticker: real,
                text,
                flags=re.IGNORECASE
            )

        return text

# Demo: run this module directly to see ticker, text and price anonymization
if __name__ == "__main__":
    demo = TickerAnonymizer()

    # Register the company name, then derive the anonymous label
    symbol = "AAPL"
    demo.set_company_name(symbol, "Apple Inc.")
    print(f"Ticker: {symbol} → {demo.anonymize_ticker(symbol)}")

    # Free-text anonymization
    sample_text = "Apple Inc. (AAPL) reported strong iPhone sales"
    masked_text = demo.anonymize_text(sample_text, symbol)
    print(f"Text: {sample_text}")
    print(f"Anonymized: {masked_text}")

    # Price normalization against the adjusted close
    sample_prices = {
        'Date': pd.date_range('2024-01-01', periods=5),
        'Open': [150.0, 152.0, 151.0, 153.0, 155.0],
        'High': [152.0, 154.0, 153.0, 155.0, 157.0],
        'Low': [149.0, 151.0, 150.0, 152.0, 154.0],
        'Close': [151.0, 153.0, 152.0, 154.0, 156.0],
        # Adjusted for dividends
        'Adj Close': [150.5, 152.5, 151.5, 153.5, 155.5],
        'Volume': [1_000_000] * 5,
    }
    frame = pd.DataFrame(sample_prices)
    shown_columns = ['Date', 'Close', 'Adj Close']

    print("\nOriginal prices:")
    print(frame[shown_columns].head())

    indexed = demo.normalize_price_series(frame)
    print("\nNormalized prices (using Adj Close):")
    print(indexed[shown_columns].head())