# TradingAgents/autonomous/connectors/perplexity_finance.py
# (file-browser metadata from the paste: 578 lines, 21 KiB, Python)

"""
Perplexity Finance API Connector for real-time financial analysis and research.
FIXED VERSION: Addresses all critical issues from code review.
"""
import asyncio
import os
import json
import re
from typing import Dict, List, Optional, Any
from datetime import datetime, timedelta, timezone
from dataclasses import dataclass, asdict
from enum import Enum
import aiohttp
from pydantic import BaseModel, Field, validator
import logging
# Optional dependency: the Redis-backed cache lives elsewhere in the project.
# When it cannot be imported (e.g. the connector is used standalone),
# CACHE_AVAILABLE is False and the connector simply runs uncached.
try:
    from autonomous.core.cache import RedisCache, CacheKey
    CACHE_AVAILABLE = True
except ImportError:
    CACHE_AVAILABLE = False

logger = logging.getLogger(__name__)
class AnalysisType(str, Enum):
    """Kinds of financial analysis the connector can request.

    Subclasses ``str`` so members compare equal to their plain string
    values and serialize cleanly (e.g. into cache keys and JSON).
    """

    FUNDAMENTAL = "fundamental"      # valuation + balance-sheet fundamentals
    TECHNICAL = "technical"          # price action, indicators, chart patterns
    SENTIMENT = "sentiment"          # market/news sentiment
    EARNINGS = "earnings"            # earnings-focused review
    VALUATION = "valuation"          # DCF / comparables fair-value work
    COMPETITIVE = "competitive"      # competitive positioning
    MACRO = "macro"                  # macroeconomic backdrop
    INSIDER = "insider"              # insider activity
    INSTITUTIONAL = "institutional"  # institutional ownership/flows
    OPTIONS_FLOW = "options_flow"    # options order-flow analysis
class ResearchDepth(str, Enum):
    """How thorough the requested research should be.

    A ``str``-backed enum, ordered from cheapest to most detailed.
    """

    QUICK = "quick"        # headline-level answer
    STANDARD = "standard"  # default level of detail
    DEEP = "deep"          # extensive detail with specific numbers
    EXPERT = "expert"      # institutional-quality analysis
@dataclass
class StockAnalysis:
    """Structured result of a single-stock analysis produced by
    PerplexityFinanceConnector (parsed locally from the model's free text)."""

    ticker: str                         # symbol as passed in, e.g. "AAPL"
    timestamp: datetime                 # UTC time the analysis was produced
    analysis_type: AnalysisType         # which analysis flavour was requested
    current_price: float                # 0 when no price could be parsed
    fair_value: Optional[float]         # None when not extracted from the text
    upside_potential: Optional[float]   # percent upside of fair_value vs current_price
    pe_ratio: Optional[float]
    peg_ratio: Optional[float]
    price_to_book: Optional[float]
    debt_to_equity: Optional[float]
    roe: Optional[float]
    revenue_growth: Optional[float]
    earnings_growth: Optional[float]
    bull_case: str                      # bullish summary (may be a text excerpt)
    bear_case: str
    key_risks: List[str]                # up to 5 extracted risk bullets
    catalysts: List[str]
    rating: str                         # "Buy" / "Hold" / "Sell"
    confidence_score: float             # parser default is 70; 0 on failure
    time_horizon: str                   # e.g. "medium" — TODO confirm full value set
    detailed_analysis: str              # full raw model output (or error text)
    data_sources: List[str]             # provenance labels for the analysis
@dataclass
class MarketScreenerResult:
    """Structured result of a natural-language stock-screening query
    (parsed locally from the model's free text)."""

    query: str                        # the (sanitized) user query
    timestamp: datetime               # UTC time the screen was run
    total_results: int                # number of entries in `stocks`
    stocks: List[Dict[str, Any]]      # per-stock dicts: ticker/company_name/price/...
    screening_criteria: Dict[str, Any]  # filters applied (empty dict when none)
    market_context: str               # short excerpt of the raw response
    best_value: List[str]             # ticker shortlists derived from the results
    highest_growth: List[str]
    lowest_risk: List[str]
    detailed_explanation: str         # full raw model output (or error text)
class PerplexityFinanceConnector:
    """
    Connector for the Perplexity chat-completions API tuned for financial
    analysis and stock screening.

    Features:
      * client-side rate limiting (``rate_limit`` requests per minute),
      * optional Redis caching of per-ticker analyses (1 hour TTL),
      * local regex-based parsing of the model's free-text answers into
        structured ``StockAnalysis`` / ``MarketScreenerResult`` objects,
      * sanitized error logging (the bearer token is never written to logs).
    """

    # Perplexity models this connector is known to accept.
    VALID_MODELS = [
        "sonar",                    # Default sonar model (works!)
        "sonar-online",             # Online search model
        "sonar-chat",               # Chat-focused model
        "mixtral-8x7b-instruct",    # Mixtral instruct model
        "codellama-70b-instruct",   # Code-focused model
        "llama-3.1-70b-instruct",   # Large Llama model
    ]

    # Upper bound on 429-triggered retries of a single request.  The previous
    # implementation retried by recursing with no limit, which could recurse
    # (and sleep) forever against a persistently rate-limited endpoint.
    MAX_RATE_LIMIT_RETRIES = 3

    def __init__(self,
                 api_key: Optional[str] = None,
                 # NOTE: quoted forward reference — RedisCache is undefined when
                 # the optional cache import failed; an unquoted annotation
                 # raised NameError at class-definition time in that case.
                 cache: Optional["RedisCache"] = None,
                 rate_limit: int = 50,
                 model: Optional[str] = None):
        """
        Initialize Perplexity Finance connector.

        Args:
            api_key: Perplexity API key (falls back to the PERPLEXITY_API_KEY
                environment variable).
            cache: Redis cache instance; ignored when cache support is unavailable.
            rate_limit: Maximum requests per minute.
            model: Specific model to use (must be in VALID_MODELS; otherwise
                the default "sonar" is selected).

        Raises:
            ValueError: if no API key is provided or found in the environment.
        """
        self.api_key = api_key or os.getenv('PERPLEXITY_API_KEY')
        if not self.api_key:
            raise ValueError("Perplexity API key required. Set PERPLEXITY_API_KEY environment variable.")
        self.base_url = "https://api.perplexity.ai"
        # Only keep the cache when the optional cache module actually imported.
        self.cache = cache if cache and CACHE_AVAILABLE else None
        self.rate_limit = rate_limit
        self.last_request_time = datetime.now(timezone.utc)

        # Headers for API requests
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        # Select appropriate model
        if model and model in self.VALID_MODELS:
            self.finance_model = model
        else:
            # Default to most reliable model
            self.finance_model = "sonar"  # Fast and reliable (verified working)
            logger.info(f"Using default model: {self.finance_model}")

        # Track rate limiting
        self.request_count = 0
        self.rate_limit_reset = datetime.now(timezone.utc)

    async def analyze_stock(self,
                            ticker: str,
                            analysis_type: AnalysisType = AnalysisType.FUNDAMENTAL,
                            depth: ResearchDepth = ResearchDepth.STANDARD) -> StockAnalysis:
        """
        Perform comprehensive analysis on a single stock.

        Checks the cache first; on a miss, queries Perplexity and parses the
        free-text answer locally.  Never raises on analysis failure — a
        placeholder "Hold" result carrying the error text is returned instead.

        Args:
            ticker: stock symbol (letters/digits, '-' and '.' allowed).
            analysis_type: which analysis flavour to request.
            depth: how thorough the research should be.

        Raises:
            ValueError: if the ticker contains invalid characters.
        """
        # Validate ticker ('-' and '.' are legal in symbols like BRK.B)
        if not ticker or not ticker.replace('-', '').replace('.', '').isalnum():
            raise ValueError(f"Invalid ticker symbol: {ticker}")

        # Check cache first
        cache_key = f"{CacheKey.AI_DECISION if CACHE_AVAILABLE else 'ai'}:perplexity:{ticker}:{analysis_type.value}"
        if self.cache:
            try:
                cached = await self.cache.get(cache_key)
                if cached:
                    logger.info(f"Using cached Perplexity analysis for {ticker}")
                    # Reconstruct StockAnalysis from the serialized dict
                    cached['timestamp'] = datetime.fromisoformat(cached['timestamp'])
                    cached['analysis_type'] = AnalysisType(cached['analysis_type'])
                    return StockAnalysis(**cached)
            except Exception as e:
                # Cache problems must never block a fresh analysis
                logger.warning(f"Cache retrieval error: {e}")

        # Construct analysis prompt
        prompt = self._build_analysis_prompt(ticker, analysis_type, depth)
        try:
            # Make API request with financial context
            analysis_text = await self._query_perplexity(
                prompt,
                context="financial_analysis",
                include_sources=True
            )
            # Parse into structured format locally (avoids a second API call)
            result = self._parse_analysis_locally(ticker, analysis_text, analysis_type)

            # Cache the result (datetime/enum serialized to JSON-safe values)
            if self.cache:
                try:
                    cache_data = asdict(result)
                    cache_data['timestamp'] = cache_data['timestamp'].isoformat()
                    cache_data['analysis_type'] = cache_data['analysis_type'].value
                    await self.cache.set(cache_key, cache_data, ttl=3600)  # 1 hour cache
                except Exception as e:
                    logger.warning(f"Cache storage error: {e}")
            return result
        except Exception as e:
            logger.error(f"Stock analysis failed for {ticker}: {e}")
            # Return a neutral placeholder so callers always get a StockAnalysis
            return StockAnalysis(
                ticker=ticker,
                timestamp=datetime.now(timezone.utc),
                analysis_type=analysis_type,
                current_price=0,
                fair_value=None,
                upside_potential=None,
                pe_ratio=None,
                peg_ratio=None,
                price_to_book=None,
                debt_to_equity=None,
                roe=None,
                revenue_growth=None,
                earnings_growth=None,
                bull_case="Analysis unavailable",
                bear_case="Analysis unavailable",
                key_risks=["Analysis failed"],
                catalysts=[],
                rating="Hold",
                confidence_score=0,
                time_horizon="medium",
                detailed_analysis=str(e),
                data_sources=["Error"]
            )

    async def screen_stocks(self,
                            query: str,
                            max_results: int = 20,
                            filters: Optional[Dict[str, Any]] = None) -> MarketScreenerResult:
        """
        Screen stocks based on a natural language query.

        The query is truncated to 500 characters and stripped of characters
        outside [\\w\\s\\-.,?!$%] before being embedded in the prompt.
        Never raises on screening failure — an empty result carrying the
        error text is returned instead.

        Args:
            query: free-text screening request.
            max_results: requested number of stocks (capped at 50 in the prompt).
            filters: optional structured criteria forwarded to the model.

        Raises:
            ValueError: if the query is empty.
        """
        if not query:
            raise ValueError("Query cannot be empty")

        # Sanitize query to prevent prompt injection
        query = query[:500]                              # limit length
        query = re.sub(r'[^\w\s\-.,?!$%]', '', query)    # remove special chars

        prompt = f"""
Financial Stock Screening Request:
{query}
Requirements:
1. Search across US listed stocks
2. Return up to {min(max_results, 50)} stocks
3. Include current price, market cap, P/E ratio
4. Rank by relevance
5. Consider recent market conditions
Filters: {json.dumps(filters) if filters else 'None'}
Format response with clear ticker symbols and metrics.
"""
        try:
            response = await self._query_perplexity(
                prompt,
                context="stock_screening",
                include_sources=True
            )
            return self._parse_screening_locally(query, response)
        except Exception as e:
            logger.error(f"Stock screening failed: {e}")
            return MarketScreenerResult(
                query=query,
                timestamp=datetime.now(timezone.utc),
                total_results=0,
                stocks=[],
                screening_criteria=filters or {},
                market_context="Screening failed",
                best_value=[],
                highest_growth=[],
                lowest_risk=[],
                detailed_explanation=str(e)
            )

    async def _query_perplexity(self,
                                prompt: str,
                                context: str = "general",
                                include_sources: bool = True,
                                max_tokens: int = 1500) -> str:
        """
        Make a chat-completions request to Perplexity.

        HTTP 429 responses are retried up to MAX_RATE_LIMIT_RETRIES times,
        honouring the Retry-After header (previously this retried via
        unbounded recursion).  Error text is sanitized before logging so the
        bearer token never reaches the logs.

        Raises:
            asyncio.TimeoutError: the request exceeded the 30 s timeout.
            ValueError: the response body was missing or malformed.
            Exception: any other non-200 status, or 429 after all retries.
        """
        # Truncate the prompt to stay within Perplexity token limits
        prompt = prompt[:4000]

        payload = {
            "model": self.finance_model,
            "messages": [
                {
                    "role": "system",
                    "content": f"You are a senior financial analyst providing {context} analysis. "
                               "Use real-time market data and cite credible sources. "
                               "Be specific with numbers, percentages, and dates."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "max_tokens": max_tokens,
            "temperature": 0.2,
            "return_citations": include_sources,
            "search_domain_filter": ["finance", "investing", "markets"],
            "search_recency_filter": "day"
        }

        for attempt in range(self.MAX_RATE_LIMIT_RETRIES + 1):
            # Client-side throttle before every attempt
            await self._rate_limit()
            async with aiohttp.ClientSession() as session:
                try:
                    async with session.post(
                        f"{self.base_url}/chat/completions",
                        headers=self.headers,
                        json=payload,
                        timeout=aiohttp.ClientTimeout(total=30)
                    ) as response:
                        # Handle server-side rate limiting with a bounded retry
                        if response.status == 429:
                            try:
                                retry_after = int(response.headers.get('Retry-After', 60))
                            except (TypeError, ValueError):
                                # Non-numeric Retry-After header: fall back to 60 s
                                retry_after = 60
                            if attempt < self.MAX_RATE_LIMIT_RETRIES:
                                logger.warning(f"Rate limited. Waiting {retry_after} seconds...")
                                await asyncio.sleep(retry_after)
                                continue
                            raise Exception("API request failed with status 429")

                        if response.status == 200:
                            data = await response.json()
                            # Validate response structure before trusting it
                            if not data.get('choices'):
                                raise ValueError("Empty response from Perplexity API")
                            if len(data['choices']) == 0:
                                raise ValueError("No choices in Perplexity response")
                            choice = data['choices'][0]
                            if 'message' not in choice or 'content' not in choice['message']:
                                raise ValueError("Malformed response structure from Perplexity")
                            content = choice['message']['content']
                            if not content:
                                raise ValueError("Empty content in Perplexity response")
                            return content

                        # Any other status: sanitize before logging (strip API key)
                        error = await response.text()
                        error = re.sub(r'Bearer [^\s]+', 'Bearer ***', error)
                        logger.error(f"Perplexity API error (status {response.status}): {error[:200]}")
                        raise Exception(f"API request failed with status {response.status}")
                except asyncio.TimeoutError:
                    logger.error("Perplexity API request timed out")
                    raise
                except Exception as e:
                    # Sanitize the message in case it echoes the auth header
                    error_msg = re.sub(r'Bearer [^\s]+', 'Bearer ***', str(e))
                    logger.error(f"Perplexity API error: {error_msg}")
                    raise

        # Unreachable: every loop path either returns or raises.
        raise Exception("API request failed after retries")

    async def _rate_limit(self):
        """Throttle outgoing requests to at most ``rate_limit`` per minute.

        Maintains a per-minute counter plus a minimum spacing between
        consecutive requests (60 / rate_limit seconds).
        """
        now = datetime.now(timezone.utc)

        # Reset counter every minute
        if (now - self.rate_limit_reset).total_seconds() > 60:
            self.request_count = 0
            self.rate_limit_reset = now

        # Sleep out the remainder of the window if the per-minute cap is hit
        if self.request_count >= self.rate_limit:
            sleep_time = 60 - (now - self.rate_limit_reset).total_seconds()
            if sleep_time > 0:
                logger.info(f"Rate limit reached. Sleeping {sleep_time:.1f} seconds...")
                await asyncio.sleep(sleep_time)
            self.request_count = 0
            self.rate_limit_reset = datetime.now(timezone.utc)
            # Refresh `now` — the stale value made the spacing check below
            # overestimate the gap after a long sleep.
            now = datetime.now(timezone.utc)

        # Enforce minimum time between requests
        time_since_last = (now - self.last_request_time).total_seconds()
        min_interval = 60 / self.rate_limit  # seconds between requests
        if time_since_last < min_interval:
            await asyncio.sleep(min_interval - time_since_last)

        self.last_request_time = datetime.now(timezone.utc)
        self.request_count += 1

    def _build_analysis_prompt(self,
                               ticker: str,
                               analysis_type: AnalysisType,
                               depth: ResearchDepth) -> str:
        """Build the user prompt for ``analyze_stock``.

        Adds a type-specific checklist for FUNDAMENTAL / TECHNICAL /
        VALUATION analyses and a depth suffix for DEEP / EXPERT research.
        """
        base_prompt = f"Analyze {ticker} stock with focus on {analysis_type.value} analysis.\n\n"

        if analysis_type == AnalysisType.FUNDAMENTAL:
            base_prompt += """
Include:
1. Current valuation metrics (P/E, PEG, P/B, EV/EBITDA)
2. Profitability metrics (ROE, ROA, profit margins)
3. Growth metrics (revenue, earnings, FCF growth)
4. Balance sheet strength
5. Competitive position
6. Fair value estimate
7. Investment recommendation
"""
        elif analysis_type == AnalysisType.TECHNICAL:
            base_prompt += """
Include:
1. Current price action and trend
2. Support and resistance levels
3. Moving averages
4. RSI, MACD indicators
5. Volume analysis
6. Chart patterns
7. Short-term outlook
"""
        elif analysis_type == AnalysisType.VALUATION:
            base_prompt += """
Perform valuation:
1. DCF analysis
2. Comparable company analysis
3. Sensitivity analysis
4. Fair value range
5. Investment recommendation
"""

        if depth == ResearchDepth.DEEP:
            base_prompt += "\nProvide extensive detail with specific numbers."
        elif depth == ResearchDepth.EXPERT:
            base_prompt += "\nProvide institutional-quality analysis."

        return base_prompt

    def _parse_analysis_locally(self,
                                ticker: str,
                                raw_analysis: str,
                                analysis_type: AnalysisType) -> StockAnalysis:
        """Parse the raw analysis text into a StockAnalysis without a second
        API call.

        Extraction is best-effort regex work: unparsed metrics become 0/None
        and the full text is always preserved in ``detailed_analysis``.
        """

        def extract_number(pattern: str, text: str, default: float = 0) -> float:
            """Return the first number matched by `pattern`, else `default`."""
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                try:
                    return float(match.group(1).replace(',', '').replace('$', ''))
                except ValueError:
                    # e.g. the captured run was "." or ","; was a bare except
                    pass
            return default

        current_price = extract_number(r'current.*?price.*?\$?([\d,.]+)', raw_analysis)
        fair_value = extract_number(r'fair.*?value.*?\$?([\d,.]+)', raw_analysis)
        pe_ratio = extract_number(r'p/e.*?ratio.*?([\d,.]+)', raw_analysis)

        # Calculate percent upside if we have both prices
        upside_potential = None
        if current_price > 0 and fair_value > 0:
            upside_potential = ((fair_value - current_price) / current_price) * 100

        # Extract rating ("buy" takes precedence over "sell" when both appear)
        rating = "Hold"
        if re.search(r'\b(strong\s+)?buy\b', raw_analysis, re.IGNORECASE):
            rating = "Buy"
        elif re.search(r'\b(strong\s+)?sell\b', raw_analysis, re.IGNORECASE):
            rating = "Sell"

        # Extract up to five risk bullets from a "Risks:" style section
        risks = []
        risk_section = re.search(r'risk[s]?:?(.*?)(?:catalyst|opportunit|\n\n)',
                                 raw_analysis, re.IGNORECASE | re.DOTALL)
        if risk_section:
            risks = [r.strip() for r in risk_section.group(1).split('\n')
                     if r.strip() and len(r.strip()) > 10][:5]

        # Build analysis object
        return StockAnalysis(
            ticker=ticker,
            timestamp=datetime.now(timezone.utc),
            analysis_type=analysis_type,
            current_price=current_price,
            fair_value=fair_value if fair_value > 0 else None,
            upside_potential=upside_potential,
            pe_ratio=pe_ratio if pe_ratio > 0 else None,
            peg_ratio=None,
            price_to_book=None,
            debt_to_equity=None,
            roe=None,
            revenue_growth=None,
            earnings_growth=None,
            bull_case=raw_analysis[:500],
            bear_case="See full analysis",
            key_risks=risks if risks else ["See full analysis"],
            catalysts=[],
            rating=rating,
            confidence_score=70,  # Default moderate confidence
            time_horizon="medium",
            detailed_analysis=raw_analysis,
            data_sources=["Perplexity AI", "Real-time market data"]
        )

    def _parse_screening_locally(self, query: str, raw_response: str) -> MarketScreenerResult:
        """Parse the free-text screening response into a MarketScreenerResult.

        Ticker symbols are recognised as 1-5 uppercase letters followed by
        ':', '-', '|' or "at $"; common false positives are filtered out.
        """
        # Extract candidate stock symbols
        ticker_pattern = r'\b([A-Z]{1,5})\b(?:\s*[\:\-\|]|\s+at\s+\$)'
        tickers = re.findall(ticker_pattern, raw_response)

        # Remove common words that look like tickers
        exclude = {'THE', 'AND', 'FOR', 'NYSE', 'NASDAQ', 'IPO', 'CEO', 'CFO', 'Q1', 'Q2', 'Q3', 'Q4'}
        tickers = [t for t in tickers if t not in exclude][:20]

        # Build basic stock info
        stocks = []
        for ticker in tickers[:10]:  # Limit to 10
            # Find the first number after the ticker mention.  The capture now
            # requires a leading digit — the old `([\d,.]+)` could match a lone
            # "." (e.g. "AAPL. The ...") and crash float().  re.escape is
            # defence-in-depth even though matched tickers are letters only.
            price_pattern = rf'{re.escape(ticker)}.*?\$?(\d[\d,.]*)'
            price_match = re.search(price_pattern, raw_response)
            price = 0
            if price_match:
                try:
                    price = float(price_match.group(1).replace(',', ''))
                except ValueError:
                    price = 0
            stocks.append({
                'ticker': ticker,
                'company_name': '',
                'price': price,
                'pe_ratio': None,
                'market_cap': None
            })

        return MarketScreenerResult(
            query=query,
            timestamp=datetime.now(timezone.utc),
            total_results=len(stocks),
            stocks=stocks,
            screening_criteria={},
            market_context=raw_response[:200],
            best_value=[s['ticker'] for s in stocks[:3]],
            highest_growth=[],
            lowest_risk=[],
            detailed_explanation=raw_response
        )

    async def get_market_sentiment(self, sector: Optional[str] = None) -> Dict[str, Any]:
        """Get current market (or per-sector) sentiment.

        Never raises: on failure the returned dict carries an "error" key and
        a placeholder analysis string instead of "data_freshness".
        """
        try:
            prompt = f"""
Analyze current market sentiment {f'for {sector} sector' if sector else 'overall'}:
1. Bull vs Bear sentiment
2. Key concerns
3. Opportunities
4. Technical levels
"""
            response = await self._query_perplexity(prompt, context="market_sentiment")
            return {
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "sector": sector or "market",
                "analysis": response,
                "data_freshness": "real-time"
            }
        except Exception as e:
            logger.error(f"Market sentiment analysis failed: {e}")
            return {
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "sector": sector or "market",
                "analysis": "Analysis unavailable",
                "error": str(e)
            }