# TradingAgents/autonomous/connectors/perplexity_finance.py
# (file-browser metadata from the paste: 578 lines, 21 KiB, Python)

"""
Perplexity Finance API Connector for real-time financial analysis and research.
FIXED VERSION: Addresses all critical issues from code review.
"""
import asyncio
import os
import json
import re
from typing import Dict, List, Optional, Any
from datetime import datetime, timedelta, timezone
from dataclasses import dataclass, asdict
from enum import Enum
import aiohttp
from pydantic import BaseModel, Field, validator
import logging
# Optional dependency: the Redis-backed cache lives elsewhere in the project.
# When it cannot be imported (e.g. the connector is used standalone),
# CACHE_AVAILABLE is False and the connector simply runs uncached.
try:
    from autonomous.core.cache import RedisCache, CacheKey
    CACHE_AVAILABLE = True
except ImportError:
    CACHE_AVAILABLE = False

logger = logging.getLogger(__name__)
class AnalysisType(str, Enum):
    """Kinds of financial analysis the connector can request.

    Subclasses ``str`` so members compare equal to their plain string
    values and serialize cleanly (e.g. into cache keys and JSON).
    """

    FUNDAMENTAL = "fundamental"      # valuation + balance-sheet fundamentals
    TECHNICAL = "technical"          # price action, indicators, chart patterns
    SENTIMENT = "sentiment"          # market/news sentiment
    EARNINGS = "earnings"            # earnings-focused review
    VALUATION = "valuation"          # DCF / comparables fair-value work
    COMPETITIVE = "competitive"      # competitive positioning
    MACRO = "macro"                  # macroeconomic backdrop
    INSIDER = "insider"              # insider activity
    INSTITUTIONAL = "institutional"  # institutional ownership/flows
    OPTIONS_FLOW = "options_flow"    # options order-flow analysis
class ResearchDepth(str, Enum):
    """How thorough the requested research should be.

    A ``str``-backed enum, ordered from cheapest to most detailed.
    """

    QUICK = "quick"        # headline-level answer
    STANDARD = "standard"  # default level of detail
    DEEP = "deep"          # extensive detail with specific numbers
    EXPERT = "expert"      # institutional-quality analysis
@dataclass
class StockAnalysis:
    """Structured result of a single-stock analysis produced by
    PerplexityFinanceConnector (parsed locally from the model's free text)."""

    ticker: str                         # symbol as passed in, e.g. "AAPL"
    timestamp: datetime                 # UTC time the analysis was produced
    analysis_type: AnalysisType         # which analysis flavour was requested
    current_price: float                # 0 when no price could be parsed
    fair_value: Optional[float]         # None when not extracted from the text
    upside_potential: Optional[float]   # percent upside of fair_value vs current_price
    pe_ratio: Optional[float]
    peg_ratio: Optional[float]
    price_to_book: Optional[float]
    debt_to_equity: Optional[float]
    roe: Optional[float]
    revenue_growth: Optional[float]
    earnings_growth: Optional[float]
    bull_case: str                      # bullish summary (may be a text excerpt)
    bear_case: str
    key_risks: List[str]                # up to 5 extracted risk bullets
    catalysts: List[str]
    rating: str                         # "Buy" / "Hold" / "Sell"
    confidence_score: float             # parser default is 70; 0 on failure
    time_horizon: str                   # e.g. "medium" — TODO confirm full value set
    detailed_analysis: str              # full raw model output (or error text)
    data_sources: List[str]             # provenance labels for the analysis
@dataclass
class MarketScreenerResult:
    """Structured result of a natural-language stock-screening query
    (parsed locally from the model's free text)."""

    query: str                        # the (sanitized) user query
    timestamp: datetime               # UTC time the screen was run
    total_results: int                # number of entries in `stocks`
    stocks: List[Dict[str, Any]]      # per-stock dicts: ticker/company_name/price/...
    screening_criteria: Dict[str, Any]  # filters applied (empty dict when none)
    market_context: str               # short excerpt of the raw response
    best_value: List[str]             # ticker shortlists derived from the results
    highest_growth: List[str]
    lowest_risk: List[str]
    detailed_explanation: str         # full raw model output (or error text)
class PerplexityFinanceConnector:
    """
    Connector for the Perplexity chat-completions API tuned for financial
    analysis and stock screening.

    Features:
      * client-side rate limiting (``rate_limit`` requests per minute),
      * optional Redis caching of per-ticker analyses (1 hour TTL),
      * local regex-based parsing of the model's free-text answers into
        structured ``StockAnalysis`` / ``MarketScreenerResult`` objects,
      * sanitized error logging (the bearer token is never written to logs).
    """

    # Perplexity models this connector is known to accept.
    VALID_MODELS = [
        "sonar",                    # Default sonar model (works!)
        "sonar-online",             # Online search model
        "sonar-chat",               # Chat-focused model
        "mixtral-8x7b-instruct",    # Mixtral instruct model
        "codellama-70b-instruct",   # Code-focused model
        "llama-3.1-70b-instruct",   # Large Llama model
    ]

    # Upper bound on 429-triggered retries of a single request.  The previous
    # implementation retried by recursing with no limit, which could recurse
    # (and sleep) forever against a persistently rate-limited endpoint.
    MAX_RATE_LIMIT_RETRIES = 3

    def __init__(self,
                 api_key: Optional[str] = None,
                 # NOTE: quoted forward reference — RedisCache is undefined when
                 # the optional cache import failed; an unquoted annotation
                 # raised NameError at class-definition time in that case.
                 cache: Optional["RedisCache"] = None,
                 rate_limit: int = 50,
                 model: Optional[str] = None):
        """
        Initialize Perplexity Finance connector.

        Args:
            api_key: Perplexity API key (falls back to the PERPLEXITY_API_KEY
                environment variable).
            cache: Redis cache instance; ignored when cache support is unavailable.
            rate_limit: Maximum requests per minute.
            model: Specific model to use (must be in VALID_MODELS; otherwise
                the default "sonar" is selected).

        Raises:
            ValueError: if no API key is provided or found in the environment.
        """
        self.api_key = api_key or os.getenv('PERPLEXITY_API_KEY')
        if not self.api_key:
            raise ValueError("Perplexity API key required. Set PERPLEXITY_API_KEY environment variable.")
        self.base_url = "https://api.perplexity.ai"
        # Only keep the cache when the optional cache module actually imported.
        self.cache = cache if cache and CACHE_AVAILABLE else None
        self.rate_limit = rate_limit
        self.last_request_time = datetime.now(timezone.utc)

        # Headers for API requests
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        # Select appropriate model
        if model and model in self.VALID_MODELS:
            self.finance_model = model
        else:
            # Default to most reliable model
            self.finance_model = "sonar"  # Fast and reliable (verified working)
            logger.info(f"Using default model: {self.finance_model}")

        # Track rate limiting
        self.request_count = 0
        self.rate_limit_reset = datetime.now(timezone.utc)

    async def analyze_stock(self,
                            ticker: str,
                            analysis_type: AnalysisType = AnalysisType.FUNDAMENTAL,
                            depth: ResearchDepth = ResearchDepth.STANDARD) -> StockAnalysis:
        """
        Perform comprehensive analysis on a single stock.

        Checks the cache first; on a miss, queries Perplexity and parses the
        free-text answer locally.  Never raises on analysis failure — a
        placeholder "Hold" result carrying the error text is returned instead.

        Args:
            ticker: stock symbol (letters/digits, '-' and '.' allowed).
            analysis_type: which analysis flavour to request.
            depth: how thorough the research should be.

        Raises:
            ValueError: if the ticker contains invalid characters.
        """
        # Validate ticker ('-' and '.' are legal in symbols like BRK.B)
        if not ticker or not ticker.replace('-', '').replace('.', '').isalnum():
            raise ValueError(f"Invalid ticker symbol: {ticker}")

        # Check cache first
        cache_key = f"{CacheKey.AI_DECISION if CACHE_AVAILABLE else 'ai'}:perplexity:{ticker}:{analysis_type.value}"
        if self.cache:
            try:
                cached = await self.cache.get(cache_key)
                if cached:
                    logger.info(f"Using cached Perplexity analysis for {ticker}")
                    # Reconstruct StockAnalysis from the serialized dict
                    cached['timestamp'] = datetime.fromisoformat(cached['timestamp'])
                    cached['analysis_type'] = AnalysisType(cached['analysis_type'])
                    return StockAnalysis(**cached)
            except Exception as e:
                # Cache problems must never block a fresh analysis
                logger.warning(f"Cache retrieval error: {e}")

        # Construct analysis prompt
        prompt = self._build_analysis_prompt(ticker, analysis_type, depth)
        try:
            # Make API request with financial context
            analysis_text = await self._query_perplexity(
                prompt,
                context="financial_analysis",
                include_sources=True
            )
            # Parse into structured format locally (avoids a second API call)
            result = self._parse_analysis_locally(ticker, analysis_text, analysis_type)

            # Cache the result (datetime/enum serialized to JSON-safe values)
            if self.cache:
                try:
                    cache_data = asdict(result)
                    cache_data['timestamp'] = cache_data['timestamp'].isoformat()
                    cache_data['analysis_type'] = cache_data['analysis_type'].value
                    await self.cache.set(cache_key, cache_data, ttl=3600)  # 1 hour cache
                except Exception as e:
                    logger.warning(f"Cache storage error: {e}")
            return result
        except Exception as e:
            logger.error(f"Stock analysis failed for {ticker}: {e}")
            # Return a neutral placeholder so callers always get a StockAnalysis
            return StockAnalysis(
                ticker=ticker,
                timestamp=datetime.now(timezone.utc),
                analysis_type=analysis_type,
                current_price=0,
                fair_value=None,
                upside_potential=None,
                pe_ratio=None,
                peg_ratio=None,
                price_to_book=None,
                debt_to_equity=None,
                roe=None,
                revenue_growth=None,
                earnings_growth=None,
                bull_case="Analysis unavailable",
                bear_case="Analysis unavailable",
                key_risks=["Analysis failed"],
                catalysts=[],
                rating="Hold",
                confidence_score=0,
                time_horizon="medium",
                detailed_analysis=str(e),
                data_sources=["Error"]
            )

    async def screen_stocks(self,
                            query: str,
                            max_results: int = 20,
                            filters: Optional[Dict[str, Any]] = None) -> MarketScreenerResult:
        """
        Screen stocks based on a natural language query.

        The query is truncated to 500 characters and stripped of characters
        outside [\\w\\s\\-.,?!$%] before being embedded in the prompt.
        Never raises on screening failure — an empty result carrying the
        error text is returned instead.

        Args:
            query: free-text screening request.
            max_results: requested number of stocks (capped at 50 in the prompt).
            filters: optional structured criteria forwarded to the model.

        Raises:
            ValueError: if the query is empty.
        """
        if not query:
            raise ValueError("Query cannot be empty")

        # Sanitize query to prevent prompt injection
        query = query[:500]                              # limit length
        query = re.sub(r'[^\w\s\-.,?!$%]', '', query)    # remove special chars

        prompt = f"""
Financial Stock Screening Request:
{query}
Requirements:
1. Search across US listed stocks
2. Return up to {min(max_results, 50)} stocks
3. Include current price, market cap, P/E ratio
4. Rank by relevance
5. Consider recent market conditions
Filters: {json.dumps(filters) if filters else 'None'}
Format response with clear ticker symbols and metrics.
"""
        try:
            response = await self._query_perplexity(
                prompt,
                context="stock_screening",
                include_sources=True
            )
            return self._parse_screening_locally(query, response)
        except Exception as e:
            logger.error(f"Stock screening failed: {e}")
            return MarketScreenerResult(
                query=query,
                timestamp=datetime.now(timezone.utc),
                total_results=0,
                stocks=[],
                screening_criteria=filters or {},
                market_context="Screening failed",
                best_value=[],
                highest_growth=[],
                lowest_risk=[],
                detailed_explanation=str(e)
            )

    async def _query_perplexity(self,
                                prompt: str,
                                context: str = "general",
                                include_sources: bool = True,
                                max_tokens: int = 1500) -> str:
        """
        Make a chat-completions request to Perplexity.

        HTTP 429 responses are retried up to MAX_RATE_LIMIT_RETRIES times,
        honouring the Retry-After header (previously this retried via
        unbounded recursion).  Error text is sanitized before logging so the
        bearer token never reaches the logs.

        Raises:
            asyncio.TimeoutError: the request exceeded the 30 s timeout.
            ValueError: the response body was missing or malformed.
            Exception: any other non-200 status, or 429 after all retries.
        """
        # Truncate the prompt to stay within Perplexity token limits
        prompt = prompt[:4000]

        payload = {
            "model": self.finance_model,
            "messages": [
                {
                    "role": "system",
                    "content": f"You are a senior financial analyst providing {context} analysis. "
                               "Use real-time market data and cite credible sources. "
                               "Be specific with numbers, percentages, and dates."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "max_tokens": max_tokens,
            "temperature": 0.2,
            "return_citations": include_sources,
            "search_domain_filter": ["finance", "investing", "markets"],
            "search_recency_filter": "day"
        }

        for attempt in range(self.MAX_RATE_LIMIT_RETRIES + 1):
            # Client-side throttle before every attempt
            await self._rate_limit()
            async with aiohttp.ClientSession() as session:
                try:
                    async with session.post(
                        f"{self.base_url}/chat/completions",
                        headers=self.headers,
                        json=payload,
                        timeout=aiohttp.ClientTimeout(total=30)
                    ) as response:
                        # Handle server-side rate limiting with a bounded retry
                        if response.status == 429:
                            try:
                                retry_after = int(response.headers.get('Retry-After', 60))
                            except (TypeError, ValueError):
                                # Non-numeric Retry-After header: fall back to 60 s
                                retry_after = 60
                            if attempt < self.MAX_RATE_LIMIT_RETRIES:
                                logger.warning(f"Rate limited. Waiting {retry_after} seconds...")
                                await asyncio.sleep(retry_after)
                                continue
                            raise Exception("API request failed with status 429")

                        if response.status == 200:
                            data = await response.json()
                            # Validate response structure before trusting it
                            if not data.get('choices'):
                                raise ValueError("Empty response from Perplexity API")
                            if len(data['choices']) == 0:
                                raise ValueError("No choices in Perplexity response")
                            choice = data['choices'][0]
                            if 'message' not in choice or 'content' not in choice['message']:
                                raise ValueError("Malformed response structure from Perplexity")
                            content = choice['message']['content']
                            if not content:
                                raise ValueError("Empty content in Perplexity response")
                            return content

                        # Any other status: sanitize before logging (strip API key)
                        error = await response.text()
                        error = re.sub(r'Bearer [^\s]+', 'Bearer ***', error)
                        logger.error(f"Perplexity API error (status {response.status}): {error[:200]}")
                        raise Exception(f"API request failed with status {response.status}")
                except asyncio.TimeoutError:
                    logger.error("Perplexity API request timed out")
                    raise
                except Exception as e:
                    # Sanitize the message in case it echoes the auth header
                    error_msg = re.sub(r'Bearer [^\s]+', 'Bearer ***', str(e))
                    logger.error(f"Perplexity API error: {error_msg}")
                    raise

        # Unreachable: every loop path either returns or raises.
        raise Exception("API request failed after retries")

    async def _rate_limit(self):
        """Throttle outgoing requests to at most ``rate_limit`` per minute.

        Maintains a per-minute counter plus a minimum spacing between
        consecutive requests (60 / rate_limit seconds).
        """
        now = datetime.now(timezone.utc)

        # Reset counter every minute
        if (now - self.rate_limit_reset).total_seconds() > 60:
            self.request_count = 0
            self.rate_limit_reset = now

        # Sleep out the remainder of the window if the per-minute cap is hit
        if self.request_count >= self.rate_limit:
            sleep_time = 60 - (now - self.rate_limit_reset).total_seconds()
            if sleep_time > 0:
                logger.info(f"Rate limit reached. Sleeping {sleep_time:.1f} seconds...")
                await asyncio.sleep(sleep_time)
            self.request_count = 0
            self.rate_limit_reset = datetime.now(timezone.utc)
            # Refresh `now` — the stale value made the spacing check below
            # overestimate the gap after a long sleep.
            now = datetime.now(timezone.utc)

        # Enforce minimum time between requests
        time_since_last = (now - self.last_request_time).total_seconds()
        min_interval = 60 / self.rate_limit  # seconds between requests
        if time_since_last < min_interval:
            await asyncio.sleep(min_interval - time_since_last)

        self.last_request_time = datetime.now(timezone.utc)
        self.request_count += 1

    def _build_analysis_prompt(self,
                               ticker: str,
                               analysis_type: AnalysisType,
                               depth: ResearchDepth) -> str:
        """Build the user prompt for ``analyze_stock``.

        Adds a type-specific checklist for FUNDAMENTAL / TECHNICAL /
        VALUATION analyses and a depth suffix for DEEP / EXPERT research.
        """
        base_prompt = f"Analyze {ticker} stock with focus on {analysis_type.value} analysis.\n\n"

        if analysis_type == AnalysisType.FUNDAMENTAL:
            base_prompt += """
Include:
1. Current valuation metrics (P/E, PEG, P/B, EV/EBITDA)
2. Profitability metrics (ROE, ROA, profit margins)
3. Growth metrics (revenue, earnings, FCF growth)
4. Balance sheet strength
5. Competitive position
6. Fair value estimate
7. Investment recommendation
"""
        elif analysis_type == AnalysisType.TECHNICAL:
            base_prompt += """
Include:
1. Current price action and trend
2. Support and resistance levels
3. Moving averages
4. RSI, MACD indicators
5. Volume analysis
6. Chart patterns
7. Short-term outlook
"""
        elif analysis_type == AnalysisType.VALUATION:
            base_prompt += """
Perform valuation:
1. DCF analysis
2. Comparable company analysis
3. Sensitivity analysis
4. Fair value range
5. Investment recommendation
"""

        if depth == ResearchDepth.DEEP:
            base_prompt += "\nProvide extensive detail with specific numbers."
        elif depth == ResearchDepth.EXPERT:
            base_prompt += "\nProvide institutional-quality analysis."

        return base_prompt

    def _parse_analysis_locally(self,
                                ticker: str,
                                raw_analysis: str,
                                analysis_type: AnalysisType) -> StockAnalysis:
        """Parse the raw analysis text into a StockAnalysis without a second
        API call.

        Extraction is best-effort regex work: unparsed metrics become 0/None
        and the full text is always preserved in ``detailed_analysis``.
        """

        def extract_number(pattern: str, text: str, default: float = 0) -> float:
            """Return the first number matched by `pattern`, else `default`."""
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                try:
                    return float(match.group(1).replace(',', '').replace('$', ''))
                except ValueError:
                    # e.g. the captured run was "." or ","; was a bare except
                    pass
            return default

        current_price = extract_number(r'current.*?price.*?\$?([\d,.]+)', raw_analysis)
        fair_value = extract_number(r'fair.*?value.*?\$?([\d,.]+)', raw_analysis)
        pe_ratio = extract_number(r'p/e.*?ratio.*?([\d,.]+)', raw_analysis)

        # Calculate percent upside if we have both prices
        upside_potential = None
        if current_price > 0 and fair_value > 0:
            upside_potential = ((fair_value - current_price) / current_price) * 100

        # Extract rating ("buy" takes precedence over "sell" when both appear)
        rating = "Hold"
        if re.search(r'\b(strong\s+)?buy\b', raw_analysis, re.IGNORECASE):
            rating = "Buy"
        elif re.search(r'\b(strong\s+)?sell\b', raw_analysis, re.IGNORECASE):
            rating = "Sell"

        # Extract up to five risk bullets from a "Risks:" style section
        risks = []
        risk_section = re.search(r'risk[s]?:?(.*?)(?:catalyst|opportunit|\n\n)',
                                 raw_analysis, re.IGNORECASE | re.DOTALL)
        if risk_section:
            risks = [r.strip() for r in risk_section.group(1).split('\n')
                     if r.strip() and len(r.strip()) > 10][:5]

        # Build analysis object
        return StockAnalysis(
            ticker=ticker,
            timestamp=datetime.now(timezone.utc),
            analysis_type=analysis_type,
            current_price=current_price,
            fair_value=fair_value if fair_value > 0 else None,
            upside_potential=upside_potential,
            pe_ratio=pe_ratio if pe_ratio > 0 else None,
            peg_ratio=None,
            price_to_book=None,
            debt_to_equity=None,
            roe=None,
            revenue_growth=None,
            earnings_growth=None,
            bull_case=raw_analysis[:500],
            bear_case="See full analysis",
            key_risks=risks if risks else ["See full analysis"],
            catalysts=[],
            rating=rating,
            confidence_score=70,  # Default moderate confidence
            time_horizon="medium",
            detailed_analysis=raw_analysis,
            data_sources=["Perplexity AI", "Real-time market data"]
        )

    def _parse_screening_locally(self, query: str, raw_response: str) -> MarketScreenerResult:
        """Parse the free-text screening response into a MarketScreenerResult.

        Ticker symbols are recognised as 1-5 uppercase letters followed by
        ':', '-', '|' or "at $"; common false positives are filtered out.
        """
        # Extract candidate stock symbols
        ticker_pattern = r'\b([A-Z]{1,5})\b(?:\s*[\:\-\|]|\s+at\s+\$)'
        tickers = re.findall(ticker_pattern, raw_response)

        # Remove common words that look like tickers
        exclude = {'THE', 'AND', 'FOR', 'NYSE', 'NASDAQ', 'IPO', 'CEO', 'CFO', 'Q1', 'Q2', 'Q3', 'Q4'}
        tickers = [t for t in tickers if t not in exclude][:20]

        # Build basic stock info
        stocks = []
        for ticker in tickers[:10]:  # Limit to 10
            # Find the first number after the ticker mention.  The capture now
            # requires a leading digit — the old `([\d,.]+)` could match a lone
            # "." (e.g. "AAPL. The ...") and crash float().  re.escape is
            # defence-in-depth even though matched tickers are letters only.
            price_pattern = rf'{re.escape(ticker)}.*?\$?(\d[\d,.]*)'
            price_match = re.search(price_pattern, raw_response)
            price = 0
            if price_match:
                try:
                    price = float(price_match.group(1).replace(',', ''))
                except ValueError:
                    price = 0
            stocks.append({
                'ticker': ticker,
                'company_name': '',
                'price': price,
                'pe_ratio': None,
                'market_cap': None
            })

        return MarketScreenerResult(
            query=query,
            timestamp=datetime.now(timezone.utc),
            total_results=len(stocks),
            stocks=stocks,
            screening_criteria={},
            market_context=raw_response[:200],
            best_value=[s['ticker'] for s in stocks[:3]],
            highest_growth=[],
            lowest_risk=[],
            detailed_explanation=raw_response
        )

    async def get_market_sentiment(self, sector: Optional[str] = None) -> Dict[str, Any]:
        """Get current market (or per-sector) sentiment.

        Never raises: on failure the returned dict carries an "error" key and
        a placeholder analysis string instead of "data_freshness".
        """
        try:
            prompt = f"""
Analyze current market sentiment {f'for {sector} sector' if sector else 'overall'}:
1. Bull vs Bear sentiment
2. Key concerns
3. Opportunities
4. Technical levels
"""
            response = await self._query_perplexity(prompt, context="market_sentiment")
            return {
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "sector": sector or "market",
                "analysis": response,
                "data_freshness": "real-time"
            }
        except Exception as e:
            logger.error(f"Market sentiment analysis failed: {e}")
            return {
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "sector": sector or "market",
                "analysis": "Analysis unavailable",
                "error": str(e)
            }