"""
|
|
Perplexity Finance API Connector for real-time financial analysis and research.
|
|
FIXED VERSION: Addresses all critical issues from code review.
|
|
"""
|
|
|
|
import asyncio
|
|
import os
|
|
import json
|
|
import re
|
|
from typing import Dict, List, Optional, Any
|
|
from datetime import datetime, timedelta, timezone
|
|
from dataclasses import dataclass, asdict
|
|
from enum import Enum
|
|
import aiohttp
|
|
from pydantic import BaseModel, Field, validator
|
|
import logging
|
|
|
|
# Import cache if available
|
|
try:
|
|
from autonomous.core.cache import RedisCache, CacheKey
|
|
CACHE_AVAILABLE = True
|
|
except ImportError:
|
|
CACHE_AVAILABLE = False
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class AnalysisType(str, Enum):
    """Types of financial analysis available.

    Subclasses ``str`` so members serialize naturally (e.g. in cache keys
    and prompts via ``.value``).  Note: only FUNDAMENTAL, TECHNICAL and
    VALUATION get a specialised prompt in ``_build_analysis_prompt``; the
    rest fall back to the generic opening line.
    """
    FUNDAMENTAL = "fundamental"      # valuation, profitability, balance sheet
    TECHNICAL = "technical"          # price action, indicators, chart patterns
    SENTIMENT = "sentiment"
    EARNINGS = "earnings"
    VALUATION = "valuation"          # DCF / comparable-company analysis
    COMPETITIVE = "competitive"
    MACRO = "macro"
    INSIDER = "insider"
    INSTITUTIONAL = "institutional"
    OPTIONS_FLOW = "options_flow"
|
|
|
|
|
class ResearchDepth(str, Enum):
    """Depth of research analysis.

    Only DEEP and EXPERT add extra instructions to the prompt in
    ``_build_analysis_prompt``; QUICK and STANDARD share the base prompt.
    """
    QUICK = "quick"
    STANDARD = "standard"
    DEEP = "deep"        # adds "extensive detail with specific numbers"
    EXPERT = "expert"    # adds "institutional-quality analysis"
|
|
|
|
|
|
@dataclass
class StockAnalysis:
    """Complete stock analysis result"""
    # --- identity / provenance ---
    ticker: str                         # symbol the analysis refers to
    timestamp: datetime                 # when the analysis was produced (UTC)
    analysis_type: AnalysisType         # which analysis flavor was requested
    # --- extracted metrics (None / 0 when not found in the raw text) ---
    current_price: float
    fair_value: Optional[float]
    upside_potential: Optional[float]   # percent: (fair - current) / current * 100
    pe_ratio: Optional[float]
    peg_ratio: Optional[float]
    price_to_book: Optional[float]
    debt_to_equity: Optional[float]
    roe: Optional[float]
    revenue_growth: Optional[float]
    earnings_growth: Optional[float]
    # --- narrative sections ---
    bull_case: str
    bear_case: str
    key_risks: List[str]
    catalysts: List[str]
    # --- verdict ---
    rating: str                         # "Buy" / "Hold" / "Sell"
    confidence_score: float             # 0-100; 70 when parsed, 0 on failure
    time_horizon: str                   # e.g. "medium"
    detailed_analysis: str              # full raw analysis text (or error text)
    data_sources: List[str]
|
|
|
|
|
@dataclass
class MarketScreenerResult:
    """Result from market screening queries"""
    query: str                          # (sanitized) natural-language query
    timestamp: datetime                 # when the screen was run (UTC)
    total_results: int                  # len(stocks)
    stocks: List[Dict[str, Any]]        # per-stock dicts: ticker/price/pe_ratio/...
    screening_criteria: Dict[str, Any]  # caller-supplied filters (may be empty)
    market_context: str                 # leading excerpt of the raw response
    best_value: List[str]               # ticker shortlists (heuristic)
    highest_growth: List[str]
    lowest_risk: List[str]
    detailed_explanation: str           # full raw response (or error text)
|
|
|
|
|
class PerplexityFinanceConnector:
    """
    Fixed connector for Perplexity Finance API providing advanced financial analysis.
    """

    # List of valid Perplexity models (current API models).
    # NOTE(review): Perplexity's model catalog changes over time — verify
    # these names against the current API documentation before relying
    # on anything other than the "sonar" default.
    VALID_MODELS = [
        "sonar",  # Default sonar model (works!)
        "sonar-online",  # Online search model
        "sonar-chat",  # Chat-focused model
        "mixtral-8x7b-instruct",  # Mixtral instruct model
        "codellama-70b-instruct",  # Code-focused model
        "llama-3.1-70b-instruct"  # Large Llama model
    ]
|
|
|
|
def __init__(self,
|
|
api_key: Optional[str] = None,
|
|
cache: Optional[RedisCache] = None,
|
|
rate_limit: int = 50,
|
|
model: Optional[str] = None):
|
|
"""
|
|
Initialize Perplexity Finance connector.
|
|
|
|
Args:
|
|
api_key: Perplexity API key
|
|
cache: Redis cache instance
|
|
rate_limit: Maximum requests per minute
|
|
model: Specific model to use (defaults to auto-selection)
|
|
"""
|
|
self.api_key = api_key or os.getenv('PERPLEXITY_API_KEY')
|
|
if not self.api_key:
|
|
raise ValueError("Perplexity API key required. Set PERPLEXITY_API_KEY environment variable.")
|
|
|
|
self.base_url = "https://api.perplexity.ai"
|
|
self.cache = cache if cache and CACHE_AVAILABLE else None
|
|
self.rate_limit = rate_limit
|
|
self.last_request_time = datetime.now(timezone.utc)
|
|
|
|
# Headers for API requests
|
|
self.headers = {
|
|
"Authorization": f"Bearer {self.api_key}",
|
|
"Content-Type": "application/json"
|
|
}
|
|
|
|
# Select appropriate model
|
|
if model and model in self.VALID_MODELS:
|
|
self.finance_model = model
|
|
else:
|
|
# Default to most reliable model
|
|
self.finance_model = "sonar" # Fast and reliable (verified working)
|
|
logger.info(f"Using default model: {self.finance_model}")
|
|
|
|
# Track rate limiting
|
|
self.request_count = 0
|
|
self.rate_limit_reset = datetime.now(timezone.utc)
|
|
|
|
async def analyze_stock(self,
|
|
ticker: str,
|
|
analysis_type: AnalysisType = AnalysisType.FUNDAMENTAL,
|
|
depth: ResearchDepth = ResearchDepth.STANDARD) -> StockAnalysis:
|
|
"""
|
|
Perform comprehensive analysis on a single stock.
|
|
"""
|
|
# Validate ticker
|
|
if not ticker or not ticker.replace('-', '').replace('.', '').isalnum():
|
|
raise ValueError(f"Invalid ticker symbol: {ticker}")
|
|
|
|
# Check cache first
|
|
cache_key = f"{CacheKey.AI_DECISION if CACHE_AVAILABLE else 'ai'}:perplexity:{ticker}:{analysis_type.value}"
|
|
if self.cache:
|
|
try:
|
|
cached = await self.cache.get(cache_key)
|
|
if cached:
|
|
logger.info(f"Using cached Perplexity analysis for {ticker}")
|
|
# Reconstruct StockAnalysis from dict
|
|
cached['timestamp'] = datetime.fromisoformat(cached['timestamp'])
|
|
cached['analysis_type'] = AnalysisType(cached['analysis_type'])
|
|
return StockAnalysis(**cached)
|
|
except Exception as e:
|
|
logger.warning(f"Cache retrieval error: {e}")
|
|
|
|
# Construct analysis prompt
|
|
prompt = self._build_analysis_prompt(ticker, analysis_type, depth)
|
|
|
|
try:
|
|
# Make API request with financial context
|
|
analysis_text = await self._query_perplexity(
|
|
prompt,
|
|
context="financial_analysis",
|
|
include_sources=True
|
|
)
|
|
|
|
# Parse into structured format (simplified - avoid double API call)
|
|
result = self._parse_analysis_locally(ticker, analysis_text, analysis_type)
|
|
|
|
# Cache the result
|
|
if self.cache:
|
|
try:
|
|
cache_data = asdict(result)
|
|
cache_data['timestamp'] = cache_data['timestamp'].isoformat()
|
|
cache_data['analysis_type'] = cache_data['analysis_type'].value
|
|
await self.cache.set(cache_key, cache_data, ttl=3600) # 1 hour cache
|
|
except Exception as e:
|
|
logger.warning(f"Cache storage error: {e}")
|
|
|
|
return result
|
|
|
|
except Exception as e:
|
|
logger.error(f"Stock analysis failed for {ticker}: {e}")
|
|
# Return minimal result on error
|
|
return StockAnalysis(
|
|
ticker=ticker,
|
|
timestamp=datetime.now(timezone.utc),
|
|
analysis_type=analysis_type,
|
|
current_price=0,
|
|
fair_value=None,
|
|
upside_potential=None,
|
|
pe_ratio=None,
|
|
peg_ratio=None,
|
|
price_to_book=None,
|
|
debt_to_equity=None,
|
|
roe=None,
|
|
revenue_growth=None,
|
|
earnings_growth=None,
|
|
bull_case="Analysis unavailable",
|
|
bear_case="Analysis unavailable",
|
|
key_risks=["Analysis failed"],
|
|
catalysts=[],
|
|
rating="Hold",
|
|
confidence_score=0,
|
|
time_horizon="medium",
|
|
detailed_analysis=str(e),
|
|
data_sources=["Error"]
|
|
)
|
|
|
|
async def screen_stocks(self,
|
|
query: str,
|
|
max_results: int = 20,
|
|
filters: Optional[Dict[str, Any]] = None) -> MarketScreenerResult:
|
|
"""
|
|
Screen stocks based on natural language query.
|
|
"""
|
|
if not query:
|
|
raise ValueError("Query cannot be empty")
|
|
|
|
# Sanitize query to prevent injection
|
|
query = query[:500] # Limit length
|
|
query = re.sub(r'[^\w\s\-.,?!$%]', '', query) # Remove special chars
|
|
|
|
prompt = f"""
|
|
Financial Stock Screening Request:
|
|
{query}
|
|
|
|
Requirements:
|
|
1. Search across US listed stocks
|
|
2. Return up to {min(max_results, 50)} stocks
|
|
3. Include current price, market cap, P/E ratio
|
|
4. Rank by relevance
|
|
5. Consider recent market conditions
|
|
|
|
Filters: {json.dumps(filters) if filters else 'None'}
|
|
|
|
Format response with clear ticker symbols and metrics.
|
|
"""
|
|
|
|
try:
|
|
response = await self._query_perplexity(
|
|
prompt,
|
|
context="stock_screening",
|
|
include_sources=True
|
|
)
|
|
|
|
result = self._parse_screening_locally(query, response)
|
|
return result
|
|
|
|
except Exception as e:
|
|
logger.error(f"Stock screening failed: {e}")
|
|
return MarketScreenerResult(
|
|
query=query,
|
|
timestamp=datetime.now(timezone.utc),
|
|
total_results=0,
|
|
stocks=[],
|
|
screening_criteria=filters or {},
|
|
market_context="Screening failed",
|
|
best_value=[],
|
|
highest_growth=[],
|
|
lowest_risk=[],
|
|
detailed_explanation=str(e)
|
|
)
|
|
|
|
async def _query_perplexity(self,
|
|
prompt: str,
|
|
context: str = "general",
|
|
include_sources: bool = True,
|
|
max_tokens: int = 1500) -> str:
|
|
"""
|
|
Make API request to Perplexity with proper error handling.
|
|
"""
|
|
# Rate limiting
|
|
await self._rate_limit()
|
|
|
|
# Sanitize prompt
|
|
prompt = prompt[:4000] # Perplexity has token limits
|
|
|
|
payload = {
|
|
"model": self.finance_model,
|
|
"messages": [
|
|
{
|
|
"role": "system",
|
|
"content": f"You are a senior financial analyst providing {context} analysis. "
|
|
"Use real-time market data and cite credible sources. "
|
|
"Be specific with numbers, percentages, and dates."
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": prompt
|
|
}
|
|
],
|
|
"max_tokens": max_tokens,
|
|
"temperature": 0.2,
|
|
"return_citations": include_sources,
|
|
"search_domain_filter": ["finance", "investing", "markets"],
|
|
"search_recency_filter": "day"
|
|
}
|
|
|
|
async with aiohttp.ClientSession() as session:
|
|
try:
|
|
async with session.post(
|
|
f"{self.base_url}/chat/completions",
|
|
headers=self.headers,
|
|
json=payload,
|
|
timeout=aiohttp.ClientTimeout(total=30)
|
|
) as response:
|
|
|
|
# Handle rate limiting
|
|
if response.status == 429:
|
|
retry_after = int(response.headers.get('Retry-After', 60))
|
|
logger.warning(f"Rate limited. Waiting {retry_after} seconds...")
|
|
await asyncio.sleep(retry_after)
|
|
return await self._query_perplexity(prompt, context, include_sources, max_tokens)
|
|
|
|
if response.status == 200:
|
|
data = await response.json()
|
|
|
|
# Validate response structure
|
|
if not data.get('choices'):
|
|
raise ValueError("Empty response from Perplexity API")
|
|
|
|
if len(data['choices']) == 0:
|
|
raise ValueError("No choices in Perplexity response")
|
|
|
|
choice = data['choices'][0]
|
|
if 'message' not in choice or 'content' not in choice['message']:
|
|
raise ValueError("Malformed response structure from Perplexity")
|
|
|
|
content = choice['message']['content']
|
|
if not content:
|
|
raise ValueError("Empty content in Perplexity response")
|
|
|
|
return content
|
|
|
|
else:
|
|
# Sanitize error before logging (remove potential API key)
|
|
error = await response.text()
|
|
error = re.sub(r'Bearer [^\s]+', 'Bearer ***', error)
|
|
logger.error(f"Perplexity API error (status {response.status}): {error[:200]}")
|
|
raise Exception(f"API request failed with status {response.status}")
|
|
|
|
except asyncio.TimeoutError:
|
|
logger.error("Perplexity API request timed out")
|
|
raise
|
|
except Exception as e:
|
|
# Sanitize error message
|
|
error_msg = str(e)
|
|
error_msg = re.sub(r'Bearer [^\s]+', 'Bearer ***', error_msg)
|
|
logger.error(f"Perplexity API error: {error_msg}")
|
|
raise
|
|
|
|
async def _rate_limit(self):
|
|
"""Implement proper rate limiting with tracking"""
|
|
now = datetime.now(timezone.utc)
|
|
|
|
# Reset counter every minute
|
|
if (now - self.rate_limit_reset).total_seconds() > 60:
|
|
self.request_count = 0
|
|
self.rate_limit_reset = now
|
|
|
|
# Check if we've hit the limit
|
|
if self.request_count >= self.rate_limit:
|
|
sleep_time = 60 - (now - self.rate_limit_reset).total_seconds()
|
|
if sleep_time > 0:
|
|
logger.info(f"Rate limit reached. Sleeping {sleep_time:.1f} seconds...")
|
|
await asyncio.sleep(sleep_time)
|
|
self.request_count = 0
|
|
self.rate_limit_reset = datetime.now(timezone.utc)
|
|
|
|
# Minimum time between requests
|
|
time_since_last = (now - self.last_request_time).total_seconds()
|
|
min_interval = 60 / self.rate_limit # seconds between requests
|
|
|
|
if time_since_last < min_interval:
|
|
await asyncio.sleep(min_interval - time_since_last)
|
|
|
|
self.last_request_time = datetime.now(timezone.utc)
|
|
self.request_count += 1
|
|
|
|
def _build_analysis_prompt(self,
|
|
ticker: str,
|
|
analysis_type: AnalysisType,
|
|
depth: ResearchDepth) -> str:
|
|
"""Build analysis prompt based on type and depth"""
|
|
|
|
base_prompt = f"Analyze {ticker} stock with focus on {analysis_type.value} analysis.\n\n"
|
|
|
|
if analysis_type == AnalysisType.FUNDAMENTAL:
|
|
base_prompt += """
|
|
Include:
|
|
1. Current valuation metrics (P/E, PEG, P/B, EV/EBITDA)
|
|
2. Profitability metrics (ROE, ROA, profit margins)
|
|
3. Growth metrics (revenue, earnings, FCF growth)
|
|
4. Balance sheet strength
|
|
5. Competitive position
|
|
6. Fair value estimate
|
|
7. Investment recommendation
|
|
"""
|
|
elif analysis_type == AnalysisType.TECHNICAL:
|
|
base_prompt += """
|
|
Include:
|
|
1. Current price action and trend
|
|
2. Support and resistance levels
|
|
3. Moving averages
|
|
4. RSI, MACD indicators
|
|
5. Volume analysis
|
|
6. Chart patterns
|
|
7. Short-term outlook
|
|
"""
|
|
elif analysis_type == AnalysisType.VALUATION:
|
|
base_prompt += """
|
|
Perform valuation:
|
|
1. DCF analysis
|
|
2. Comparable company analysis
|
|
3. Sensitivity analysis
|
|
4. Fair value range
|
|
5. Investment recommendation
|
|
"""
|
|
|
|
if depth == ResearchDepth.DEEP:
|
|
base_prompt += "\nProvide extensive detail with specific numbers."
|
|
elif depth == ResearchDepth.EXPERT:
|
|
base_prompt += "\nProvide institutional-quality analysis."
|
|
|
|
return base_prompt
|
|
|
|
def _parse_analysis_locally(self,
|
|
ticker: str,
|
|
raw_analysis: str,
|
|
analysis_type: AnalysisType) -> StockAnalysis:
|
|
"""Parse raw analysis text locally without additional API call"""
|
|
|
|
# Extract metrics using regex patterns
|
|
def extract_number(pattern: str, text: str, default: float = 0) -> float:
|
|
match = re.search(pattern, text, re.IGNORECASE)
|
|
if match:
|
|
try:
|
|
return float(match.group(1).replace(',', '').replace('$', ''))
|
|
except:
|
|
pass
|
|
return default
|
|
|
|
current_price = extract_number(r'current.*?price.*?\$?([\d,.]+)', raw_analysis)
|
|
fair_value = extract_number(r'fair.*?value.*?\$?([\d,.]+)', raw_analysis)
|
|
pe_ratio = extract_number(r'p/e.*?ratio.*?([\d,.]+)', raw_analysis)
|
|
|
|
# Calculate upside if we have both prices
|
|
upside_potential = None
|
|
if current_price > 0 and fair_value > 0:
|
|
upside_potential = ((fair_value - current_price) / current_price) * 100
|
|
|
|
# Extract rating
|
|
rating = "Hold"
|
|
if re.search(r'\b(strong\s+)?buy\b', raw_analysis, re.IGNORECASE):
|
|
rating = "Buy"
|
|
elif re.search(r'\b(strong\s+)?sell\b', raw_analysis, re.IGNORECASE):
|
|
rating = "Sell"
|
|
|
|
# Extract risks and catalysts
|
|
risks = []
|
|
risk_section = re.search(r'risk[s]?:?(.*?)(?:catalyst|opportunit|\n\n)',
|
|
raw_analysis, re.IGNORECASE | re.DOTALL)
|
|
if risk_section:
|
|
risks = [r.strip() for r in risk_section.group(1).split('\n')
|
|
if r.strip() and len(r.strip()) > 10][:5]
|
|
|
|
# Build analysis object
|
|
return StockAnalysis(
|
|
ticker=ticker,
|
|
timestamp=datetime.now(timezone.utc),
|
|
analysis_type=analysis_type,
|
|
current_price=current_price,
|
|
fair_value=fair_value if fair_value > 0 else None,
|
|
upside_potential=upside_potential,
|
|
pe_ratio=pe_ratio if pe_ratio > 0 else None,
|
|
peg_ratio=None,
|
|
price_to_book=None,
|
|
debt_to_equity=None,
|
|
roe=None,
|
|
revenue_growth=None,
|
|
earnings_growth=None,
|
|
bull_case=raw_analysis[:500],
|
|
bear_case="See full analysis",
|
|
key_risks=risks if risks else ["See full analysis"],
|
|
catalysts=[],
|
|
rating=rating,
|
|
confidence_score=70, # Default moderate confidence
|
|
time_horizon="medium",
|
|
detailed_analysis=raw_analysis,
|
|
data_sources=["Perplexity AI", "Real-time market data"]
|
|
)
|
|
|
|
def _parse_screening_locally(self, query: str, raw_response: str) -> MarketScreenerResult:
|
|
"""Parse screening response locally"""
|
|
|
|
# Extract stock symbols using regex
|
|
ticker_pattern = r'\b([A-Z]{1,5})\b(?:\s*[\:\-\|]|\s+at\s+\$)'
|
|
tickers = re.findall(ticker_pattern, raw_response)
|
|
|
|
# Remove common words that look like tickers
|
|
exclude = {'THE', 'AND', 'FOR', 'NYSE', 'NASDAQ', 'IPO', 'CEO', 'CFO', 'Q1', 'Q2', 'Q3', 'Q4'}
|
|
tickers = [t for t in tickers if t not in exclude][:20]
|
|
|
|
# Build basic stock info
|
|
stocks = []
|
|
for ticker in tickers[:10]: # Limit to 10
|
|
# Try to find price near ticker mention
|
|
price_pattern = rf'{ticker}.*?\$?([\d,.]+)'
|
|
price_match = re.search(price_pattern, raw_response)
|
|
price = float(price_match.group(1).replace(',', '')) if price_match else 0
|
|
|
|
stocks.append({
|
|
'ticker': ticker,
|
|
'company_name': '',
|
|
'price': price,
|
|
'pe_ratio': None,
|
|
'market_cap': None
|
|
})
|
|
|
|
return MarketScreenerResult(
|
|
query=query,
|
|
timestamp=datetime.now(timezone.utc),
|
|
total_results=len(stocks),
|
|
stocks=stocks,
|
|
screening_criteria={},
|
|
market_context=raw_response[:200],
|
|
best_value=[s['ticker'] for s in stocks[:3]],
|
|
highest_growth=[],
|
|
lowest_risk=[],
|
|
detailed_explanation=raw_response
|
|
)
|
|
|
|
# Additional helper methods remain the same but with proper error handling
|
|
async def get_market_sentiment(self, sector: Optional[str] = None) -> Dict[str, Any]:
|
|
"""Get current market sentiment with error handling"""
|
|
try:
|
|
prompt = f"""
|
|
Analyze current market sentiment {f'for {sector} sector' if sector else 'overall'}:
|
|
1. Bull vs Bear sentiment
|
|
2. Key concerns
|
|
3. Opportunities
|
|
4. Technical levels
|
|
"""
|
|
|
|
response = await self._query_perplexity(prompt, context="market_sentiment")
|
|
|
|
return {
|
|
"timestamp": datetime.now(timezone.utc).isoformat(),
|
|
"sector": sector or "market",
|
|
"analysis": response,
|
|
"data_freshness": "real-time"
|
|
}
|
|
except Exception as e:
|
|
logger.error(f"Market sentiment analysis failed: {e}")
|
|
return {
|
|
"timestamp": datetime.now(timezone.utc).isoformat(),
|
|
"sector": sector or "market",
|
|
"analysis": "Analysis unavailable",
|
|
"error": str(e)
|
|
} |