1162 lines
39 KiB
Python
1162 lines
39 KiB
Python
"""SQLite database module for storing stock recommendations."""
|
|
import sqlite3
|
|
import json
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from typing import Optional
|
|
|
|
# Location of the SQLite file used by every function in this module: a file
# named "recommendations.db" in the same directory as this source file.
DB_PATH = Path(__file__).parent / "recommendations.db"
|
|
|
|
|
|
def get_connection():
    """Open the recommendations database and hand back the connection.

    The connection's row factory is ``sqlite3.Row``, so query results can be
    read by column name as well as by position. Closing the connection is the
    caller's job.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
|
|
|
|
|
|
# DDL for every table this module owns. Each statement uses IF NOT EXISTS, so
# running it against an already-initialised database changes nothing.
_TABLE_DDL = (
    # One summary row per day.
    """
    CREATE TABLE IF NOT EXISTS daily_recommendations (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        date TEXT UNIQUE NOT NULL,
        summary_total INTEGER,
        summary_buy INTEGER,
        summary_sell INTEGER,
        summary_hold INTEGER,
        top_picks TEXT,
        stocks_to_avoid TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP
    )
    """,
    # One row per (date, symbol) analysis.
    """
    CREATE TABLE IF NOT EXISTS stock_analysis (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        date TEXT NOT NULL,
        symbol TEXT NOT NULL,
        company_name TEXT,
        decision TEXT,
        confidence TEXT,
        risk TEXT,
        raw_analysis TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        UNIQUE(date, symbol)
    )
    """,
    # Each analyst's detailed report.
    """
    CREATE TABLE IF NOT EXISTS agent_reports (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        date TEXT NOT NULL,
        symbol TEXT NOT NULL,
        agent_type TEXT NOT NULL,
        report_content TEXT,
        data_sources_used TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        UNIQUE(date, symbol, agent_type)
    )
    """,
    # Investment and risk debates.
    """
    CREATE TABLE IF NOT EXISTS debate_history (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        date TEXT NOT NULL,
        symbol TEXT NOT NULL,
        debate_type TEXT NOT NULL,
        bull_arguments TEXT,
        bear_arguments TEXT,
        risky_arguments TEXT,
        safe_arguments TEXT,
        neutral_arguments TEXT,
        judge_decision TEXT,
        full_history TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
        UNIQUE(date, symbol, debate_type)
    )
    """,
    # Step-by-step execution log.
    """
    CREATE TABLE IF NOT EXISTS pipeline_steps (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        date TEXT NOT NULL,
        symbol TEXT NOT NULL,
        step_number INTEGER,
        step_name TEXT,
        status TEXT,
        started_at TEXT,
        completed_at TEXT,
        duration_ms INTEGER,
        output_summary TEXT,
        step_details TEXT,
        UNIQUE(date, symbol, step_number)
    )
    """,
    # What raw data was fetched.
    """
    CREATE TABLE IF NOT EXISTS data_source_logs (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        date TEXT NOT NULL,
        symbol TEXT NOT NULL,
        source_type TEXT,
        source_name TEXT,
        method TEXT,
        args TEXT,
        data_fetched TEXT,
        fetch_timestamp TEXT,
        success INTEGER DEFAULT 1,
        error_message TEXT
    )
    """,
    # Calculated backtest accuracy.
    """
    CREATE TABLE IF NOT EXISTS backtest_results (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        date TEXT NOT NULL,
        symbol TEXT NOT NULL,
        decision TEXT,
        price_at_prediction REAL,
        price_1d_later REAL,
        price_1w_later REAL,
        price_1m_later REAL,
        return_1d REAL,
        return_1w REAL,
        return_1m REAL,
        prediction_correct INTEGER,
        calculated_at TEXT DEFAULT CURRENT_TIMESTAMP,
        UNIQUE(date, symbol)
    )
    """,
)

# Columns introduced after their table first existed, as
# (table, column, column type). Applied to databases created by older versions.
_COLUMN_MIGRATIONS = (
    ("pipeline_steps", "step_details", "TEXT"),
    ("data_source_logs", "method", "TEXT"),
    ("data_source_logs", "args", "TEXT"),
    ("stock_analysis", "hold_days", "INTEGER"),
    ("backtest_results", "hold_days", "INTEGER"),
)

# Indexes for the common lookups; also idempotent.
_INDEX_DDL = (
    "CREATE INDEX IF NOT EXISTS idx_stock_analysis_date ON stock_analysis(date)",
    "CREATE INDEX IF NOT EXISTS idx_stock_analysis_symbol ON stock_analysis(symbol)",
    "CREATE INDEX IF NOT EXISTS idx_agent_reports_date_symbol ON agent_reports(date, symbol)",
    "CREATE INDEX IF NOT EXISTS idx_debate_history_date_symbol ON debate_history(date, symbol)",
    "CREATE INDEX IF NOT EXISTS idx_pipeline_steps_date_symbol ON pipeline_steps(date, symbol)",
    "CREATE INDEX IF NOT EXISTS idx_data_source_logs_date_symbol ON data_source_logs(date, symbol)",
    "CREATE INDEX IF NOT EXISTS idx_backtest_results_date ON backtest_results(date)",
)


def _add_column_if_missing(cursor, table: str, column: str, column_type: str) -> None:
    """Add ``column`` to ``table`` unless the table already has it.

    ``table``, ``column`` and ``column_type`` are interpolated into the SQL
    text, so they must come from trusted constants, never from user input.
    """
    cursor.execute(f"PRAGMA table_info({table})")
    # Index 1 of each table_info row is the column name.
    present = {info[1] for info in cursor.fetchall()}
    if column in present:
        return
    try:
        cursor.execute(f"ALTER TABLE {table} ADD COLUMN {column} {column_type}")
    except sqlite3.OperationalError as exc:
        # Another process may have added the column between the check and the
        # ALTER; that outcome is fine. Anything else is a real failure.
        if "duplicate column name" not in str(exc).lower():
            raise


def init_db():
    """Initialize the database with required tables.

    Creates every table and index if absent and adds columns that were
    introduced after a table first existed. Safe to call repeatedly.

    Raises:
        sqlite3.Error: If a schema statement fails for any reason other than
            a column already being present.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()

        for statement in _TABLE_DDL:
            cursor.execute(statement)

        # Tables must exist before their columns can be migrated.
        for table, column, column_type in _COLUMN_MIGRATIONS:
            _add_column_if_missing(cursor, table, column, column_type)

        for statement in _INDEX_DDL:
            cursor.execute(statement)

        conn.commit()
    finally:
        # Release the connection even when a statement above raised.
        conn.close()
|
|
|
|
|
|
def save_recommendation(date: str, analysis_data: dict, summary: dict,
                        top_picks: list, stocks_to_avoid: list):
    """Persist one day's recommendation together with its per-stock analyses.

    The daily summary row and every stock row are written with
    INSERT OR REPLACE and committed together; nothing is committed if any
    statement raises.

    Args:
        date: Date the recommendation belongs to.
        analysis_data: Mapping of symbol -> analysis dict.
        summary: Dict read for 'total', 'buy', 'sell' and 'hold' (each
            defaults to 0).
        top_picks: Stored as JSON text.
        stocks_to_avoid: Stored as JSON text.
    """
    db = get_connection()
    cur = db.cursor()

    try:
        # Daily summary row.
        daily_values = (
            date,
            summary.get('total', 0),
            summary.get('buy', 0),
            summary.get('sell', 0),
            summary.get('hold', 0),
            json.dumps(top_picks),
            json.dumps(stocks_to_avoid),
        )
        cur.execute("""
            INSERT OR REPLACE INTO daily_recommendations
            (date, summary_total, summary_buy, summary_sell, summary_hold, top_picks, stocks_to_avoid)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        """, daily_values)

        # One row per analysed stock.
        stock_values = [
            (
                date,
                ticker,
                details.get('company_name', ''),
                details.get('decision'),
                details.get('confidence'),
                details.get('risk'),
                details.get('raw_analysis', ''),
                details.get('hold_days'),
            )
            for ticker, details in analysis_data.items()
        ]
        cur.executemany("""
            INSERT OR REPLACE INTO stock_analysis
            (date, symbol, company_name, decision, confidence, risk, raw_analysis, hold_days)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, stock_values)

        db.commit()
    finally:
        db.close()
|
|
|
|
|
|
def save_single_stock_analysis(date: str, symbol: str, analysis: dict):
    """Insert or overwrite the stored analysis of one stock for one date.

    Args:
        date: Date string (YYYY-MM-DD)
        symbol: Stock symbol
        analysis: Dict that may carry company_name, decision, confidence,
            risk, raw_analysis and hold_days. Missing keys fall back to the
            symbol, 'HOLD', 'MEDIUM', 'MEDIUM', '' and None respectively.
    """
    insert_sql = """
        INSERT OR REPLACE INTO stock_analysis
        (date, symbol, company_name, decision, confidence, risk, raw_analysis, hold_days)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    """
    db = get_connection()
    cur = db.cursor()

    try:
        values = (
            date,
            symbol,
            analysis.get('company_name', symbol),
            analysis.get('decision', 'HOLD'),
            analysis.get('confidence', 'MEDIUM'),
            analysis.get('risk', 'MEDIUM'),
            analysis.get('raw_analysis', ''),
            analysis.get('hold_days'),
        )
        cur.execute(insert_sql, values)
        db.commit()
    finally:
        db.close()
|
|
|
|
|
|
def get_analyzed_symbols_for_date(date: str) -> list:
    """Return the symbols that already have a stock_analysis row for ``date``.

    Used by bulk analysis to skip already-completed stocks when resuming.
    """
    db = get_connection()
    cur = db.cursor()

    try:
        cur.execute("SELECT symbol FROM stock_analysis WHERE date = ?", (date,))
        symbols = []
        for record in cur.fetchall():
            symbols.append(record['symbol'])
        return symbols
    finally:
        db.close()
|
|
|
|
|
|
def get_recommendation_by_date(date: str) -> Optional[dict]:
    """Load the recommendation stored for ``date``.

    Returns:
        None when neither a daily summary nor any stock analysis exists for
        the date. Otherwise a dict with 'date', 'analysis' (symbol -> details),
        'summary', 'top_picks' and 'stocks_to_avoid'. When the daily summary
        row is missing, the summary is counted from the stock analyses and
        both pick lists are empty.
    """
    db = get_connection()
    cur = db.cursor()

    try:
        # Daily summary row, if any.
        cur.execute("""
            SELECT * FROM daily_recommendations WHERE date = ?
        """, (date,))
        daily = cur.fetchone()

        # Per-stock rows for the same date.
        cur.execute("""
            SELECT * FROM stock_analysis WHERE date = ?
        """, (date,))
        stock_rows = cur.fetchall()

        # Nothing stored at all for this date.
        if not (daily or stock_rows):
            return None

        allowed = ('BUY', 'SELL', 'HOLD')
        per_symbol = {}
        for rec in stock_rows:
            verdict = (rec['decision'] or '').strip().upper()
            # Anything that is not a known decision is reported as HOLD.
            per_symbol[rec['symbol']] = {
                'symbol': rec['symbol'],
                'company_name': rec['company_name'],
                'decision': verdict if verdict in allowed else 'HOLD',
                'confidence': rec['confidence'] or 'MEDIUM',
                'risk': rec['risk'] or 'MEDIUM',
                'raw_analysis': rec['raw_analysis'],
                'hold_days': rec['hold_days'] if 'hold_days' in rec.keys() else None
            }

        if not daily:
            # Fallback: derive the summary from the stock rows.
            tally = {'BUY': 0, 'SELL': 0, 'HOLD': 0}
            for details in per_symbol.values():
                tally[details['decision']] += 1
            return {
                'date': date,
                'analysis': per_symbol,
                'summary': {
                    'total': len(per_symbol),
                    'buy': tally['BUY'],
                    'sell': tally['SELL'],
                    'hold': tally['HOLD']
                },
                'top_picks': [],
                'stocks_to_avoid': []
            }

        picks_json = daily['top_picks']
        avoid_json = daily['stocks_to_avoid']
        return {
            'date': daily['date'],
            'analysis': per_symbol,
            'summary': {
                'total': daily['summary_total'],
                'buy': daily['summary_buy'],
                'sell': daily['summary_sell'],
                'hold': daily['summary_hold']
            },
            'top_picks': json.loads(picks_json) if picks_json else [],
            'stocks_to_avoid': json.loads(avoid_json) if avoid_json else []
        }
    finally:
        db.close()
|
|
|
|
|
|
def get_latest_recommendation() -> Optional[dict]:
    """Return the recommendation with the greatest date in daily_recommendations.

    Returns None when that table has no rows.
    """
    db = get_connection()
    cur = db.cursor()

    try:
        cur.execute("""
            SELECT date FROM daily_recommendations ORDER BY date DESC LIMIT 1
        """)
        newest = cur.fetchone()
        if newest:
            return get_recommendation_by_date(newest['date'])
        return None
    finally:
        db.close()
|
|
|
|
|
|
def get_all_dates() -> list:
    """List every date present in daily_recommendations or stock_analysis.

    Dates are de-duplicated and ordered newest first.
    """
    query = """
        SELECT DISTINCT date FROM (
            SELECT date FROM daily_recommendations
            UNION
            SELECT date FROM stock_analysis
        ) ORDER BY date DESC
    """
    db = get_connection()
    cur = db.cursor()

    try:
        cur.execute(query)
        found = cur.fetchall()
        return [record['date'] for record in found]
    finally:
        db.close()
|
|
|
|
|
|
def get_stock_history(symbol: str) -> list:
    """Return every stored recommendation for ``symbol``, newest first.

    Each entry has 'date', 'decision', 'confidence', 'risk' and 'hold_days'.
    Decisions other than BUY/SELL/HOLD are reported as 'HOLD'; empty
    confidence or risk values are reported as 'MEDIUM'.
    """
    db = get_connection()
    cur = db.cursor()

    try:
        cur.execute("""
            SELECT date, decision, confidence, risk, hold_days
            FROM stock_analysis
            WHERE symbol = ?
            ORDER BY date DESC
        """, (symbol,))

        allowed = ('BUY', 'SELL', 'HOLD')
        history = []
        for rec in cur.fetchall():
            verdict = (rec['decision'] or '').strip().upper()
            entry = {
                'date': rec['date'],
                # Sanitize: only BUY/SELL/HOLD are ever returned.
                'decision': verdict if verdict in allowed else 'HOLD',
                'confidence': rec['confidence'] or 'MEDIUM',
                'risk': rec['risk'] or 'MEDIUM',
                'hold_days': rec['hold_days'] if 'hold_days' in rec.keys() else None
            }
            history.append(entry)
        return history
    finally:
        db.close()
|
|
|
|
|
|
def get_all_recommendations() -> list:
    """Return the recommendation for every known date, in get_all_dates() order."""
    recommendations = []
    for day in get_all_dates():
        recommendations.append(get_recommendation_by_date(day))
    return recommendations
|
|
|
|
|
|
# ============== Pipeline Data Functions ==============
|
|
|
|
def save_agent_report(date: str, symbol: str, agent_type: str,
                      report_content: str, data_sources_used: list = None):
    """Insert or overwrite one agent's report for a stock on a date.

    ``data_sources_used`` is stored as JSON text; a missing or empty value is
    stored as '[]'.
    """
    db = get_connection()
    cur = db.cursor()

    try:
        if data_sources_used:
            sources_json = json.dumps(data_sources_used)
        else:
            sources_json = '[]'

        cur.execute("""
            INSERT OR REPLACE INTO agent_reports
            (date, symbol, agent_type, report_content, data_sources_used)
            VALUES (?, ?, ?, ?, ?)
        """, (date, symbol, agent_type, report_content, sources_json))
        db.commit()
    finally:
        db.close()
|
|
|
|
|
|
def save_agent_reports_bulk(date: str, symbol: str, reports: dict):
    """Store every agent report for one stock in a single commit.

    Args:
        date: Date string (YYYY-MM-DD)
        symbol: Stock symbol
        reports: Dict with keys 'market', 'news', 'social_media', 'fundamentals'.
            A value is either the report text itself, or a dict read for
            'content' and 'data_sources'.
    """
    db = get_connection()
    cur = db.cursor()

    try:
        rows = []
        for agent, payload in reports.items():
            # A bare string is the report text with no recorded sources.
            if isinstance(payload, str):
                text, sources = payload, []
            else:
                text = payload.get('content', '')
                sources = payload.get('data_sources', [])
            rows.append((date, symbol, agent, text, json.dumps(sources)))

        cur.executemany("""
            INSERT OR REPLACE INTO agent_reports
            (date, symbol, agent_type, report_content, data_sources_used)
            VALUES (?, ?, ?, ?, ?)
        """, rows)

        db.commit()
    finally:
        db.close()
|
|
|
|
|
|
def get_agent_reports(date: str, symbol: str) -> dict:
    """Return the stored agent reports for a stock on a date, keyed by agent type.

    Each value holds 'agent_type', 'report_content', 'data_sources_used'
    (decoded from JSON, [] when empty) and 'created_at'.
    """
    db = get_connection()
    cur = db.cursor()

    try:
        cur.execute("""
            SELECT agent_type, report_content, data_sources_used, created_at
            FROM agent_reports
            WHERE date = ? AND symbol = ?
        """, (date, symbol))

        by_agent = {}
        for rec in cur.fetchall():
            sources_json = rec['data_sources_used']
            by_agent[rec['agent_type']] = {
                'agent_type': rec['agent_type'],
                'report_content': rec['report_content'],
                'data_sources_used': json.loads(sources_json) if sources_json else [],
                'created_at': rec['created_at']
            }
        return by_agent
    finally:
        db.close()
|
|
|
|
|
|
def save_debate_history(date: str, symbol: str, debate_type: str,
                        bull_arguments: str = None, bear_arguments: str = None,
                        risky_arguments: str = None, safe_arguments: str = None,
                        neutral_arguments: str = None, judge_decision: str = None,
                        full_history: str = None):
    """Insert or overwrite the debate record for (date, symbol, debate_type).

    Argument fields left at None are stored as NULL.
    """
    insert_sql = """
        INSERT OR REPLACE INTO debate_history
        (date, symbol, debate_type, bull_arguments, bear_arguments,
         risky_arguments, safe_arguments, neutral_arguments,
         judge_decision, full_history)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """
    values = (
        date, symbol, debate_type,
        bull_arguments, bear_arguments,
        risky_arguments, safe_arguments, neutral_arguments,
        judge_decision, full_history,
    )

    db = get_connection()
    cur = db.cursor()

    try:
        cur.execute(insert_sql, values)
        db.commit()
    finally:
        db.close()
|
|
|
|
|
|
def get_debate_history(date: str, symbol: str) -> dict:
    """Return the stored debates for a stock on a date, keyed by debate type."""
    # Columns copied verbatim into each result entry, in output order.
    fields = (
        'debate_type',
        'bull_arguments',
        'bear_arguments',
        'risky_arguments',
        'safe_arguments',
        'neutral_arguments',
        'judge_decision',
        'full_history',
        'created_at',
    )

    db = get_connection()
    cur = db.cursor()

    try:
        cur.execute("""
            SELECT * FROM debate_history
            WHERE date = ? AND symbol = ?
        """, (date, symbol))

        by_type = {}
        for rec in cur.fetchall():
            by_type[rec['debate_type']] = {name: rec[name] for name in fields}
        return by_type
    finally:
        db.close()
|
|
|
|
|
|
def save_pipeline_step(date: str, symbol: str, step_number: int, step_name: str,
                       status: str, started_at: str = None, completed_at: str = None,
                       duration_ms: int = None, output_summary: str = None,
                       step_details=None):
    """Save a pipeline step status.

    The row for (date, symbol, step_number) is written with INSERT OR REPLACE,
    so an existing row is replaced as a whole: any column not supplied here,
    including ``step_details``, ends up NULL.

    Args:
        date: Date string.
        symbol: Stock symbol.
        step_number: Position of the step; part of the row's unique key.
        step_name: Name of the step.
        status: Status text for the step.
        started_at: Optional start timestamp text.
        completed_at: Optional completion timestamp text.
        duration_ms: Optional duration in milliseconds.
        output_summary: Optional summary text.
        step_details: Optional details for the step. A string is stored as
            given; any other non-None value is stored as JSON text, matching
            what get_pipeline_steps() decodes.
    """
    # Only strings can be bound directly; everything else goes through JSON.
    if step_details is not None and not isinstance(step_details, str):
        step_details = json.dumps(step_details)

    conn = get_connection()
    cursor = conn.cursor()

    try:
        cursor.execute("""
            INSERT OR REPLACE INTO pipeline_steps
            (date, symbol, step_number, step_name, status,
             started_at, completed_at, duration_ms, output_summary, step_details)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            date, symbol, step_number, step_name, status,
            started_at, completed_at, duration_ms, output_summary,
            step_details
        ))
        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def save_pipeline_steps_bulk(date: str, symbol: str, steps: list):
    """Save all pipeline steps at once.

    Every step is written with INSERT OR REPLACE on
    (date, symbol, step_number) and all of them are committed together.

    Args:
        date: Date string
        symbol: Stock symbol
        steps: List of step dicts with step_number, step_name, status, etc.
            'step_details' may be a string (stored as given) or any
            JSON-serialisable value (stored as JSON text).
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        for step in steps:
            step_details = step.get('step_details')
            # Test against None rather than truthiness: an empty dict or list
            # is falsy but still cannot be bound as an SQL parameter.
            if step_details is not None and not isinstance(step_details, str):
                step_details = json.dumps(step_details)

            cursor.execute("""
                INSERT OR REPLACE INTO pipeline_steps
                (date, symbol, step_number, step_name, status,
                 started_at, completed_at, duration_ms, output_summary, step_details)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                date, symbol,
                step.get('step_number'),
                step.get('step_name'),
                step.get('status'),
                step.get('started_at'),
                step.get('completed_at'),
                step.get('duration_ms'),
                step.get('output_summary'),
                step_details
            ))
        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def get_pipeline_steps(date: str, symbol: str) -> list:
    """Return the pipeline steps of a stock on a date, ordered by step number.

    'step_details' is decoded from JSON; it is None when the stored value is
    empty or cannot be decoded.
    """
    db = get_connection()
    cur = db.cursor()

    try:
        cur.execute("""
            SELECT * FROM pipeline_steps
            WHERE date = ? AND symbol = ?
            ORDER BY step_number
        """, (date, symbol))

        steps = []
        for rec in cur.fetchall():
            stored = rec['step_details'] if 'step_details' in rec.keys() else None
            details = None
            if stored:
                try:
                    details = json.loads(stored)
                except (json.JSONDecodeError, TypeError):
                    # Undecodable details are reported as absent.
                    details = None

            steps.append({
                'step_number': rec['step_number'],
                'step_name': rec['step_name'],
                'status': rec['status'],
                'started_at': rec['started_at'],
                'completed_at': rec['completed_at'],
                'duration_ms': rec['duration_ms'],
                'output_summary': rec['output_summary'],
                'step_details': details,
            })
        return steps
    finally:
        db.close()
|
|
|
|
|
|
def save_data_source_log(date: str, symbol: str, source_type: str,
                         source_name: str, data_fetched: dict = None,
                         fetch_timestamp: str = None, success: bool = True,
                         error_message: str = None, method: str = None,
                         args=None):
    """Log a data source fetch.

    Args:
        date: Date string.
        symbol: Stock symbol.
        source_type: Category of the source.
        source_name: Name of the source.
        data_fetched: Fetched payload, stored as JSON text; an empty or
            missing payload is stored as NULL.
        fetch_timestamp: Timestamp text; defaults to the current local time
            in ISO format.
        success: Stored as 1 or 0.
        error_message: Optional error text.
        method: Optional value for the ``method`` column, which the bulk
            variant also writes.
        args: Optional value for the ``args`` column. A dict, list or tuple
            is stored as JSON text; other values are bound as given.
    """
    # Containers cannot be bound as SQL parameters, so serialise them first.
    if isinstance(args, (dict, list, tuple)):
        args = json.dumps(args)

    conn = get_connection()
    cursor = conn.cursor()

    try:
        cursor.execute("""
            INSERT INTO data_source_logs
            (date, symbol, source_type, source_name, method, args, data_fetched,
             fetch_timestamp, success, error_message)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            date, symbol, source_type, source_name,
            method,
            args,
            json.dumps(data_fetched) if data_fetched else None,
            fetch_timestamp or datetime.now().isoformat(),
            1 if success else 0,
            error_message
        ))
        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def save_data_source_logs_bulk(date: str, symbol: str, logs: list):
    """Save multiple data source logs at once.

    Args:
        date: Date string.
        symbol: Stock symbol.
        logs: List of dicts read for 'source_type', 'source_name', 'method',
            'args', 'data_fetched', 'fetch_timestamp', 'success' (defaults to
            True) and 'error_message'. 'data_fetched' is stored as JSON text;
            an 'args' value that is a dict, list or tuple is stored as JSON
            text as well.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        for log in logs:
            call_args = log.get('args')
            # Containers cannot be bound as SQL parameters; store them as JSON
            # instead of failing the whole batch.
            if isinstance(call_args, (dict, list, tuple)):
                call_args = json.dumps(call_args)

            payload = log.get('data_fetched')

            cursor.execute("""
                INSERT INTO data_source_logs
                (date, symbol, source_type, source_name, method, args, data_fetched,
                 fetch_timestamp, success, error_message)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                date, symbol,
                log.get('source_type'),
                log.get('source_name'),
                log.get('method'),
                call_args,
                json.dumps(payload) if payload else None,
                log.get('fetch_timestamp') or datetime.now().isoformat(),
                1 if log.get('success', True) else 0,
                log.get('error_message')
            ))
        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def get_data_source_logs(date: str, symbol: str) -> list:
    """Get all data source logs for a stock on a date.

    Falls back to generating entries from agent_reports if no explicit logs
    exist.

    Returns:
        A list of dicts with the keys 'source_type', 'source_name', 'method',
        'args', 'data_fetched', 'fetch_timestamp', 'success' and
        'error_message'. Explicit logs are ordered by fetch timestamp and have
        'data_fetched' decoded from JSON. Generated entries carry the agent's
        report text as 'data_fetched', None for 'method' and 'args', and are
        always marked successful.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        cursor.execute("""
            SELECT * FROM data_source_logs
            WHERE date = ? AND symbol = ?
            ORDER BY fetch_timestamp
        """, (date, symbol))

        logs = [
            {
                'source_type': row['source_type'],
                'source_name': row['source_name'],
                'method': row['method'] if 'method' in row.keys() else None,
                'args': row['args'] if 'args' in row.keys() else None,
                'data_fetched': json.loads(row['data_fetched']) if row['data_fetched'] else None,
                'fetch_timestamp': row['fetch_timestamp'],
                'success': bool(row['success']),
                'error_message': row['error_message']
            }
            for row in cursor.fetchall()
        ]

        if logs:
            return logs

        # No explicit logs — generate from agent_reports with full raw content.
        # Maps agent type -> (source_type, source_name) shown for that agent.
        AGENT_TO_SOURCE = {
            'market': ('market_data', 'Yahoo Finance'),
            'news': ('news', 'Google News'),
            'social_media': ('social_media', 'Social Sentiment'),
            'fundamentals': ('fundamentals', 'Financial Data'),
        }

        cursor.execute("""
            SELECT agent_type, report_content, created_at
            FROM agent_reports
            WHERE date = ? AND symbol = ?
        """, (date, symbol))

        generated = []
        for row in cursor.fetchall():
            # Unknown agent types are listed under 'other' with their own name.
            source_type, source_name = AGENT_TO_SOURCE.get(
                row['agent_type'], ('other', row['agent_type'])
            )
            generated.append({
                'source_type': source_type,
                'source_name': source_name,
                # Keep the same keys as explicit log entries so consumers can
                # treat both kinds of entry alike.
                'method': None,
                'args': None,
                'data_fetched': row['report_content'],
                'fetch_timestamp': row['created_at'],
                'success': True,
                'error_message': None
            })

        return generated
    finally:
        conn.close()
|
|
|
|
|
|
def get_full_pipeline_data(date: str, symbol: str) -> dict:
    """Collect everything stored about one stock's pipeline run on a date.

    Combines the agent reports, debates, pipeline steps and data source logs
    into a single dict alongside the date and symbol.
    """
    reports = get_agent_reports(date, symbol)
    debates = get_debate_history(date, symbol)
    steps = get_pipeline_steps(date, symbol)
    sources = get_data_source_logs(date, symbol)

    bundle = {'date': date, 'symbol': symbol}
    bundle['agent_reports'] = reports
    bundle['debates'] = debates
    bundle['pipeline_steps'] = steps
    bundle['data_sources'] = sources
    return bundle
|
|
|
|
|
|
def save_full_pipeline_data(date: str, symbol: str, pipeline_data: dict):
    """Store whichever pipeline sections are present in ``pipeline_data``.

    Args:
        date: Date string
        symbol: Stock symbol
        pipeline_data: Dict that may contain 'agent_reports',
            'investment_debate', 'risk_debate', 'pipeline_steps' and
            'data_sources'. Each present section is saved through its own
            save function; absent sections are skipped.
    """
    if 'agent_reports' in pipeline_data:
        save_agent_reports_bulk(date, symbol, pipeline_data['agent_reports'])

    if 'investment_debate' in pipeline_data:
        invest = pipeline_data['investment_debate']
        invest_fields = {
            'bull_arguments': invest.get('bull_history'),
            'bear_arguments': invest.get('bear_history'),
            'judge_decision': invest.get('judge_decision'),
            'full_history': invest.get('history'),
        }
        save_debate_history(date, symbol, 'investment', **invest_fields)

    if 'risk_debate' in pipeline_data:
        risk = pipeline_data['risk_debate']
        risk_fields = {
            'risky_arguments': risk.get('risky_history'),
            'safe_arguments': risk.get('safe_history'),
            'neutral_arguments': risk.get('neutral_history'),
            'judge_decision': risk.get('judge_decision'),
            'full_history': risk.get('history'),
        }
        save_debate_history(date, symbol, 'risk', **risk_fields)

    if 'pipeline_steps' in pipeline_data:
        save_pipeline_steps_bulk(date, symbol, pipeline_data['pipeline_steps'])

    if 'data_sources' in pipeline_data:
        save_data_source_logs_bulk(date, symbol, pipeline_data['data_sources'])
|
|
|
|
|
|
def get_pipeline_summary_for_date(date: str) -> list:
    """Summarise the pipeline state of every stock analysed on ``date``.

    Returns one dict per symbol found in stock_analysis, holding its pipeline
    steps (name and status, in step order), the number of agent reports and
    whether any debate was recorded. Each table is read with one query for
    the whole date rather than one query per symbol.
    """
    db = get_connection()
    cur = db.cursor()

    try:
        # Symbols analysed on this date.
        cur.execute("""
            SELECT DISTINCT symbol FROM stock_analysis WHERE date = ?
        """, (date,))
        tickers = [rec['symbol'] for rec in cur.fetchall()]

        # All pipeline steps of the date, grouped by symbol.
        cur.execute("""
            SELECT symbol, step_name, status FROM pipeline_steps
            WHERE date = ?
            ORDER BY symbol, step_number
        """, (date,))
        steps_for = {}
        for rec in cur.fetchall():
            steps_for.setdefault(rec['symbol'], []).append(
                {'step_name': rec['step_name'], 'status': rec['status']}
            )

        # Agent report count per symbol.
        cur.execute("""
            SELECT symbol, COUNT(*) as count FROM agent_reports
            WHERE date = ?
            GROUP BY symbol
        """, (date,))
        report_totals = {}
        for rec in cur.fetchall():
            report_totals[rec['symbol']] = rec['count']

        # Symbols that have at least one debate.
        cur.execute("""
            SELECT DISTINCT symbol FROM debate_history WHERE date = ?
        """, (date,))
        debated = set()
        for rec in cur.fetchall():
            debated.add(rec['symbol'])

        return [
            {
                'symbol': ticker,
                'pipeline_steps': steps_for.get(ticker, []),
                'agent_reports_count': report_totals.get(ticker, 0),
                'has_debates': ticker in debated
            }
            for ticker in tickers
        ]
    finally:
        db.close()
|
|
|
|
|
|
def save_backtest_result(date: str, symbol: str, decision: str,
                         price_at_prediction: float, price_1d_later: float = None,
                         price_1w_later: float = None, price_1m_later: float = None,
                         return_1d: float = None, return_1w: float = None,
                         return_1m: float = None, prediction_correct: bool = None,
                         hold_days: int = None):
    """Insert or overwrite the backtest result for (date, symbol).

    ``prediction_correct`` is stored as 1 or 0, or as NULL when it is None.
    """
    # Three-state flag: unknown stays NULL, otherwise 1 or 0.
    if prediction_correct is None:
        correct_flag = None
    elif prediction_correct:
        correct_flag = 1
    else:
        correct_flag = 0

    values = (
        date, symbol, decision, price_at_prediction,
        price_1d_later, price_1w_later, price_1m_later,
        return_1d, return_1w, return_1m,
        correct_flag,
        hold_days,
    )

    db = get_connection()
    cur = db.cursor()

    try:
        cur.execute("""
            INSERT OR REPLACE INTO backtest_results
            (date, symbol, decision, price_at_prediction,
             price_1d_later, price_1w_later, price_1m_later,
             return_1d, return_1w, return_1m, prediction_correct, hold_days)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, values)
        db.commit()
    finally:
        db.close()
|
|
|
|
|
|
def get_backtest_result(date: str, symbol: str) -> Optional[dict]:
    """Return the stored backtest result for (date, symbol), or None if absent.

    'prediction_correct' comes back as a bool, or None when it was never set.
    """
    # Columns copied unchanged into the result, in output order.
    plain_columns = (
        'date', 'symbol', 'decision', 'price_at_prediction',
        'price_1d_later', 'price_1w_later', 'price_1m_later',
        'return_1d', 'return_1w', 'return_1m',
    )

    db = get_connection()
    cur = db.cursor()

    try:
        cur.execute("""
            SELECT * FROM backtest_results WHERE date = ? AND symbol = ?
        """, (date, symbol))
        rec = cur.fetchone()

        if not rec:
            return None

        result = {name: rec[name] for name in plain_columns}
        flag = rec['prediction_correct']
        result['prediction_correct'] = None if flag is None else bool(flag)
        result['hold_days'] = rec['hold_days'] if 'hold_days' in rec.keys() else None
        result['calculated_at'] = rec['calculated_at']
        return result
    finally:
        db.close()
|
|
|
|
|
|
def get_backtest_results_by_date(date: str) -> list:
    """Return every stored backtest result for ``date``, one dict per stock.

    'prediction_correct' comes back as a bool, or None when it was never set.
    """
    # Columns copied unchanged into each entry, in output order.
    plain_columns = (
        'symbol', 'decision', 'price_at_prediction',
        'price_1d_later', 'price_1w_later', 'price_1m_later',
        'return_1d', 'return_1w', 'return_1m',
    )

    db = get_connection()
    cur = db.cursor()

    try:
        cur.execute("""
            SELECT * FROM backtest_results WHERE date = ?
        """, (date,))

        entries = []
        for rec in cur.fetchall():
            entry = {name: rec[name] for name in plain_columns}
            flag = rec['prediction_correct']
            entry['prediction_correct'] = None if flag is None else bool(flag)
            entry['hold_days'] = rec['hold_days'] if 'hold_days' in rec.keys() else None
            entries.append(entry)
        return entries
    finally:
        db.close()
|
|
|
|
|
|
def get_all_backtest_results() -> list:
    """Return every scored backtest result, newest date first.

    Only rows whose prediction_correct is set are included. Each entry also
    carries the confidence and risk of the matching stock_analysis row (None
    when there is no match).
    """
    # Columns copied unchanged into each entry, in output order.
    plain_columns = (
        'date', 'symbol', 'decision', 'confidence', 'risk',
        'price_at_prediction', 'return_1d', 'return_1w', 'return_1m',
    )

    db = get_connection()
    cur = db.cursor()

    try:
        cur.execute("""
            SELECT br.*, sa.confidence, sa.risk
            FROM backtest_results br
            LEFT JOIN stock_analysis sa ON br.date = sa.date AND br.symbol = sa.symbol
            WHERE br.prediction_correct IS NOT NULL
            ORDER BY br.date DESC
        """)

        entries = []
        for rec in cur.fetchall():
            entry = {name: rec[name] for name in plain_columns}
            entry['prediction_correct'] = bool(rec['prediction_correct'])
            entries.append(entry)
        return entries
    finally:
        db.close()
|
|
|
|
|
|
def _normalize_label(value) -> str:
    """Return ``value`` stripped and upper-cased; None becomes ''."""
    return (value or '').strip().upper()


def _accuracy_bucket(records: list) -> dict:
    """Summarise a group of backtest records as accuracy / total / correct."""
    total = len(records)
    correct = sum(1 for record in records if record['prediction_correct'])
    return {
        # Percentage with one decimal; 0 for an empty group.
        'accuracy': round(correct / total * 100, 1) if total else 0,
        'total': total,
        'correct': correct
    }


def calculate_accuracy_metrics() -> dict:
    """Calculate overall backtest accuracy metrics.

    Returns:
        A dict with 'overall_accuracy' (percentage, one decimal),
        'total_predictions', 'correct_predictions', 'by_decision' (keys
        'BUY', 'SELL', 'HOLD') and 'by_confidence' (keys 'High', 'Medium',
        'Low'). Every bucket holds 'accuracy', 'total' and 'correct', also
        when there are no results at all.
    """
    results = get_all_backtest_results()

    # Decisions and confidence levels are matched without regard to case or
    # surrounding whitespace, because other functions in this module store
    # upper-case values such as 'MEDIUM'.
    by_decision = {
        label: _accuracy_bucket(
            [r for r in results if _normalize_label(r['decision']) == label]
        )
        for label in ('BUY', 'SELL', 'HOLD')
    }

    by_confidence = {
        label: _accuracy_bucket(
            [r for r in results
             if _normalize_label(r.get('confidence')) == label.upper()]
        )
        for label in ('High', 'Medium', 'Low')
    }

    overall = _accuracy_bucket(results)
    return {
        'overall_accuracy': overall['accuracy'],
        'total_predictions': overall['total'],
        'correct_predictions': overall['correct'],
        'by_decision': by_decision,
        'by_confidence': by_confidence
    }
|
|
|
|
|
|
def update_daily_recommendation_summary(date: str):
    """Auto-create/update daily_recommendations from stock_analysis for a date.

    Counts BUY/SELL/HOLD decisions, generates top_picks and stocks_to_avoid,
    and upserts the daily_recommendations row. Does nothing when the date has
    no stock_analysis rows.

    Decisions are compared after stripping whitespace and upper-casing, the
    same normalisation the read functions apply; anything that is not BUY or
    SELL counts as HOLD.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        # Get all stock analyses for this date
        cursor.execute("""
            SELECT symbol, company_name, decision, confidence, risk, raw_analysis
            FROM stock_analysis WHERE date = ?
        """, (date,))
        rows = cursor.fetchall()

        if not rows:
            return

        # Stocks collected per actionable decision, in query order.
        picks = {'BUY': [], 'SELL': []}
        hold_count = 0

        for row in rows:
            decision = (row['decision'] or '').strip().upper()
            if decision not in picks:
                hold_count += 1
                continue
            picks[decision].append({
                'symbol': row['symbol'],
                'company_name': row['company_name'] or row['symbol'],
                'confidence': row['confidence'] or 'MEDIUM',
                # The reason is the first 200 characters of the raw analysis.
                'reason': (row['raw_analysis'] or '')[:200]
            })

        buy_count = len(picks['BUY'])
        sell_count = len(picks['SELL'])
        total = buy_count + sell_count + hold_count

        # Top picks: up to 5 BUY stocks
        top_picks = picks['BUY'][:5]

        # Stocks to avoid: up to 5 SELL stocks
        stocks_to_avoid = picks['SELL'][:5]

        cursor.execute("""
            INSERT OR REPLACE INTO daily_recommendations
            (date, summary_total, summary_buy, summary_sell, summary_hold, top_picks, stocks_to_avoid)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        """, (
            date, total, buy_count, sell_count, hold_count,
            json.dumps(top_picks),
            json.dumps(stocks_to_avoid)
        ))
        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def rebuild_all_daily_recommendations():
    """Regenerate the daily_recommendations row of every date in stock_analysis.

    This gives dates that have stock analyses but no daily summary a
    daily_recommendations entry. The list of dates is read first and the
    connection closed before any row is rebuilt. A one-line report is printed
    when at least one date was processed.
    """
    db = get_connection()
    cur = db.cursor()

    try:
        cur.execute("SELECT DISTINCT date FROM stock_analysis")
        dates = []
        for record in cur.fetchall():
            dates.append(record['date'])
    finally:
        db.close()

    for day in dates:
        update_daily_recommendation_summary(day)

    if not dates:
        return
    print(f"[DB] Rebuilt daily_recommendations for {len(dates)} dates: {sorted(dates)}")
|
|
|
|
|
|
# Initialize database on module import
|
|
# NOTE: this call executes every time the module is imported, so init_db()
# has to remain safe to run against a database that already exists.
init_db()
|