1546 lines
54 KiB
Python
1546 lines
54 KiB
Python
"""SQLite database module for storing stock recommendations."""
|
|
import sqlite3
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from typing import Optional
|
|
|
|
# Path of the SQLite database file: "recommendations.db" located in the same
# directory as this module. get_connection() opens this path.
DB_PATH = Path(__file__).parent / "recommendations.db"
|
|
|
|
|
|
def sanitize_decision(raw: str) -> str:
    """Extract BUY/SELL/HOLD from potentially noisy LLM output.

    Resolution order:
      1. The whole text (case-insensitive) is exactly one of the keywords.
      2. The text with markdown emphasis characters (``*`` / ``_``) removed is
         exactly one of the keywords, e.g. ``**SELL**``.
      3. The first whitespace-separated word (after removing emphasis
         characters) is one of the keywords, e.g. a bare ``HOLD`` followed by
         an explanation.
      4. The earliest standalone keyword anywhere in the text, e.g.
         ``The decision is: **BUY**``.

    Args:
        raw: Decision text as produced by the model. May be None or empty.

    Returns:
        'BUY', 'SELL' or 'HOLD'. Falls back to 'HOLD' when the input is empty,
        is not a string, or contains no recognizable keyword.
    """
    valid = ('BUY', 'SELL', 'HOLD')

    # Empty / missing / non-text values all resolve to the documented fallback
    # instead of raising on .strip().
    if not raw or not isinstance(raw, str):
        return 'HOLD'

    # Quick exact match (most common case)
    upper = raw.strip().upper()
    if upper in valid:
        return upper

    # Strip markdown bold/italic: **SELL** -> SELL, *BUY* -> BUY
    stripped = re.sub(r'[*_]+', '', upper).strip()
    if stripped in valid:
        return stripped

    # First word after stripping markdown
    words = stripped.split()
    if words and words[0] in valid:
        return words[0]

    # Earliest standalone keyword in the text. A single alternation is used so
    # that position in the text decides the winner, not a fixed keyword order.
    match = re.search(r'\b(BUY|SELL|HOLD)\b', upper)
    return match.group(1) if match else 'HOLD'
|
|
|
|
|
|
def get_connection():
    """Open the SQLite database at DB_PATH and return the connection.

    The connection's row factory is set to sqlite3.Row, so fetched rows can
    be indexed by column name.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
|
|
|
|
|
|
def _add_column_if_missing(cursor, table, column, column_type):
    """Add `column` to `table` unless it already exists.

    The table and column names are interpolated into the SQL text because
    SQLite cannot bind identifiers as parameters; only pass trusted,
    hard-coded names.

    Returns:
        True when the column was added by this call, False when it was
        already present.
    """
    cursor.execute(f"PRAGMA table_info({table})")
    # Each table_info row is (cid, name, type, notnull, dflt_value, pk).
    existing = {info[1] for info in cursor.fetchall()}
    if column in existing:
        return False
    cursor.execute(f"ALTER TABLE {table} ADD COLUMN {column} {column_type}")
    return True


def init_db():
    """Initialize the database with required tables.

    Creates every table and index if missing, applies column migrations for
    databases created by older versions, commits, and then runs the startup
    data-quality fix-ups (_fix_default_hold_days, _fix_garbage_decisions,
    _cleanup_bad_backtest_data).

    The connection is always closed, even when a statement fails.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()

        # Create recommendations table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS daily_recommendations (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                date TEXT UNIQUE NOT NULL,
                summary_total INTEGER,
                summary_buy INTEGER,
                summary_sell INTEGER,
                summary_hold INTEGER,
                top_picks TEXT,
                stocks_to_avoid TEXT,
                created_at TEXT DEFAULT CURRENT_TIMESTAMP
            )
        """)

        # Create stock analysis table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS stock_analysis (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                date TEXT NOT NULL,
                symbol TEXT NOT NULL,
                company_name TEXT,
                decision TEXT,
                confidence TEXT,
                risk TEXT,
                raw_analysis TEXT,
                created_at TEXT DEFAULT CURRENT_TIMESTAMP,
                UNIQUE(date, symbol)
            )
        """)

        # Create index for faster queries
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_stock_analysis_date ON stock_analysis(date)
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_stock_analysis_symbol ON stock_analysis(symbol)
        """)

        # Create agent_reports table (stores each analyst's detailed report)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS agent_reports (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                date TEXT NOT NULL,
                symbol TEXT NOT NULL,
                agent_type TEXT NOT NULL,
                report_content TEXT,
                data_sources_used TEXT,
                created_at TEXT DEFAULT CURRENT_TIMESTAMP,
                UNIQUE(date, symbol, agent_type)
            )
        """)

        # Create debate_history table (stores investment and risk debates)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS debate_history (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                date TEXT NOT NULL,
                symbol TEXT NOT NULL,
                debate_type TEXT NOT NULL,
                bull_arguments TEXT,
                bear_arguments TEXT,
                risky_arguments TEXT,
                safe_arguments TEXT,
                neutral_arguments TEXT,
                judge_decision TEXT,
                full_history TEXT,
                created_at TEXT DEFAULT CURRENT_TIMESTAMP,
                UNIQUE(date, symbol, debate_type)
            )
        """)

        # Create pipeline_steps table (stores step-by-step execution log)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS pipeline_steps (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                date TEXT NOT NULL,
                symbol TEXT NOT NULL,
                step_number INTEGER,
                step_name TEXT,
                status TEXT,
                started_at TEXT,
                completed_at TEXT,
                duration_ms INTEGER,
                output_summary TEXT,
                step_details TEXT,
                UNIQUE(date, symbol, step_number)
            )
        """)

        # Migration for existing DBs created before step_details existed
        _add_column_if_missing(cursor, "pipeline_steps", "step_details", "TEXT")

        # Create data_source_logs table (stores what raw data was fetched)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS data_source_logs (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                date TEXT NOT NULL,
                symbol TEXT NOT NULL,
                source_type TEXT,
                source_name TEXT,
                method TEXT,
                args TEXT,
                data_fetched TEXT,
                fetch_timestamp TEXT,
                success INTEGER DEFAULT 1,
                error_message TEXT
            )
        """)

        # Migration: add method/args columns if missing (existing databases)
        _add_column_if_missing(cursor, "data_source_logs", "method", "TEXT")
        _add_column_if_missing(cursor, "data_source_logs", "args", "TEXT")

        # Create backtest_results table (stores calculated backtest accuracy)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS backtest_results (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                date TEXT NOT NULL,
                symbol TEXT NOT NULL,
                decision TEXT,
                price_at_prediction REAL,
                price_1d_later REAL,
                price_1w_later REAL,
                price_1m_later REAL,
                return_1d REAL,
                return_1w REAL,
                return_1m REAL,
                prediction_correct INTEGER,
                calculated_at TEXT DEFAULT CURRENT_TIMESTAMP,
                UNIQUE(date, symbol)
            )
        """)

        # Migration: hold_days columns for existing DBs
        _add_column_if_missing(cursor, "stock_analysis", "hold_days", "INTEGER")
        _add_column_if_missing(cursor, "backtest_results", "hold_days", "INTEGER")

        if _add_column_if_missing(cursor, "backtest_results", "return_at_hold", "REAL"):
            # New column added — delete stale backtest data so it gets
            # recalculated with return_at_hold
            cursor.execute("DELETE FROM backtest_results")
            print("Migration: Added return_at_hold column, cleared stale backtest data for recalculation")

        # Migration: rank column for existing DBs
        _add_column_if_missing(cursor, "stock_analysis", "rank", "INTEGER")

        # Create indexes for new tables
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_agent_reports_date_symbol ON agent_reports(date, symbol)
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_debate_history_date_symbol ON debate_history(date, symbol)
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_pipeline_steps_date_symbol ON pipeline_steps(date, symbol)
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_data_source_logs_date_symbol ON data_source_logs(date, symbol)
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_backtest_results_date ON backtest_results(date)
        """)

        conn.commit()
    finally:
        conn.close()

    # Fix data quality issues at startup
    _fix_default_hold_days()
    _fix_garbage_decisions()
    _cleanup_bad_backtest_data()
|
|
|
|
|
|
def _fix_default_hold_days():
    """Re-derive hold_days from raw_analysis where it is NULL or the default 5.

    Candidate rows are non-SELL stock_analysis rows with non-empty
    raw_analysis whose hold_days is NULL or 5. A set of regexes is run over
    the analysis text; when a value between 1 and 90 is found that differs
    from the stored one, the row is updated. If anything was updated, all
    backtest_results rows are deleted so they get recalculated.
    """
    import re

    patterns = [
        r'(\d+)[\s-]*(?:day|trading[\s-]*day)[\s-]*(?:hold|horizon|period|timeframe)',
        r'(?:hold|holding)[\s\w]*?(?:for|of|period\s+of)[\s]*(\d+)[\s]*(?:trading\s+)?days?',
        r'setting\s+(\d+)\s+(?:trading\s+)?days',
        r'(?:over|within|next)\s+(\d+)\s+(?:trading\s+)?days',
        r'(\d+)\s+trading\s+days?\s*\(',
    ]

    def extract_days(text):
        """Return the first plausible (1..90) day count found in text, else None."""
        if not text:
            return None
        # The conclusion/rationale section (last 500 chars) is searched first,
        # then the full text as a fallback.
        for segment in (text[-500:], text):
            for pattern in patterns:
                for hit in re.finditer(pattern, segment, re.IGNORECASE):
                    candidate = int(hit.group(1))
                    if 1 <= candidate <= 90:
                        return candidate
        return None

    query = (
        "SELECT id, symbol, date, raw_analysis, hold_days, decision FROM stock_analysis "
        "WHERE (hold_days IS NULL OR hold_days = 5) "
        "AND decision != 'SELL' "
        "AND raw_analysis IS NOT NULL AND raw_analysis != ''"
    )

    conn = get_connection()
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()

    try:
        cursor.execute(query)
        candidates = cursor.fetchall()

        fixed = 0
        for record in candidates:
            previous = record['hold_days']
            extracted = extract_days(record['raw_analysis'])
            if extracted is None or extracted == previous:
                continue
            cursor.execute(
                "UPDATE stock_analysis SET hold_days = ? WHERE id = ?",
                (extracted, record['id'])
            )
            fixed += 1
            print(f" Fixed hold_days for {record['symbol']} ({record['date']}): {previous} -> {extracted}")

        if fixed > 0:
            conn.commit()
            # Also clear backtest results so they recalculate with correct hold_days
            cursor.execute("DELETE FROM backtest_results")
            conn.commit()
            print(f"Fixed {fixed} stock(s) with missing/default hold_days. Cleared backtest cache.")
    finally:
        conn.close()
|
|
|
|
|
|
def _fix_garbage_decisions():
    """Normalize stock_analysis.decision values that are not plain BUY/SELL/HOLD.

    Each offending value is passed through sanitize_decision() and written
    back. For every date that had a row changed, the buy/sell/hold/total
    counters in daily_recommendations are recomputed from stock_analysis.
    Finally, backtest_results rows whose stored decision is not one of the
    three keywords are deleted.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        # Rows whose decision is present but not a clean keyword
        cursor.execute(
            "SELECT id, date, symbol, decision FROM stock_analysis "
            "WHERE decision NOT IN ('BUY', 'SELL', 'HOLD') AND decision IS NOT NULL"
        )
        dirty_rows = cursor.fetchall()
        if not dirty_rows:
            return

        fixed = 0
        affected_dates = set()
        for record in dirty_rows:
            stored = record['decision']
            clean = sanitize_decision(stored)
            if clean == stored:
                continue
            cursor.execute(
                "UPDATE stock_analysis SET decision = ? WHERE id = ?",
                (clean, record['id'])
            )
            fixed += 1
            affected_dates.add(record['date'])
            old_preview = stored[:40].replace('\n', ' ')
            print(f" Fixed decision for {record['symbol']} ({record['date']}): '{old_preview}...' -> {clean}")

        if fixed == 0:
            return

        conn.commit()
        print(f"Fixed {fixed} stock(s) with garbage decision values.")

        # Rebuild daily_recommendations summaries for affected dates
        for date in affected_dates:
            cursor.execute(
                "SELECT decision FROM stock_analysis WHERE date = ?", (date,)
            )
            decisions = [sanitize_decision(item['decision']) for item in cursor.fetchall()]
            tally = {label: decisions.count(label) for label in ('BUY', 'SELL', 'HOLD')}
            cursor.execute(
                "UPDATE daily_recommendations SET summary_buy=?, summary_sell=?, summary_hold=?, summary_total=? WHERE date=?",
                (tally['BUY'], tally['SELL'], tally['HOLD'], len(decisions), date)
            )
        conn.commit()
        print(f"Rebuilt summaries for {len(affected_dates)} date(s).")

        # Clear backtest results that may have wrong decisions stored
        cursor.execute("DELETE FROM backtest_results WHERE decision NOT IN ('BUY', 'SELL', 'HOLD')")
        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def _cleanup_bad_backtest_data():
    """Remove unusable backtest rows and back-fill missing prediction_correct.

    Deletes backtest_results rows where:
    - return_1d is 0.0, return_1w is NULL or 0.0, and return_at_hold is NULL or 0.0
    - return_1d, return_1w and return_at_hold are all NULL

    Then, for rows with prediction_correct NULL but at least one of
    return_at_hold / return_1d available, the decision is taken from the
    matching stock_analysis row (sanitized) and prediction_correct is set:
    BUY/HOLD count as correct on a positive return, SELL on a negative one.
    return_at_hold is preferred over return_1d.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        # Zero 1-day return with no other useful return data. This typically
        # means pred_date and next-day resolved to the same trading day.
        cursor.execute(
            "DELETE FROM backtest_results "
            "WHERE return_1d = 0.0 AND (return_1w IS NULL OR return_1w = 0.0) "
            "AND (return_at_hold IS NULL OR return_at_hold = 0.0)"
        )
        deleted_zero = cursor.rowcount

        # All returns NULL (no price data available)
        cursor.execute(
            "DELETE FROM backtest_results "
            "WHERE return_1d IS NULL AND return_1w IS NULL AND return_at_hold IS NULL"
        )
        deleted_null = cursor.rowcount

        if deleted_zero + deleted_null > 0:
            conn.commit()
            print(f"Cleaned up backtest data: removed {deleted_zero} zero-return rows, {deleted_null} null-return rows.")

        # Rows where prediction_correct is NULL but return data exists;
        # cross-reference with stock_analysis for the decision.
        cursor.execute("""
            SELECT br.id, br.date, br.symbol, br.return_1d, br.return_at_hold,
                   sa.decision as sa_decision
            FROM backtest_results br
            JOIN stock_analysis sa ON br.date = sa.date AND br.symbol = sa.symbol
            WHERE br.prediction_correct IS NULL
              AND (br.return_1d IS NOT NULL OR br.return_at_hold IS NOT NULL)
        """)

        updates = []
        for candidate in cursor.fetchall():
            decision = sanitize_decision(candidate['sa_decision'])

            primary_return = candidate['return_at_hold']
            if primary_return is None:
                primary_return = candidate['return_1d']
            if primary_return is None:
                continue

            if decision in ('BUY', 'HOLD'):
                is_correct = int(primary_return > 0)
            elif decision == 'SELL':
                is_correct = int(primary_return < 0)
            else:
                continue

            updates.append((is_correct, decision, candidate['id']))

        fixed_count = len(updates)
        if fixed_count > 0:
            cursor.executemany(
                "UPDATE backtest_results SET prediction_correct = ?, decision = ? WHERE id = ?",
                updates
            )
            conn.commit()
            print(f"Fixed prediction_correct for {fixed_count} backtest rows.")
    finally:
        conn.close()
|
|
|
|
|
|
def save_recommendation(date: str, analysis_data: dict, summary: dict,
                        top_picks: list, stocks_to_avoid: list):
    """Persist one day's recommendation and its per-stock analyses.

    Writes (insert-or-replace) one daily_recommendations row for `date` and
    one stock_analysis row per entry of `analysis_data` (symbol -> analysis
    dict). top_picks and stocks_to_avoid are stored as JSON text. Everything
    is committed together at the end.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        daily_values = (
            date,
            summary.get('total', 0),
            summary.get('buy', 0),
            summary.get('sell', 0),
            summary.get('hold', 0),
            json.dumps(top_picks),
            json.dumps(stocks_to_avoid),
        )
        cursor.execute("""
            INSERT OR REPLACE INTO daily_recommendations
            (date, summary_total, summary_buy, summary_sell, summary_hold, top_picks, stocks_to_avoid)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        """, daily_values)

        # One stock_analysis row per analysed symbol
        stock_values = [
            (
                date,
                symbol,
                details.get('company_name', ''),
                details.get('decision'),
                details.get('confidence'),
                details.get('risk'),
                details.get('raw_analysis', ''),
                details.get('hold_days'),
            )
            for symbol, details in analysis_data.items()
        ]
        cursor.executemany("""
            INSERT OR REPLACE INTO stock_analysis
            (date, symbol, company_name, decision, confidence, risk, raw_analysis, hold_days)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, stock_values)

        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def save_single_stock_analysis(date: str, symbol: str, analysis: dict):
    """Insert or replace the stock_analysis row for one symbol on one date.

    Args:
        date: Date string (YYYY-MM-DD)
        symbol: Stock symbol
        analysis: Dict with keys: company_name, decision, confidence, risk,
            raw_analysis, hold_days. Missing keys default to the symbol,
            'HOLD', 'MEDIUM', 'MEDIUM', '' and None respectively.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        values = (
            date,
            symbol,
            analysis.get('company_name', symbol),
            analysis.get('decision', 'HOLD'),
            analysis.get('confidence', 'MEDIUM'),
            analysis.get('risk', 'MEDIUM'),
            analysis.get('raw_analysis', ''),
            analysis.get('hold_days'),
        )
        cursor.execute("""
            INSERT OR REPLACE INTO stock_analysis
            (date, symbol, company_name, decision, confidence, risk, raw_analysis, hold_days)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, values)
        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def get_analyzed_symbols_for_date(date: str) -> list:
    """Return the symbols that already have a stock_analysis row for `date`.

    Used by bulk analysis to skip already-completed stocks when resuming.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        cursor.execute("SELECT symbol FROM stock_analysis WHERE date = ?", (date,))
        symbols = []
        for record in cursor.fetchall():
            symbols.append(record['symbol'])
        return symbols
    finally:
        conn.close()
|
|
|
|
|
|
def get_recommendation_by_date(date: str) -> Optional[dict]:
    """Return the recommendation for `date`, or None when nothing is stored.

    The result has the keys date, analysis (symbol -> per-stock dict),
    summary, top_picks and stocks_to_avoid. When a daily_recommendations row
    exists its stored summary and pick lists are used; otherwise the summary
    is counted from the stock_analysis rows and the pick lists are empty.
    Decisions are passed through sanitize_decision(); missing confidence and
    risk read as 'MEDIUM'.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        # Daily summary row (may be absent)
        cursor.execute("""
            SELECT * FROM daily_recommendations WHERE date = ?
        """, (date,))
        daily = cursor.fetchone()

        # Per-stock rows for the same date
        cursor.execute("""
            SELECT * FROM stock_analysis WHERE date = ?
        """, (date,))
        stock_rows = cursor.fetchall()

        # Neither a summary nor any stock rows: nothing to report
        if not (daily or stock_rows):
            return None

        analysis = {}
        for stock in stock_rows:
            columns = stock.keys()
            analysis[stock['symbol']] = {
                'symbol': stock['symbol'],
                'company_name': stock['company_name'],
                'decision': sanitize_decision(stock['decision']),
                'confidence': stock['confidence'] or 'MEDIUM',
                'risk': stock['risk'] or 'MEDIUM',
                'raw_analysis': stock['raw_analysis'],
                'hold_days': stock['hold_days'] if 'hold_days' in columns else None,
                'rank': stock['rank'] if 'rank' in columns else None,
            }

        if not daily:
            # Fallback: build summary from stock_analysis when
            # daily_recommendations is missing
            decisions = [entry['decision'] for entry in analysis.values()]
            return {
                'date': date,
                'analysis': analysis,
                'summary': {
                    'total': len(analysis),
                    'buy': decisions.count('BUY'),
                    'sell': decisions.count('SELL'),
                    'hold': decisions.count('HOLD'),
                },
                'top_picks': [],
                'stocks_to_avoid': [],
            }

        stored_picks = daily['top_picks']
        stored_avoid = daily['stocks_to_avoid']
        return {
            'date': daily['date'],
            'analysis': analysis,
            'summary': {
                'total': daily['summary_total'],
                'buy': daily['summary_buy'],
                'sell': daily['summary_sell'],
                'hold': daily['summary_hold'],
            },
            'top_picks': json.loads(stored_picks) if stored_picks else [],
            'stocks_to_avoid': json.loads(stored_avoid) if stored_avoid else [],
        }
    finally:
        conn.close()
|
|
|
|
|
|
def get_latest_recommendation() -> Optional[dict]:
    """Return the recommendation for the newest date in daily_recommendations.

    Returns None when daily_recommendations has no rows; otherwise delegates
    to get_recommendation_by_date() for that date.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        cursor.execute("""
            SELECT date FROM daily_recommendations ORDER BY date DESC LIMIT 1
        """)
        newest = cursor.fetchone()
        return get_recommendation_by_date(newest['date']) if newest else None
    finally:
        conn.close()
|
|
|
|
|
|
def get_all_dates() -> list:
    """Return every date present in daily_recommendations or stock_analysis.

    Dates are de-duplicated and ordered newest first.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        cursor.execute("""
            SELECT DISTINCT date FROM (
                SELECT date FROM daily_recommendations
                UNION
                SELECT date FROM stock_analysis
            ) ORDER BY date DESC
        """)
        dates = []
        for record in cursor.fetchall():
            dates.append(record['date'])
        return dates
    finally:
        conn.close()
|
|
|
|
|
|
def get_stock_history(symbol: str) -> list:
    """Return all stored recommendations for `symbol`, newest date first.

    Each item carries date, decision (sanitized), confidence and risk
    ('MEDIUM' when empty), hold_days and rank.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        cursor.execute("""
            SELECT date, decision, confidence, risk, hold_days, rank
            FROM stock_analysis
            WHERE symbol = ?
            ORDER BY date DESC
        """, (symbol,))

        return [
            {
                'date': entry['date'],
                'decision': sanitize_decision(entry['decision']),
                'confidence': entry['confidence'] or 'MEDIUM',
                'risk': entry['risk'] or 'MEDIUM',
                'hold_days': entry['hold_days'] if 'hold_days' in entry.keys() else None,
                'rank': entry['rank'] if 'rank' in entry.keys() else None,
            }
            for entry in cursor.fetchall()
        ]
    finally:
        conn.close()
|
|
|
|
|
|
def get_all_recommendations() -> list:
    """Return the recommendation for every date reported by get_all_dates().

    Results keep the order of get_all_dates(); each one is loaded with
    get_recommendation_by_date().
    """
    recommendations = []
    for day in get_all_dates():
        recommendations.append(get_recommendation_by_date(day))
    return recommendations
|
|
|
|
|
|
# ============== Pipeline Data Functions ==============
|
|
|
|
def save_agent_report(date: str, symbol: str, agent_type: str,
                      report_content: str, data_sources_used: list = None):
    """Insert or replace one agent's report for a stock on a date.

    data_sources_used is stored as JSON text; a missing or empty list is
    stored as '[]'.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        if data_sources_used:
            sources_json = json.dumps(data_sources_used)
        else:
            sources_json = '[]'

        cursor.execute("""
            INSERT OR REPLACE INTO agent_reports
            (date, symbol, agent_type, report_content, data_sources_used)
            VALUES (?, ?, ?, ?, ?)
        """, (date, symbol, agent_type, report_content, sources_json))
        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def save_agent_reports_bulk(date: str, symbol: str, reports: dict):
    """Insert or replace every agent report for a stock in one transaction.

    Args:
        date: Date string (YYYY-MM-DD)
        symbol: Stock symbol
        reports: Dict with keys 'market', 'news', 'social_media', 'fundamentals'.
            Each value is either the report text itself, or a dict read via
            its 'content' and 'data_sources' keys.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        rows = []
        for agent_type, payload in reports.items():
            is_plain_text = isinstance(payload, str)
            content = payload if is_plain_text else payload.get('content', '')
            sources = [] if is_plain_text else payload.get('data_sources', [])
            rows.append((date, symbol, agent_type, content, json.dumps(sources)))

        cursor.executemany("""
            INSERT OR REPLACE INTO agent_reports
            (date, symbol, agent_type, report_content, data_sources_used)
            VALUES (?, ?, ?, ?, ?)
        """, rows)

        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def get_agent_reports(date: str, symbol: str) -> dict:
    """Return the agent reports for a stock on a date, keyed by agent_type.

    data_sources_used is decoded from JSON; an empty stored value reads as [].
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        cursor.execute("""
            SELECT agent_type, report_content, data_sources_used, created_at
            FROM agent_reports
            WHERE date = ? AND symbol = ?
        """, (date, symbol))

        return {
            record['agent_type']: {
                'agent_type': record['agent_type'],
                'report_content': record['report_content'],
                'data_sources_used': json.loads(record['data_sources_used']) if record['data_sources_used'] else [],
                'created_at': record['created_at'],
            }
            for record in cursor.fetchall()
        }
    finally:
        conn.close()
|
|
|
|
|
|
def save_debate_history(date: str, symbol: str, debate_type: str,
                        bull_arguments: str = None, bear_arguments: str = None,
                        risky_arguments: str = None, safe_arguments: str = None,
                        neutral_arguments: str = None, judge_decision: str = None,
                        full_history: str = None):
    """Insert or replace the debate_history row for (date, symbol, debate_type).

    All argument and decision fields are stored as given; omitted ones are
    written as NULL.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        values = (
            date, symbol, debate_type,
            bull_arguments, bear_arguments,
            risky_arguments, safe_arguments, neutral_arguments,
            judge_decision, full_history,
        )
        cursor.execute("""
            INSERT OR REPLACE INTO debate_history
            (date, symbol, debate_type, bull_arguments, bear_arguments,
             risky_arguments, safe_arguments, neutral_arguments,
             judge_decision, full_history)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, values)
        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def get_debate_history(date: str, symbol: str) -> dict:
    """Return the stored debates for a stock on a date, keyed by debate_type."""
    exported_fields = (
        'debate_type',
        'bull_arguments',
        'bear_arguments',
        'risky_arguments',
        'safe_arguments',
        'neutral_arguments',
        'judge_decision',
        'full_history',
        'created_at',
    )

    conn = get_connection()
    cursor = conn.cursor()

    try:
        cursor.execute("""
            SELECT * FROM debate_history
            WHERE date = ? AND symbol = ?
        """, (date, symbol))

        debates = {}
        for record in cursor.fetchall():
            debates[record['debate_type']] = {
                field: record[field] for field in exported_fields
            }
        return debates
    finally:
        conn.close()
|
|
|
|
|
|
def save_pipeline_step(date: str, symbol: str, step_number: int, step_name: str,
                       status: str, started_at: str = None, completed_at: str = None,
                       duration_ms: int = None, output_summary: str = None,
                       step_details=None):
    """Insert or replace one pipeline step for (date, symbol, step_number).

    Args:
        date: Date string
        symbol: Stock symbol
        step_number: Position of the step in the pipeline
        step_name: Name of the step
        status: Step status text
        started_at: Optional start timestamp
        completed_at: Optional completion timestamp
        duration_ms: Optional duration in milliseconds
        output_summary: Optional summary text
        step_details: Optional extra details. A string is stored as-is; any
            other non-None value is stored as JSON text, matching
            save_pipeline_steps_bulk(). Defaults to None (stored as NULL).

    Note:
        INSERT OR REPLACE rewrites the whole row, so step_details must be
        passed again when re-saving a step that already has details.
    """
    if step_details is not None and not isinstance(step_details, str):
        step_details = json.dumps(step_details)

    conn = get_connection()
    cursor = conn.cursor()

    try:
        cursor.execute("""
            INSERT OR REPLACE INTO pipeline_steps
            (date, symbol, step_number, step_name, status,
             started_at, completed_at, duration_ms, output_summary, step_details)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            date, symbol, step_number, step_name, status,
            started_at, completed_at, duration_ms, output_summary, step_details
        ))
        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def save_pipeline_steps_bulk(date: str, symbol: str, steps: list):
    """Save all pipeline steps at once.

    Each step is written with INSERT OR REPLACE; all writes are committed
    together after the last step.

    Args:
        date: Date string
        symbol: Stock symbol
        steps: List of step dicts read via the keys step_number, step_name,
            status, started_at, completed_at, duration_ms, output_summary
            and step_details. A string step_details is stored as-is; any
            other non-None value is stored as JSON text.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        for step in steps:
            step_details = step.get('step_details')
            # Test against None rather than truthiness: an empty dict/list is
            # falsy but still cannot be bound to SQLite without serializing.
            if step_details is not None and not isinstance(step_details, str):
                step_details = json.dumps(step_details)
            cursor.execute("""
                INSERT OR REPLACE INTO pipeline_steps
                (date, symbol, step_number, step_name, status,
                 started_at, completed_at, duration_ms, output_summary, step_details)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                date, symbol,
                step.get('step_number'),
                step.get('step_name'),
                step.get('status'),
                step.get('started_at'),
                step.get('completed_at'),
                step.get('duration_ms'),
                step.get('output_summary'),
                step_details
            ))
        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def get_pipeline_steps(date: str, symbol: str) -> list:
    """Return the pipeline steps for a stock on a date, ordered by step_number.

    step_details is decoded from JSON; a missing, empty or undecodable value
    is returned as None.
    """
    plain_fields = (
        'step_number',
        'step_name',
        'status',
        'started_at',
        'completed_at',
        'duration_ms',
        'output_summary',
    )

    conn = get_connection()
    cursor = conn.cursor()

    try:
        cursor.execute("""
            SELECT * FROM pipeline_steps
            WHERE date = ? AND symbol = ?
            ORDER BY step_number
        """, (date, symbol))

        results = []
        for record in cursor.fetchall():
            raw_details = record['step_details'] if 'step_details' in record.keys() else None

            details = None
            if raw_details:
                try:
                    details = json.loads(raw_details)
                except (json.JSONDecodeError, TypeError):
                    details = None

            step = {field: record[field] for field in plain_fields}
            step['step_details'] = details
            results.append(step)
        return results
    finally:
        conn.close()
|
|
|
|
|
|
def save_data_source_log(date: str, symbol: str, source_type: str,
                         source_name: str, data_fetched: dict = None,
                         fetch_timestamp: str = None, success: bool = True,
                         error_message: str = None):
    """Append one data_source_logs row describing a fetch.

    data_fetched is stored as JSON text (NULL when empty), fetch_timestamp
    defaults to the current local time in ISO format, and success is stored
    as 1 or 0.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        payload = json.dumps(data_fetched) if data_fetched else None
        timestamp = fetch_timestamp or datetime.now().isoformat()
        success_flag = 1 if success else 0

        cursor.execute("""
            INSERT INTO data_source_logs
            (date, symbol, source_type, source_name, data_fetched,
             fetch_timestamp, success, error_message)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            date, symbol, source_type, source_name,
            payload, timestamp, success_flag, error_message
        ))
        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def save_data_source_logs_bulk(date: str, symbol: str, logs: list):
    """Append several data_source_logs rows and commit them together.

    Each log dict is read via the keys source_type, source_name, method,
    args, data_fetched, fetch_timestamp, success and error_message.
    data_fetched is stored as JSON text (NULL when empty), a missing
    fetch_timestamp defaults to the current local time in ISO format, and
    success (default True) is stored as 1 or 0.
    """
    insert_sql = """
        INSERT INTO data_source_logs
        (date, symbol, source_type, source_name, method, args, data_fetched,
         fetch_timestamp, success, error_message)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """

    conn = get_connection()
    cursor = conn.cursor()

    try:
        for entry in logs:
            fetched = entry.get('data_fetched')
            payload = json.dumps(fetched) if fetched else None
            timestamp = entry.get('fetch_timestamp') or datetime.now().isoformat()
            success_flag = 1 if entry.get('success', True) else 0

            cursor.execute(insert_sql, (
                date, symbol,
                entry.get('source_type'),
                entry.get('source_name'),
                entry.get('method'),
                entry.get('args'),
                payload,
                timestamp,
                success_flag,
                entry.get('error_message'),
            ))
        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def get_data_source_logs(date: str, symbol: str) -> list:
    """Get all data source logs for a stock on a date.

    Explicit rows from data_source_logs are returned ordered by
    fetch_timestamp. When none exist, one entry per agent_reports row is
    generated instead, using the report text as data_fetched and the
    report's created_at as fetch_timestamp.

    Returns:
        A list of dicts, each with the keys source_type, source_name,
        method, args, data_fetched, fetch_timestamp, success and
        error_message. Generated fallback entries carry method=None and
        args=None so both paths yield the same record shape.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        cursor.execute("""
            SELECT * FROM data_source_logs
            WHERE date = ? AND symbol = ?
            ORDER BY fetch_timestamp
        """, (date, symbol))

        logs = [
            {
                'source_type': row['source_type'],
                'source_name': row['source_name'],
                # method/args may be absent from the result columns on
                # databases that were never migrated
                'method': row['method'] if 'method' in row.keys() else None,
                'args': row['args'] if 'args' in row.keys() else None,
                'data_fetched': json.loads(row['data_fetched']) if row['data_fetched'] else None,
                'fetch_timestamp': row['fetch_timestamp'],
                'success': bool(row['success']),
                'error_message': row['error_message']
            }
            for row in cursor.fetchall()
        ]

        if logs:
            return logs

        # No explicit logs — generate from agent_reports with full raw content
        AGENT_TO_SOURCE = {
            'market': ('market_data', 'Yahoo Finance'),
            'news': ('news', 'Google News'),
            'social_media': ('social_media', 'Social Sentiment'),
            'fundamentals': ('fundamentals', 'Financial Data'),
        }

        cursor.execute("""
            SELECT agent_type, report_content, created_at
            FROM agent_reports
            WHERE date = ? AND symbol = ?
        """, (date, symbol))

        generated = []
        for row in cursor.fetchall():
            # Unknown agent types fall back to ('other', <agent_type>)
            source_type, source_name = AGENT_TO_SOURCE.get(
                row['agent_type'], ('other', row['agent_type'])
            )
            generated.append({
                'source_type': source_type,
                'source_name': source_name,
                'method': None,
                'args': None,
                'data_fetched': row['report_content'],
                'fetch_timestamp': row['created_at'],
                'success': True,
                'error_message': None
            })

        return generated
    finally:
        conn.close()
|
|
|
|
|
|
def get_full_pipeline_data(date: str, symbol: str) -> dict:
    """Collect agent reports, debates, pipeline steps and data-source logs.

    Returns a dict with the keys date, symbol, agent_reports, debates,
    pipeline_steps and data_sources, each section loaded by its getter.
    """
    agent_reports = get_agent_reports(date, symbol)
    debates = get_debate_history(date, symbol)
    pipeline_steps = get_pipeline_steps(date, symbol)
    data_sources = get_data_source_logs(date, symbol)

    return {
        'date': date,
        'symbol': symbol,
        'agent_reports': agent_reports,
        'debates': debates,
        'pipeline_steps': pipeline_steps,
        'data_sources': data_sources,
    }
|
|
|
|
|
|
def save_full_pipeline_data(date: str, symbol: str, pipeline_data: dict):
    """Persist whichever pipeline sections are present in ``pipeline_data``.

    Args:
        date: Date string
        symbol: Stock symbol
        pipeline_data: Dict that may hold any of the keys ``agent_reports``,
            ``investment_debate``, ``risk_debate``, ``pipeline_steps`` and
            ``data_sources``. Keys that are absent are skipped.
    """
    if 'agent_reports' in pipeline_data:
        save_agent_reports_bulk(date, symbol, pipeline_data['agent_reports'])

    # For each debate section: the debate type passed to save_debate_history
    # and a mapping of its keyword arguments to keys of the debate dict.
    debate_layouts = (
        ('investment_debate', 'investment', {
            'bull_arguments': 'bull_history',
            'bear_arguments': 'bear_history',
        }),
        ('risk_debate', 'risk', {
            'risky_arguments': 'risky_history',
            'safe_arguments': 'safe_history',
            'neutral_arguments': 'neutral_history',
        }),
    )
    for section, debate_type, argument_keys in debate_layouts:
        if section not in pipeline_data:
            continue
        debate = pipeline_data[section]
        fields = {
            keyword: debate.get(source_key)
            for keyword, source_key in argument_keys.items()
        }
        fields['judge_decision'] = debate.get('judge_decision')
        fields['full_history'] = debate.get('history')
        save_debate_history(date, symbol, debate_type, **fields)

    if 'pipeline_steps' in pipeline_data:
        save_pipeline_steps_bulk(date, symbol, pipeline_data['pipeline_steps'])

    if 'data_sources' in pipeline_data:
        save_data_source_logs_bulk(date, symbol, pipeline_data['data_sources'])
|
|
|
|
|
|
def get_pipeline_summary_for_date(date: str) -> list:
    """Summarize pipeline progress for every stock analyzed on ``date``.

    Returns one dict per distinct symbol found in ``stock_analysis`` with the
    keys ``symbol``, ``pipeline_steps`` (``step_name``/``status`` dicts in
    step-number order), ``agent_reports_count`` and ``has_debates``.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        # The symbols analyzed on this date decide which rows are returned.
        cursor.execute("""
            SELECT DISTINCT symbol FROM stock_analysis WHERE date = ?
        """, (date,))
        symbols = [record['symbol'] for record in cursor.fetchall()]

        # Each related table is read once for the whole date rather than
        # once per symbol.
        cursor.execute("""
            SELECT symbol, step_name, status FROM pipeline_steps
            WHERE date = ?
            ORDER BY symbol, step_number
        """, (date,))
        steps_by_symbol = {}
        for record in cursor.fetchall():
            steps_by_symbol.setdefault(record['symbol'], []).append({
                'step_name': record['step_name'],
                'status': record['status'],
            })

        cursor.execute("""
            SELECT symbol, COUNT(*) as count FROM agent_reports
            WHERE date = ?
            GROUP BY symbol
        """, (date,))
        agent_counts = {}
        for record in cursor.fetchall():
            agent_counts[record['symbol']] = record['count']

        cursor.execute("""
            SELECT DISTINCT symbol FROM debate_history WHERE date = ?
        """, (date,))
        symbols_with_debates = set()
        for record in cursor.fetchall():
            symbols_with_debates.add(record['symbol'])

        return [
            {
                'symbol': symbol,
                'pipeline_steps': steps_by_symbol.get(symbol, []),
                'agent_reports_count': agent_counts.get(symbol, 0),
                'has_debates': symbol in symbols_with_debates,
            }
            for symbol in symbols
        ]
    finally:
        conn.close()
|
|
|
|
|
|
def save_backtest_result(date: str, symbol: str, decision: str,
                         price_at_prediction: float, price_1d_later: float = None,
                         price_1w_later: float = None, price_1m_later: float = None,
                         return_1d: float = None, return_1w: float = None,
                         return_1m: float = None, prediction_correct: bool = None,
                         hold_days: int = None, return_at_hold: float = None):
    """Insert or replace one row of ``backtest_results``.

    ``prediction_correct`` is written as 1 or 0, or as NULL when it is
    ``None``. All other arguments are written as given.
    """
    # Collapse the tri-state flag into the value that gets stored.
    if prediction_correct is None:
        correct_flag = None
    elif prediction_correct:
        correct_flag = 1
    else:
        correct_flag = 0

    values = (
        date, symbol, decision, price_at_prediction,
        price_1d_later, price_1w_later, price_1m_later,
        return_1d, return_1w, return_1m,
        correct_flag,
        hold_days, return_at_hold,
    )

    conn = get_connection()
    cursor = conn.cursor()

    try:
        cursor.execute("""
            INSERT OR REPLACE INTO backtest_results
            (date, symbol, decision, price_at_prediction,
            price_1d_later, price_1w_later, price_1m_later,
            return_1d, return_1w, return_1m, prediction_correct, hold_days, return_at_hold)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, values)
        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def get_backtest_result(date: str, symbol: str) -> Optional[dict]:
    """Look up the backtest row for one symbol on one date.

    Returns ``None`` when no row matches. Otherwise returns a dict of the
    row's values, with ``prediction_correct`` converted to ``bool`` (or left
    as ``None``) and ``hold_days`` / ``return_at_hold`` set to ``None`` when
    the row has no such column.
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        cursor.execute("""
            SELECT * FROM backtest_results WHERE date = ? AND symbol = ?
        """, (date, symbol))
        record = cursor.fetchone()

        if not record:
            return None

        columns = record.keys()
        outcome = record['prediction_correct']

        copied_as_is = (
            'date', 'symbol', 'decision', 'price_at_prediction',
            'price_1d_later', 'price_1w_later', 'price_1m_later',
            'return_1d', 'return_1w', 'return_1m',
        )
        result = {name: record[name] for name in copied_as_is}
        result['prediction_correct'] = None if outcome is None else bool(outcome)
        # Fall back to None when the row does not expose these columns.
        result['hold_days'] = record['hold_days'] if 'hold_days' in columns else None
        result['return_at_hold'] = (
            record['return_at_hold'] if 'return_at_hold' in columns else None
        )
        result['calculated_at'] = record['calculated_at']
        return result
    finally:
        conn.close()
|
|
|
|
|
|
def get_backtest_results_by_date(date: str) -> list:
    """Return every backtest row stored for ``date`` as a list of dicts.

    Each dict carries the symbol, decision, prices and returns of one row.
    ``prediction_correct`` is converted to ``bool`` (or left as ``None``), and
    ``hold_days`` / ``return_at_hold`` are ``None`` when the row has no such
    column.
    """
    copied_as_is = (
        'symbol', 'decision', 'price_at_prediction',
        'price_1d_later', 'price_1w_later', 'price_1m_later',
        'return_1d', 'return_1w', 'return_1m',
    )

    conn = get_connection()
    cursor = conn.cursor()

    try:
        cursor.execute("""
            SELECT * FROM backtest_results WHERE date = ?
        """, (date,))

        results = []
        for record in cursor.fetchall():
            columns = record.keys()
            outcome = record['prediction_correct']

            entry = {name: record[name] for name in copied_as_is}
            entry['prediction_correct'] = None if outcome is None else bool(outcome)
            # Fall back to None when the row does not expose these columns.
            entry['hold_days'] = record['hold_days'] if 'hold_days' in columns else None
            entry['return_at_hold'] = (
                record['return_at_hold'] if 'return_at_hold' in columns else None
            )
            results.append(entry)
        return results
    finally:
        conn.close()
|
|
|
|
|
|
def get_all_backtest_results_grouped() -> dict:
    """Load all backtest results keyed by date, then by symbol.

    Used for the History page bundle.

    Returns: { date: { symbol: { return_1d, return_1w, return_1m, return_at_hold, hold_days, prediction_correct, decision } } }
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        cursor.execute("""
            SELECT date, symbol, decision, return_1d, return_1w, return_1m,
            return_at_hold, hold_days, prediction_correct,
            price_at_prediction
            FROM backtest_results
            ORDER BY date
        """)

        grouped: dict = {}
        for record in cursor.fetchall():
            outcome = record['prediction_correct']
            # setdefault creates the per-date dict the first time a date is seen.
            per_symbol = grouped.setdefault(record['date'], {})
            per_symbol[record['symbol']] = {
                'return_1d': record['return_1d'],
                'return_1w': record['return_1w'],
                'return_1m': record['return_1m'],
                'return_at_hold': record['return_at_hold'],
                'hold_days': record['hold_days'] if 'hold_days' in record.keys() else None,
                'prediction_correct': None if outcome is None else bool(outcome),
                'decision': record['decision'],
            }
        return grouped
    finally:
        conn.close()
|
|
|
|
|
|
def get_all_backtest_results() -> list:
    """Return every evaluated backtest row, newest date first.

    Only rows whose ``prediction_correct`` is not NULL are included. Each row
    is left-joined to ``stock_analysis`` on date and symbol to pick up
    ``confidence`` and ``risk``. Used for accuracy calculation.
    """
    copied_as_is = (
        'date', 'symbol', 'decision', 'confidence', 'risk',
        'price_at_prediction', 'return_1d', 'return_1w', 'return_1m',
    )

    conn = get_connection()
    cursor = conn.cursor()

    try:
        cursor.execute("""
            SELECT br.*, sa.confidence, sa.risk
            FROM backtest_results br
            LEFT JOIN stock_analysis sa ON br.date = sa.date AND br.symbol = sa.symbol
            WHERE br.prediction_correct IS NOT NULL
            ORDER BY br.date DESC
        """)

        results = []
        for record in cursor.fetchall():
            columns = record.keys()

            entry = {name: record[name] for name in copied_as_is}
            # The WHERE clause already excludes NULL, so bool() is safe here.
            entry['prediction_correct'] = bool(record['prediction_correct'])
            entry['hold_days'] = record['hold_days'] if 'hold_days' in columns else None
            entry['return_at_hold'] = (
                record['return_at_hold'] if 'return_at_hold' in columns else None
            )
            results.append(entry)
        return results
    finally:
        conn.close()
|
|
|
|
|
|
def calculate_accuracy_metrics() -> dict:
    """Compute overall and per-decision backtest accuracy.

    Backtest rows are joined to ``stock_analysis`` so the decision comes from
    there and is passed through ``sanitize_decision``. The return used for
    scoring is ``return_at_hold`` when it is set, otherwise ``return_1d``.
    BUY and HOLD count as correct on a positive return, SELL on a negative
    one.

    Returns a dict with ``overall_accuracy`` (percent, one decimal),
    ``total_predictions``, ``correct_predictions``, ``by_decision`` and
    ``by_confidence`` (always an empty dict).
    """
    conn = get_connection()
    cursor = conn.cursor()

    try:
        # Take the decision from stock_analysis, not from backtest_results.
        cursor.execute("""
            SELECT br.date, br.symbol, br.return_1d, br.return_1w, br.return_at_hold,
            sa.decision as sa_decision, sa.confidence
            FROM backtest_results br
            JOIN stock_analysis sa ON br.date = sa.date AND br.symbol = sa.symbol
            WHERE br.return_1d IS NOT NULL OR br.return_at_hold IS NOT NULL
        """)
        rows = cursor.fetchall()

        if not rows:
            # Nothing to score: report zeros for every bucket.
            return {
                'overall_accuracy': 0,
                'total_predictions': 0,
                'correct_predictions': 0,
                'by_decision': {'BUY': {'accuracy': 0, 'total': 0, 'correct': 0},
                                'SELL': {'accuracy': 0, 'total': 0, 'correct': 0},
                                'HOLD': {'accuracy': 0, 'total': 0, 'correct': 0}},
                'by_confidence': {}
            }

        total = 0
        correct = 0
        by_decision = {
            label: {'total': 0, 'correct': 0}
            for label in ('BUY', 'SELL', 'HOLD')
        }

        for record in rows:
            decision = sanitize_decision(record['sa_decision'])

            # Prefer the return over the hold period; fall back to 1-day.
            primary_return = record['return_at_hold']
            if primary_return is None:
                primary_return = record['return_1d']
            if primary_return is None:
                continue

            total += 1
            if decision in by_decision:
                by_decision[decision]['total'] += 1

            if decision == 'SELL':
                is_correct = primary_return < 0
            elif decision in ('BUY', 'HOLD'):
                is_correct = primary_return > 0
            else:
                continue

            if not is_correct:
                continue
            correct += 1
            if decision in by_decision:
                by_decision[decision]['correct'] += 1

        # Add the percentage to each per-decision bucket.
        for bucket in by_decision.values():
            seen = bucket['total']
            hits = bucket['correct']
            bucket['accuracy'] = round(hits / seen * 100, 1) if seen > 0 else 0

        overall = round(correct / total * 100, 1) if total > 0 else 0
        return {
            'overall_accuracy': overall,
            'total_predictions': total,
            'correct_predictions': correct,
            'by_decision': by_decision,
            'by_confidence': {}
        }
    finally:
        conn.close()
|
|
|
|
|
|
def compute_stock_rankings(date: str):
    """Assign and store a rank (1..N) for each stock analyzed on ``date``.

    Every stock gets a deterministic composite score:
        decision:   BUY=30, HOLD=15, SELL=0
        confidence: HIGH=20, MEDIUM=10, LOW=0
        risk (inv): LOW=15, MEDIUM=8, HIGH=0
        hold bonus: BUY with a positive hold_days gets +5 (<=5 days),
                    +4 (<=10), +3 (<=15), +2 (<=20), otherwise +1

    Score range: 0-70. Higher scores rank first; ties are ordered by symbol.
    Missing confidence or risk is treated as MEDIUM. Does nothing when the
    date has no rows.
    """
    DECISION_W = {'BUY': 30, 'HOLD': 15, 'SELL': 0}
    CONFIDENCE_W = {'HIGH': 20, 'MEDIUM': 10, 'LOW': 0}
    RISK_W = {'LOW': 15, 'MEDIUM': 8, 'HIGH': 0}
    # (upper bound on hold_days, bonus), checked in order; longer holds get +1.
    HOLD_BONUS_STEPS = ((5, 5), (10, 4), (15, 3), (20, 2))

    conn = get_connection()
    cursor = conn.cursor()

    try:
        cursor.execute("""
            SELECT id, symbol, decision, confidence, risk, hold_days
            FROM stock_analysis WHERE date = ?
        """, (date,))
        rows = cursor.fetchall()

        if not rows:
            return

        scored = []
        for record in rows:
            decision = sanitize_decision(record['decision'])
            confidence = (record['confidence'] or 'MEDIUM').upper()
            risk = (record['risk'] or 'MEDIUM').upper()
            hold_days = record['hold_days']

            score = (
                DECISION_W.get(decision, 0)
                + CONFIDENCE_W.get(confidence, 0)
                + RISK_W.get(risk, 0)
            )

            # A BUY with a shorter hold is a more immediate opportunity.
            if decision == 'BUY' and hold_days and hold_days > 0:
                score += next(
                    (bonus for limit, bonus in HOLD_BONUS_STEPS if hold_days <= limit),
                    1,
                )

            scored.append((record['id'], record['symbol'], score))

        # Highest score first; symbol ascending breaks ties.
        ordered = sorted(scored, key=lambda item: (-item[2], item[1]))

        cursor.executemany(
            "UPDATE stock_analysis SET rank = ? WHERE id = ?",
            [
                (rank, row_id)
                for rank, (row_id, _symbol, _score) in enumerate(ordered, start=1)
            ],
        )

        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def update_daily_recommendation_summary(date: str):
    """Auto-create/update daily_recommendations from stock_analysis for a date.

    Calls ``compute_stock_rankings(date)`` first, then reads the date's
    ``stock_analysis`` rows in rank order, counts BUY/SELL/HOLD decisions
    (after ``sanitize_decision``), and writes one ``daily_recommendations``
    row via INSERT OR REPLACE.

    ``top_picks`` holds the five best-ranked BUY stocks; ``stocks_to_avoid``
    holds the five worst-ranked SELL stocks. Both are stored as JSON lists of
    dicts with ``symbol``, ``company_name``, ``confidence``, ``reason`` (first
    200 characters of ``raw_analysis``) and ``rank``.

    Does nothing beyond the ranking step when the date has no rows.

    Args:
        date: Date string identifying the ``stock_analysis`` rows to summarize.
    """
    # Compute rankings first so top_picks/stocks_to_avoid use rank order
    compute_stock_rankings(date)

    conn = get_connection()
    cursor = conn.cursor()

    try:
        # "rank IS NULL" is 0 for ranked rows and 1 for unranked ones, so
        # unranked rows sort last. This replaces "NULLS LAST", which is a
        # syntax error on SQLite builds older than 3.30.
        cursor.execute("""
            SELECT symbol, company_name, decision, confidence, risk, raw_analysis, rank
            FROM stock_analysis WHERE date = ?
            ORDER BY rank IS NULL, rank ASC
        """, (date,))
        rows = cursor.fetchall()

        if not rows:
            return

        # BUY and SELL rows are collected in rank order; anything else is
        # counted as HOLD.
        entries_by_decision = {'BUY': [], 'SELL': []}
        hold_count = 0

        for row in rows:
            bucket = entries_by_decision.get(sanitize_decision(row['decision']))
            if bucket is None:
                hold_count += 1
                continue
            bucket.append({
                'symbol': row['symbol'],
                'company_name': row['company_name'] or row['symbol'],
                'confidence': row['confidence'] or 'MEDIUM',
                'reason': (row['raw_analysis'] or '')[:200],
                'rank': row['rank'],
            })

        buy_stocks = entries_by_decision['BUY']
        sell_stocks = entries_by_decision['SELL']
        buy_count = len(buy_stocks)
        sell_count = len(sell_stocks)
        total = buy_count + sell_count + hold_count

        # Top picks: top 5 BUY stocks by rank (already rank-sorted)
        top_picks = buy_stocks[:5]

        # Stocks to avoid: bottom-ranked SELL stocks (last 5)
        stocks_to_avoid = sell_stocks[-5:]

        cursor.execute("""
            INSERT OR REPLACE INTO daily_recommendations
            (date, summary_total, summary_buy, summary_sell, summary_hold, top_picks, stocks_to_avoid)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        """, (
            date, total, buy_count, sell_count, hold_count,
            json.dumps(top_picks),
            json.dumps(stocks_to_avoid)
        ))
        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def rebuild_all_daily_recommendations():
    """Regenerate daily_recommendations for every date in stock_analysis.

    Dates that have stock_analysis rows but no daily_recommendations entry
    get one, which makes them visible to the API. A summary line is printed
    when at least one date was processed.
    """
    conn = get_connection()
    cursor = conn.cursor()

    # Collect the dates and close this connection before rebuilding.
    try:
        cursor.execute("SELECT DISTINCT date FROM stock_analysis")
        dates = []
        for record in cursor.fetchall():
            dates.append(record['date'])
    finally:
        conn.close()

    if not dates:
        return

    for day in dates:
        update_daily_recommendation_summary(day)

    print(f"[DB] Rebuilt daily_recommendations for {len(dates)} dates: {sorted(dates)}")
|
|
|
|
|
|
# Initialize database on module import
|
|
# Import-time side effect: importing this module calls init_db(), which
# (per its docstring) initializes the database with the required tables.
init_db()
|