chore: linter formatting + ML scanner logging, prompt control, ranker reasoning

- Add ML signal scanner results table logging
- Add log_prompts_console config flag for prompt visibility control
- Expand ranker investment thesis to 4-6 sentence structured reasoning
- Linter auto-formatting across modified files

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Youssef Aitousarrah 2026-02-09 23:04:38 -08:00
parent 43bdd6de11
commit cb5ae49501
29 changed files with 1368 additions and 126 deletions

View File

@ -1,3 +1,4 @@
-e .
typing-extensions typing-extensions
langchain-openai langchain-openai
langchain-experimental langchain-experimental

View File

@ -166,7 +166,9 @@ def analyze_insider_transactions(ticker: str, save_csv: bool = False, output_dir
if pd.notna(row["Value"]) and row["Value"] > 0 if pd.notna(row["Value"]) and row["Value"] > 0
else f"{'N/A':>16}" else f"{'N/A':>16}"
) )
logger.info(f" {row['Transaction']:15} | {row['Shares']:>12,.0f} shares | {value_str}") logger.info(
f" {row['Transaction']:15} | {row['Shares']:>12,.0f} shares | {value_str}"
)
# ============================================================ # ============================================================
# OVERALL SENTIMENT # OVERALL SENTIMENT
@ -206,12 +208,16 @@ def analyze_insider_transactions(ticker: str, save_csv: bool = False, output_dir
) )
logger.info(f"Total Sales: {sales_count:>5} transactions | ${total_sales:>15,.0f}") logger.info(f"Total Sales: {sales_count:>5} transactions | ${total_sales:>15,.0f}")
logger.info(f"Total Purchases: {purchases_count:>5} transactions | ${total_purchases:>15,.0f}") logger.info(
f"Total Purchases: {purchases_count:>5} transactions | ${total_purchases:>15,.0f}"
)
if sentiment == "BULLISH": if sentiment == "BULLISH":
logger.info(f"\n⚡ BULLISH: Insiders are net BUYERS (${net_value:,.0f} net buying)") logger.info(f"\n⚡ BULLISH: Insiders are net BUYERS (${net_value:,.0f} net buying)")
elif sentiment == "BEARISH": elif sentiment == "BEARISH":
logger.info(f"\n⚠️ BEARISH: Significant insider SELLING (${-net_value:,.0f} net selling)") logger.info(
f"\n⚠️ BEARISH: Significant insider SELLING (${-net_value:,.0f} net selling)"
)
elif sentiment == "SLIGHTLY_BEARISH": elif sentiment == "SLIGHTLY_BEARISH":
logger.info( logger.info(
f"\n⚠️ SLIGHTLY BEARISH: More selling than buying (${-net_value:,.0f} net selling)" f"\n⚠️ SLIGHTLY BEARISH: More selling than buying (${-net_value:,.0f} net selling)"
@ -269,7 +275,9 @@ if __name__ == "__main__":
) )
logger.info("Example: python analyze_insider_transactions.py AAPL TSLA NVDA") logger.info("Example: python analyze_insider_transactions.py AAPL TSLA NVDA")
logger.info(" python analyze_insider_transactions.py AAPL --csv") logger.info(" python analyze_insider_transactions.py AAPL --csv")
logger.info(" python analyze_insider_transactions.py AAPL --csv --output-dir ./output") logger.info(
" python analyze_insider_transactions.py AAPL --csv --output-dir ./output"
)
sys.exit(1) sys.exit(1)
# Parse arguments # Parse arguments

View File

@ -18,7 +18,6 @@ import sys
import time import time
from pathlib import Path from pathlib import Path
import numpy as np
import pandas as pd import pandas as pd
# Add project root to path # Add project root to path
@ -40,35 +39,210 @@ logger = get_logger(__name__)
# Can be overridden via --ticker-file # Can be overridden via --ticker-file
DEFAULT_TICKERS = [ DEFAULT_TICKERS = [
# Mega-cap tech # Mega-cap tech
"AAPL", "MSFT", "GOOGL", "AMZN", "NVDA", "META", "TSLA", "AVGO", "ORCL", "CRM", "AAPL",
"AMD", "INTC", "CSCO", "ADBE", "NFLX", "QCOM", "TXN", "AMAT", "MU", "LRCX", "MSFT",
"KLAC", "MRVL", "SNPS", "CDNS", "PANW", "CRWD", "FTNT", "NOW", "UBER", "ABNB", "GOOGL",
"AMZN",
"NVDA",
"META",
"TSLA",
"AVGO",
"ORCL",
"CRM",
"AMD",
"INTC",
"CSCO",
"ADBE",
"NFLX",
"QCOM",
"TXN",
"AMAT",
"MU",
"LRCX",
"KLAC",
"MRVL",
"SNPS",
"CDNS",
"PANW",
"CRWD",
"FTNT",
"NOW",
"UBER",
"ABNB",
# Financials # Financials
"JPM", "BAC", "WFC", "GS", "MS", "C", "SCHW", "BLK", "AXP", "USB", "JPM",
"PNC", "TFC", "COF", "BK", "STT", "FITB", "HBAN", "RF", "CFG", "KEY", "BAC",
"WFC",
"GS",
"MS",
"C",
"SCHW",
"BLK",
"AXP",
"USB",
"PNC",
"TFC",
"COF",
"BK",
"STT",
"FITB",
"HBAN",
"RF",
"CFG",
"KEY",
# Healthcare # Healthcare
"UNH", "JNJ", "LLY", "PFE", "ABBV", "MRK", "TMO", "ABT", "DHR", "BMY", "UNH",
"AMGN", "GILD", "ISRG", "VRTX", "REGN", "MDT", "SYK", "BSX", "EW", "ZTS", "JNJ",
"LLY",
"PFE",
"ABBV",
"MRK",
"TMO",
"ABT",
"DHR",
"BMY",
"AMGN",
"GILD",
"ISRG",
"VRTX",
"REGN",
"MDT",
"SYK",
"BSX",
"EW",
"ZTS",
# Consumer # Consumer
"WMT", "PG", "KO", "PEP", "COST", "MCD", "NKE", "SBUX", "TGT", "LOW", "WMT",
"HD", "TJX", "ROST", "DG", "DLTR", "EL", "CL", "KMB", "GIS", "K", "PG",
"KO",
"PEP",
"COST",
"MCD",
"NKE",
"SBUX",
"TGT",
"LOW",
"HD",
"TJX",
"ROST",
"DG",
"DLTR",
"EL",
"CL",
"KMB",
"GIS",
"K",
# Energy # Energy
"XOM", "CVX", "COP", "EOG", "SLB", "MPC", "PSX", "VLO", "OXY", "DVN", "XOM",
"HAL", "FANG", "HES", "BKR", "KMI", "WMB", "OKE", "ET", "TRGP", "LNG", "CVX",
"COP",
"EOG",
"SLB",
"MPC",
"PSX",
"VLO",
"OXY",
"DVN",
"HAL",
"FANG",
"HES",
"BKR",
"KMI",
"WMB",
"OKE",
"ET",
"TRGP",
"LNG",
# Industrials # Industrials
"CAT", "DE", "UNP", "UPS", "HON", "RTX", "BA", "LMT", "GD", "NOC", "CAT",
"GE", "MMM", "EMR", "ITW", "PH", "ROK", "ETN", "SWK", "CMI", "PCAR", "DE",
"UNP",
"UPS",
"HON",
"RTX",
"BA",
"LMT",
"GD",
"NOC",
"GE",
"MMM",
"EMR",
"ITW",
"PH",
"ROK",
"ETN",
"SWK",
"CMI",
"PCAR",
# Materials & Utilities # Materials & Utilities
"LIN", "APD", "ECL", "SHW", "DD", "NEM", "FCX", "VMC", "MLM", "NUE", "LIN",
"NEE", "DUK", "SO", "D", "AEP", "EXC", "SRE", "XEL", "WEC", "ES", "APD",
"ECL",
"SHW",
"DD",
"NEM",
"FCX",
"VMC",
"MLM",
"NUE",
"NEE",
"DUK",
"SO",
"D",
"AEP",
"EXC",
"SRE",
"XEL",
"WEC",
"ES",
# REITs & Telecom # REITs & Telecom
"AMT", "PLD", "CCI", "EQIX", "SPG", "O", "PSA", "DLR", "WELL", "AVB", "AMT",
"T", "VZ", "TMUS", "CHTR", "CMCSA", "PLD",
"CCI",
"EQIX",
"SPG",
"O",
"PSA",
"DLR",
"WELL",
"AVB",
"T",
"VZ",
"TMUS",
"CHTR",
"CMCSA",
# High-volatility / popular retail # High-volatility / popular retail
"COIN", "MARA", "RIOT", "PLTR", "SOFI", "HOOD", "RBLX", "SNAP", "PINS", "SQ", "COIN",
"SHOP", "SE", "ROKU", "DKNG", "PENN", "WYNN", "MGM", "LVS", "DASH", "TTD", "MARA",
"RIOT",
"PLTR",
"SOFI",
"HOOD",
"RBLX",
"SNAP",
"PINS",
"SQ",
"SHOP",
"SE",
"ROKU",
"DKNG",
"PENN",
"WYNN",
"MGM",
"LVS",
"DASH",
"TTD",
# Biotech # Biotech
"MRNA", "BNTX", "BIIB", "SGEN", "ALNY", "BMRN", "EXAS", "DXCM", "HZNP", "INCY", "MRNA",
"BNTX",
"BIIB",
"SGEN",
"ALNY",
"BMRN",
"EXAS",
"DXCM",
"HZNP",
"INCY",
] ]
OUTPUT_DIR = Path("data/ml") OUTPUT_DIR = Path("data/ml")
@ -221,10 +395,16 @@ def build_dataset(
logger.info(f"\n{'='*60}") logger.info(f"\n{'='*60}")
logger.info(f"Dataset built: {len(dataset)} total samples from {len(all_data)} tickers") logger.info(f"Dataset built: {len(dataset)} total samples from {len(all_data)} tickers")
logger.info(f"Label distribution:") logger.info("Label distribution:")
logger.info(f" WIN (+1): {int((dataset['label'] == 1).sum()):>7} ({(dataset['label'] == 1).mean()*100:.1f}%)") logger.info(
logger.info(f" LOSS (-1): {int((dataset['label'] == -1).sum()):>7} ({(dataset['label'] == -1).mean()*100:.1f}%)") f" WIN (+1): {int((dataset['label'] == 1).sum()):>7} ({(dataset['label'] == 1).mean()*100:.1f}%)"
logger.info(f" TIMEOUT: {int((dataset['label'] == 0).sum()):>7} ({(dataset['label'] == 0).mean()*100:.1f}%)") )
logger.info(
f" LOSS (-1): {int((dataset['label'] == -1).sum()):>7} ({(dataset['label'] == -1).mean()*100:.1f}%)"
)
logger.info(
f" TIMEOUT: {int((dataset['label'] == 0).sum()):>7} ({(dataset['label'] == 0).mean()*100:.1f}%)"
)
logger.info(f"Features: {len(FEATURE_COLUMNS)}") logger.info(f"Features: {len(FEATURE_COLUMNS)}")
logger.info(f"{'='*60}") logger.info(f"{'='*60}")
@ -233,12 +413,20 @@ def build_dataset(
def main(): def main():
parser = argparse.ArgumentParser(description="Build ML training dataset") parser = argparse.ArgumentParser(description="Build ML training dataset")
parser.add_argument("--stocks", type=int, default=None, help="Limit to N stocks from default universe") parser.add_argument(
parser.add_argument("--ticker-file", type=str, default=None, help="File with tickers (one per line)") "--stocks", type=int, default=None, help="Limit to N stocks from default universe"
)
parser.add_argument(
"--ticker-file", type=str, default=None, help="File with tickers (one per line)"
)
parser.add_argument("--start", type=str, default="2022-01-01", help="Start date (YYYY-MM-DD)") parser.add_argument("--start", type=str, default="2022-01-01", help="Start date (YYYY-MM-DD)")
parser.add_argument("--end", type=str, default="2025-12-31", help="End date (YYYY-MM-DD)") parser.add_argument("--end", type=str, default="2025-12-31", help="End date (YYYY-MM-DD)")
parser.add_argument("--profit-target", type=float, default=0.05, help="Profit target fraction (default: 0.05)") parser.add_argument(
parser.add_argument("--stop-loss", type=float, default=0.03, help="Stop loss fraction (default: 0.03)") "--profit-target", type=float, default=0.05, help="Profit target fraction (default: 0.05)"
)
parser.add_argument(
"--stop-loss", type=float, default=0.03, help="Stop loss fraction (default: 0.03)"
)
parser.add_argument("--holding-days", type=int, default=7, help="Max holding days (default: 7)") parser.add_argument("--holding-days", type=int, default=7, help="Max holding days (default: 7)")
parser.add_argument("--output", type=str, default=None, help="Output parquet path") parser.add_argument("--output", type=str, default=None, help="Output parquet path")
args = parser.parse_args() args = parser.parse_args()
@ -246,7 +434,9 @@ def main():
# Determine ticker list # Determine ticker list
if args.ticker_file: if args.ticker_file:
with open(args.ticker_file) as f: with open(args.ticker_file) as f:
tickers = [line.strip().upper() for line in f if line.strip() and not line.startswith("#")] tickers = [
line.strip().upper() for line in f if line.strip() and not line.startswith("#")
]
logger.info(f"Loaded {len(tickers)} tickers from {args.ticker_file}") logger.info(f"Loaded {len(tickers)} tickers from {args.ticker_file}")
else: else:
tickers = DEFAULT_TICKERS tickers = DEFAULT_TICKERS

View File

@ -11,7 +11,6 @@ This script creates memory sets optimized for:
import os import os
import sys import sys
from pathlib import Path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

View File

@ -17,7 +17,6 @@ import json
import os import os
import sys import sys
from datetime import datetime from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List from typing import Any, Dict, List
# Add parent directory to path # Add parent directory to path

View File

@ -80,13 +80,16 @@ def time_split(
if max_train_samples is not None and len(train) > max_train_samples: if max_train_samples is not None and len(train) > max_train_samples:
train = train.sort_values("date").tail(max_train_samples) train = train.sort_values("date").tail(max_train_samples)
logger.info( logger.info(
f"Limiting training samples to most recent {max_train_samples} " f"Limiting training samples to most recent {max_train_samples} " f"before {val_start}"
f"before {val_start}"
) )
logger.info(f"Time-based split at {val_start}:") logger.info(f"Time-based split at {val_start}:")
logger.info(f" Train: {len(train)} samples ({train['date'].min().date()} to {train['date'].max().date()})") logger.info(
logger.info(f" Val: {len(val)} samples ({val['date'].min().date()} to {val['date'].max().date()})") f" Train: {len(train)} samples ({train['date'].min().date()} to {train['date'].max().date()})"
)
logger.info(
f" Val: {len(val)} samples ({val['date'].min().date()} to {val['date'].max().date()})"
)
X_train = train[FEATURE_COLUMNS].values X_train = train[FEATURE_COLUMNS].values
y_train = train["label"].values.astype(int) y_train = train["label"].values.astype(int)
@ -152,8 +155,12 @@ def train_lightgbm(X_train, y_train, X_val, y_val):
class_weight = {c: total / (n_classes * count) for c, count in class_counts.items()} class_weight = {c: total / (n_classes * count) for c, count in class_counts.items()}
sample_weights = np.array([class_weight[y] for y in y_train_mapped]) sample_weights = np.array([class_weight[y] for y in y_train_mapped])
train_data = lgb.Dataset(X_train, label=y_train_mapped, weight=sample_weights, feature_name=FEATURE_COLUMNS) train_data = lgb.Dataset(
val_data = lgb.Dataset(X_val, label=y_val_mapped, feature_name=FEATURE_COLUMNS, reference=train_data) X_train, label=y_train_mapped, weight=sample_weights, feature_name=FEATURE_COLUMNS
)
val_data = lgb.Dataset(
X_val, label=y_val_mapped, feature_name=FEATURE_COLUMNS, reference=train_data
)
params = { params = {
"objective": "multiclass", "objective": "multiclass",
@ -209,7 +216,8 @@ def evaluate(model, X_val, y_val, model_type: str) -> dict:
accuracy = accuracy_score(y_val, y_pred) accuracy = accuracy_score(y_val, y_pred)
report = classification_report( report = classification_report(
y_val, y_pred, y_val,
y_pred,
target_names=["LOSS (-1)", "TIMEOUT (0)", "WIN (+1)"], target_names=["LOSS (-1)", "TIMEOUT (0)", "WIN (+1)"],
output_dict=True, output_dict=True,
) )
@ -253,13 +261,21 @@ def evaluate(model, X_val, y_val, model_type: str) -> dict:
# Top decile (top 10% by P(WIN)) — most actionable metric # Top decile (top 10% by P(WIN)) — most actionable metric
top_decile_threshold = np.percentile(win_probs_all, 90) top_decile_threshold = np.percentile(win_probs_all, 90)
top_decile_mask = win_probs_all >= top_decile_threshold top_decile_mask = win_probs_all >= top_decile_threshold
top_decile_win_rate = float((y_val[top_decile_mask] == 1).mean()) if top_decile_mask.sum() > 0 else 0.0 top_decile_win_rate = (
top_decile_loss_rate = float((y_val[top_decile_mask] == -1).mean()) if top_decile_mask.sum() > 0 else 0.0 float((y_val[top_decile_mask] == 1).mean()) if top_decile_mask.sum() > 0 else 0.0
)
top_decile_loss_rate = (
float((y_val[top_decile_mask] == -1).mean()) if top_decile_mask.sum() > 0 else 0.0
)
metrics = { metrics = {
"model_type": model_type, "model_type": model_type,
"accuracy": round(accuracy, 4), "accuracy": round(accuracy, 4),
"per_class": {k: {kk: round(vv, 4) for kk, vv in v.items()} for k, v in report.items() if isinstance(v, dict)}, "per_class": {
k: {kk: round(vv, 4) for kk, vv in v.items()}
for k, v in report.items()
if isinstance(v, dict)
},
"confusion_matrix": cm.tolist(), "confusion_matrix": cm.tolist(),
"avg_win_prob_for_actual_wins": round(avg_win_prob_for_actual_wins, 4), "avg_win_prob_for_actual_wins": round(avg_win_prob_for_actual_wins, 4),
"high_confidence_win_precision": round(high_conf_precision, 4), "high_confidence_win_precision": round(high_conf_precision, 4),
@ -276,25 +292,31 @@ def evaluate(model, X_val, y_val, model_type: str) -> dict:
logger.info(f"\n{'='*60}") logger.info(f"\n{'='*60}")
logger.info(f"Model: {model_type}") logger.info(f"Model: {model_type}")
logger.info(f"Overall Accuracy: {accuracy:.1%}") logger.info(f"Overall Accuracy: {accuracy:.1%}")
logger.info(f"\nPer-class metrics:") logger.info("\nPer-class metrics:")
logger.info(f"{'':>15} {'Precision':>10} {'Recall':>10} {'F1':>10} {'Support':>10}") logger.info(f"{'':>15} {'Precision':>10} {'Recall':>10} {'F1':>10} {'Support':>10}")
for label, name in [(-1, "LOSS"), (0, "TIMEOUT"), (1, "WIN")]: for label, name in [(-1, "LOSS"), (0, "TIMEOUT"), (1, "WIN")]:
key = f"{name} ({label:+d})" key = f"{name} ({label:+d})"
if key in report: if key in report:
r = report[key] r = report[key]
logger.info(f"{name:>15} {r['precision']:>10.3f} {r['recall']:>10.3f} {r['f1-score']:>10.3f} {r['support']:>10.0f}") logger.info(
f"{name:>15} {r['precision']:>10.3f} {r['recall']:>10.3f} {r['f1-score']:>10.3f} {r['support']:>10.0f}"
)
logger.info(f"\nConfusion Matrix (rows=actual, cols=predicted):") logger.info("\nConfusion Matrix (rows=actual, cols=predicted):")
logger.info(f"{'':>10} {'LOSS':>8} {'TIMEOUT':>8} {'WIN':>8}") logger.info(f"{'':>10} {'LOSS':>8} {'TIMEOUT':>8} {'WIN':>8}")
for i, name in enumerate(["LOSS", "TIMEOUT", "WIN"]): for i, name in enumerate(["LOSS", "TIMEOUT", "WIN"]):
logger.info(f"{name:>10} {cm[i][0]:>8} {cm[i][1]:>8} {cm[i][2]:>8}") logger.info(f"{name:>10} {cm[i][0]:>8} {cm[i][1]:>8} {cm[i][2]:>8}")
logger.info(f"\nWin-class insights:") logger.info("\nWin-class insights:")
logger.info(f" Avg P(WIN) for actual winners: {avg_win_prob_for_actual_wins:.1%}") logger.info(f" Avg P(WIN) for actual winners: {avg_win_prob_for_actual_wins:.1%}")
logger.info(f" High-confidence (>60%) precision: {high_conf_precision:.1%} ({high_conf_count} samples)") logger.info(
f" High-confidence (>60%) precision: {high_conf_precision:.1%} ({high_conf_count} samples)"
)
logger.info("\nCalibration (does higher P(WIN) = more actual wins?):") logger.info("\nCalibration (does higher P(WIN) = more actual wins?):")
logger.info(f"{'Quintile':>10} {'Avg P(WIN)':>12} {'Actual WIN%':>12} {'Actual LOSS%':>13} {'Count':>8}") logger.info(
f"{'Quintile':>10} {'Avg P(WIN)':>12} {'Actual WIN%':>12} {'Actual LOSS%':>13} {'Count':>8}"
)
for q_name, q_data in calibration.items(): for q_name, q_data in calibration.items():
logger.info( logger.info(
f"{q_name:>10} {q_data['mean_predicted_win_prob']:>12.1%} " f"{q_name:>10} {q_data['mean_predicted_win_prob']:>12.1%} "
@ -304,7 +326,9 @@ def evaluate(model, X_val, y_val, model_type: str) -> dict:
logger.info("\nTop decile (top 10% by P(WIN)):") logger.info("\nTop decile (top 10% by P(WIN)):")
logger.info(f" Threshold: P(WIN) >= {top_decile_threshold:.1%}") logger.info(f" Threshold: P(WIN) >= {top_decile_threshold:.1%}")
logger.info(f" Actual win rate: {top_decile_win_rate:.1%} ({int(top_decile_mask.sum())} samples)") logger.info(
f" Actual win rate: {top_decile_win_rate:.1%} ({int(top_decile_mask.sum())} samples)"
)
logger.info(f" Actual loss rate: {top_decile_loss_rate:.1%}") logger.info(f" Actual loss rate: {top_decile_loss_rate:.1%}")
baseline_win = float((y_val == 1).mean()) baseline_win = float((y_val == 1).mean())
logger.info(f" Baseline win rate: {baseline_win:.1%}") logger.info(f" Baseline win rate: {baseline_win:.1%}")
@ -318,12 +342,25 @@ def evaluate(model, X_val, y_val, model_type: str) -> dict:
def main(): def main():
parser = argparse.ArgumentParser(description="Train ML model for win probability") parser = argparse.ArgumentParser(description="Train ML model for win probability")
parser.add_argument("--dataset", type=str, default="data/ml/training_dataset.parquet") parser.add_argument("--dataset", type=str, default="data/ml/training_dataset.parquet")
parser.add_argument("--model", type=str, choices=["tabpfn", "lightgbm", "auto"], default="auto", parser.add_argument(
help="Model type (auto tries TabPFN first, falls back to LightGBM)") "--model",
parser.add_argument("--val-start", type=str, default="2024-07-01", type=str,
help="Validation split date (default: 2024-07-01)") choices=["tabpfn", "lightgbm", "auto"],
parser.add_argument("--max-train-samples", type=int, default=None, default="auto",
help="Limit training samples to the most recent N before val-start") help="Model type (auto tries TabPFN first, falls back to LightGBM)",
)
parser.add_argument(
"--val-start",
type=str,
default="2024-07-01",
help="Validation split date (default: 2024-07-01)",
)
parser.add_argument(
"--max-train-samples",
type=int,
default=None,
help="Limit training samples to the most recent N before val-start",
)
parser.add_argument("--output-dir", type=str, default="data/ml") parser.add_argument("--output-dir", type=str, default="data/ml")
args = parser.parse_args() args = parser.parse_args()

File diff suppressed because one or more lines are too long

View File

@ -68,6 +68,8 @@ Choose BUY or SELL (no HOLD). If the edge is unclear, pick the less-bad side and
response_text = parse_llm_response(response.content) response_text = parse_llm_response(response.content)
argument = f"Neutral Analyst: {response_text}" argument = f"Neutral Analyst: {response_text}"
return {"risk_debate_state": update_risk_debate_state(risk_debate_state, argument, "Neutral")} return {
"risk_debate_state": update_risk_debate_state(risk_debate_state, argument, "Neutral")
}
return neutral_node return neutral_node

View File

@ -1,4 +1,4 @@
from typing import Any, Callable, Dict, List from typing import Any, Callable, Dict
from langchain_core.messages import HumanMessage, RemoveMessage from langchain_core.messages import HumanMessage, RemoveMessage
@ -95,9 +95,7 @@ def update_risk_debate_state(
"count": debate_state["count"] + 1, "count": debate_state["count"] + 1,
} }
# Append to the speaker's own history and set their current response # Append to the speaker's own history and set their current response
new_state[f"{role_key}_history"] = ( new_state[f"{role_key}_history"] = debate_state.get(f"{role_key}_history", "") + "\n" + argument
debate_state.get(f"{role_key}_history", "") + "\n" + argument
)
new_state[f"current_{role_key}_response"] = argument new_state[f"current_{role_key}_response"] = argument
return new_state return new_state

View File

@ -203,7 +203,9 @@ class HistoricalMemoryBuilder:
except (IndexError, KeyError): except (IndexError, KeyError):
continue continue
logger.info(f"Found {len([m for m in high_movers if m['ticker'] == ticker])} moves for {ticker}") logger.info(
f"Found {len([m for m in high_movers if m['ticker'] == ticker])} moves for {ticker}"
)
else: else:
logger.debug(f"{ticker}: No significant moves") logger.debug(f"{ticker}: No significant moves")
@ -440,7 +442,9 @@ class HistoricalMemoryBuilder:
high_movers = self.find_high_movers(tickers, start_date, end_date, min_move_pct) high_movers = self.find_high_movers(tickers, start_date, end_date, min_move_pct)
if not high_movers: if not high_movers:
logger.warning("⚠️ No high movers found. Try a different date range or lower threshold.") logger.warning(
"⚠️ No high movers found. Try a different date range or lower threshold."
)
return {} return {}
# Step 1.5: Sample/filter high movers based on strategy # Step 1.5: Sample/filter high movers based on strategy
@ -449,7 +453,9 @@ class HistoricalMemoryBuilder:
logger.info(f"📊 Sampling Strategy: {sample_strategy}") logger.info(f"📊 Sampling Strategy: {sample_strategy}")
logger.info(f"Total high movers found: {len(high_movers)}") logger.info(f"Total high movers found: {len(high_movers)}")
logger.info(f"Samples to analyze: {len(sampled_movers)}") logger.info(f"Samples to analyze: {len(sampled_movers)}")
logger.info(f"Estimated runtime: ~{len(sampled_movers) * len(analysis_windows) * 2} minutes") logger.info(
f"Estimated runtime: ~{len(sampled_movers) * len(analysis_windows) * 2} minutes"
)
# Initialize memory stores # Initialize memory stores
agent_memories = { agent_memories = {

View File

@ -11,6 +11,7 @@ from pathlib import Path
from typing import Annotated, Dict, List, Optional, Union from typing import Annotated, Dict, List, Optional, Union
import pandas as pd import pandas as pd
from tradingagents.dataflows.y_finance import _get_ticker_universe, get_ticker_history from tradingagents.dataflows.y_finance import _get_ticker_universe, get_ticker_history
from tradingagents.utils.logger import get_logger from tradingagents.utils.logger import get_logger
@ -460,7 +461,9 @@ def download_volume_data(
logger.info("Skipping cache (use_cache=False), forcing fresh download...") logger.info("Skipping cache (use_cache=False), forcing fresh download...")
# Download fresh data # Download fresh data
logger.info(f"Downloading {history_period_days} days of volume data for {len(tickers)} tickers...") logger.info(
f"Downloading {history_period_days} days of volume data for {len(tickers)} tickers..."
)
raw_data = {} raw_data = {}
with ThreadPoolExecutor(max_workers=15) as executor: with ThreadPoolExecutor(max_workers=15) as executor:

View File

@ -349,7 +349,9 @@ class DiscoveryAnalytics:
indent=2, indent=2,
) )
logger.info(f" 📊 Saved {len(enriched_rankings)} recommendations for tracking: {output_file}") logger.info(
f" 📊 Saved {len(enriched_rankings)} recommendations for tracking: {output_file}"
)
def save_discovery_results(self, state: dict, trade_date: str, config: Dict[str, Any]): def save_discovery_results(self, state: dict, trade_date: str, config: Dict[str, Any]):
"""Save full discovery results and tool logs.""" """Save full discovery results and tool logs."""

View File

@ -158,9 +158,7 @@ class DiscoveryConfig:
max_candidates_to_analyze=disc.get( max_candidates_to_analyze=disc.get(
"max_candidates_to_analyze", _rd.max_candidates_to_analyze "max_candidates_to_analyze", _rd.max_candidates_to_analyze
), ),
analyze_all_candidates=disc.get( analyze_all_candidates=disc.get("analyze_all_candidates", _rd.analyze_all_candidates),
"analyze_all_candidates", _rd.analyze_all_candidates
),
final_recommendations=disc.get("final_recommendations", _rd.final_recommendations), final_recommendations=disc.get("final_recommendations", _rd.final_recommendations),
truncate_ranking_context=disc.get( truncate_ranking_context=disc.get(
"truncate_ranking_context", _rd.truncate_ranking_context "truncate_ranking_context", _rd.truncate_ranking_context
@ -189,12 +187,8 @@ class DiscoveryConfig:
# Logging # Logging
logging_cfg = LoggingConfig( logging_cfg = LoggingConfig(
log_tool_calls=disc.get("log_tool_calls", _ld.log_tool_calls), log_tool_calls=disc.get("log_tool_calls", _ld.log_tool_calls),
log_tool_calls_console=disc.get( log_tool_calls_console=disc.get("log_tool_calls_console", _ld.log_tool_calls_console),
"log_tool_calls_console", _ld.log_tool_calls_console log_prompts_console=disc.get("log_prompts_console", _ld.log_prompts_console),
),
log_prompts_console=disc.get(
"log_prompts_console", _ld.log_prompts_console
),
tool_log_max_chars=disc.get("tool_log_max_chars", _ld.tool_log_max_chars), tool_log_max_chars=disc.get("tool_log_max_chars", _ld.tool_log_max_chars),
tool_log_exclude=disc.get("tool_log_exclude", _ld.tool_log_exclude), tool_log_exclude=disc.get("tool_log_exclude", _ld.tool_log_exclude),
) )

View File

@ -185,7 +185,9 @@ class CandidateFilter:
# Print consolidated list of failed tickers # Print consolidated list of failed tickers
if failed_tickers: if failed_tickers:
logger.warning(f"⚠️ {len(failed_tickers)} tickers failed data fetch (possibly delisted)") logger.warning(
f"⚠️ {len(failed_tickers)} tickers failed data fetch (possibly delisted)"
)
if len(failed_tickers) <= 10: if len(failed_tickers) <= 10:
logger.warning(f"{', '.join(failed_tickers)}") logger.warning(f"{', '.join(failed_tickers)}")
else: else:
@ -501,7 +503,9 @@ class CandidateFilter:
) )
# Extract short interest from fundamentals (no extra API call) # Extract short interest from fundamentals (no extra API call)
short_pct_raw = fund.get("ShortPercentOfFloat", fund.get("ShortPercentFloat")) short_pct_raw = fund.get(
"ShortPercentOfFloat", fund.get("ShortPercentFloat")
)
short_interest_pct = None short_interest_pct = None
if short_pct_raw and short_pct_raw != "N/A": if short_pct_raw and short_pct_raw != "N/A":
try: try:
@ -747,9 +751,7 @@ class CandidateFilter:
logger.info(f" ❌ No data available: {filtered_reasons['no_data']}") logger.info(f" ❌ No data available: {filtered_reasons['no_data']}")
logger.info(f" ✅ Passed filters: {len(filtered_candidates)}") logger.info(f" ✅ Passed filters: {len(filtered_candidates)}")
def _predict_ml( def _predict_ml(self, cand: Dict[str, Any], ticker: str, end_date: str) -> Any:
self, cand: Dict[str, Any], ticker: str, end_date: str
) -> Any:
"""Run ML win probability prediction for a candidate.""" """Run ML win probability prediction for a candidate."""
# Lazy-load predictor on first call # Lazy-load predictor on first call
if not self._ml_predictor_loaded: if not self._ml_predictor_loaded:
@ -767,10 +769,10 @@ class CandidateFilter:
return None return None
try: try:
from tradingagents.dataflows.y_finance import download_history
from tradingagents.ml.feature_engineering import ( from tradingagents.ml.feature_engineering import (
compute_features_single, compute_features_single,
) )
from tradingagents.dataflows.y_finance import download_history
# Fetch OHLCV for feature computation (needs ~210 rows of history) # Fetch OHLCV for feature computation (needs ~210 rows of history)
ohlcv = download_history( ohlcv = download_history(

View File

@ -52,7 +52,9 @@ class StockRanking(BaseModel):
strategy_match: str = Field(description="Strategy that matched") strategy_match: str = Field(description="Strategy that matched")
final_score: int = Field(description="Score 0-100") final_score: int = Field(description="Score 0-100")
confidence: int = Field(description="Confidence 1-10") confidence: int = Field(description="Confidence 1-10")
reason: str = Field(description="Detailed investment thesis (4-6 sentences) defending the trade with specific catalysts, risk/reward, and timing") reason: str = Field(
description="Detailed investment thesis (4-6 sentences) defending the trade with specific catalysts, risk/reward, and timing"
)
description: str = Field(description="Company description") description: str = Field(description="Company description")

View File

@ -5,10 +5,10 @@ from . import (
earnings_calendar, # noqa: F401 earnings_calendar, # noqa: F401
insider_buying, # noqa: F401 insider_buying, # noqa: F401
market_movers, # noqa: F401 market_movers, # noqa: F401
ml_signal, # noqa: F401
options_flow, # noqa: F401 options_flow, # noqa: F401
reddit_dd, # noqa: F401 reddit_dd, # noqa: F401
reddit_trending, # noqa: F401 reddit_trending, # noqa: F401
semantic_news, # noqa: F401 semantic_news, # noqa: F401
volume_accumulation, # noqa: F401 volume_accumulation, # noqa: F401
ml_signal, # noqa: F401
) )

View File

@ -7,7 +7,6 @@ Default: data/tickers.txt. Override via config: discovery.scanners.ml_signal.tic
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
import numpy as np
import pandas as pd import pandas as pd
from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner
@ -109,7 +108,9 @@ class MLSignalScanner(BaseScanner):
# Log individual candidate results # Log individual candidate results
if candidates: if candidates:
header = f"{'Ticker':<8} {'P(WIN)':>8} {'P(LOSS)':>9} {'Prediction':>12} {'Priority':>10}" header = (
f"{'Ticker':<8} {'P(WIN)':>8} {'P(LOSS)':>9} {'Prediction':>12} {'Priority':>10}"
)
separator = "-" * len(header) separator = "-" * len(header)
lines = ["\n ML Signal Scanner Results:", f" {header}", f" {separator}"] lines = ["\n ML Signal Scanner Results:", f" {header}", f" {separator}"]
for c in candidates: for c in candidates:
@ -143,7 +144,9 @@ class MLSignalScanner(BaseScanner):
try: try:
from tradingagents.dataflows.y_finance import download_history from tradingagents.dataflows.y_finance import download_history
logger.info(f"Batch-downloading {len(self.universe)} tickers ({self.lookback_period})...") logger.info(
f"Batch-downloading {len(self.universe)} tickers ({self.lookback_period})..."
)
# yfinance batch download — single HTTP request for all tickers # yfinance batch download — single HTTP request for all tickers
raw = download_history( raw = download_history(

View File

@ -4,9 +4,8 @@ from typing import Annotated, Any, Dict
import finnhub import finnhub
from dotenv import load_dotenv from dotenv import load_dotenv
from tradingagents.utils.logger import get_logger
from tradingagents.config import config from tradingagents.config import config
from tradingagents.utils.logger import get_logger
load_dotenv() load_dotenv()

View File

@ -7,11 +7,11 @@ import pandas as pd
from dateutil.relativedelta import relativedelta from dateutil.relativedelta import relativedelta
from tqdm import tqdm from tqdm import tqdm
from tradingagents.utils.logger import get_logger
from .config import DATA_DIR from .config import DATA_DIR
from .reddit_utils import fetch_top_from_category from .reddit_utils import fetch_top_from_category
from tradingagents.utils.logger import get_logger
logger = get_logger(__name__) logger = get_logger(__name__)

View File

@ -807,11 +807,15 @@ Return as JSON with "news" array."""
logger.info(f"Found {len(google_news)} items from Google News") logger.info(f"Found {len(google_news)} items from Google News")
min_date, max_date = self._publish_date_range(google_news) min_date, max_date = self._publish_date_range(google_news)
if min_date: if min_date:
logger.debug(f"Min publish date (Google News): {min_date.strftime('%Y-%m-%d %H:%M')}") logger.debug(
f"Min publish date (Google News): {min_date.strftime('%Y-%m-%d %H:%M')}"
)
else: else:
logger.debug("Min publish date (Google News): N/A") logger.debug("Min publish date (Google News): N/A")
if max_date: if max_date:
logger.debug(f"Max publish date (Google News): {max_date.strftime('%Y-%m-%d %H:%M')}") logger.debug(
f"Max publish date (Google News): {max_date.strftime('%Y-%m-%d %H:%M')}"
)
else: else:
logger.debug("Max publish date (Google News): N/A") logger.debug("Max publish date (Google News): N/A")
@ -837,11 +841,15 @@ Return as JSON with "news" array."""
logger.info(f"Found {len(av_news)} items from Alpha Vantage") logger.info(f"Found {len(av_news)} items from Alpha Vantage")
min_date, max_date = self._publish_date_range(av_news) min_date, max_date = self._publish_date_range(av_news)
if min_date: if min_date:
logger.debug(f"Min publish date (Alpha Vantage): {min_date.strftime('%Y-%m-%d %H:%M')}") logger.debug(
f"Min publish date (Alpha Vantage): {min_date.strftime('%Y-%m-%d %H:%M')}"
)
else: else:
logger.debug("Min publish date (Alpha Vantage): N/A") logger.debug("Min publish date (Alpha Vantage): N/A")
if max_date: if max_date:
logger.debug(f"Max publish date (Alpha Vantage): {max_date.strftime('%Y-%m-%d %H:%M')}") logger.debug(
f"Max publish date (Alpha Vantage): {max_date.strftime('%Y-%m-%d %H:%M')}"
)
else: else:
logger.debug("Max publish date (Alpha Vantage): N/A") logger.debug("Max publish date (Alpha Vantage): N/A")

View File

@ -493,7 +493,9 @@ Extract all stock ticker symbols mentioned in the post or comments."""
# Handle None result (Gemini blocked content despite safety settings) # Handle None result (Gemini blocked content despite safety settings)
if result is None: if result is None:
logger.warning(f"⚠️ Content blocked for '{post['title'][:50]}...' - Skipping") logger.warning(
f"⚠️ Content blocked for '{post['title'][:50]}...' - Skipping"
)
post["quality_score"] = 0 post["quality_score"] = 0
post["quality_reason"] = ( post["quality_reason"] = (
"Content blocked by LLM safety filter. " "Content blocked by LLM safety filter. "

View File

@ -286,9 +286,7 @@ class DiscoveryGraph:
else: else:
self._add_context(incoming_context, existing, prepend=False) self._add_context(incoming_context, existing, prepend=False)
def _add_context( def _add_context(self, new_context: str, candidate: Dict[str, Any], *, prepend: bool) -> None:
self, new_context: str, candidate: Dict[str, Any], *, prepend: bool
) -> None:
""" """
Add context string to a candidate's context fields. Add context string to a candidate's context fields.
@ -492,7 +490,9 @@ class DiscoveryGraph:
try: try:
# Get result with per-scanner timeout # Get result with per-scanner timeout
name, pipeline, candidates, error, scanner_logs = future.result(timeout=timeout_seconds) name, pipeline, candidates, error, scanner_logs = future.result(
timeout=timeout_seconds
)
# Initialize pipeline list if needed # Initialize pipeline list if needed
if pipeline not in pipeline_candidates: if pipeline not in pipeline_candidates:

View File

@ -324,11 +324,7 @@ def _extract_close_series(data: Any) -> Any:
if isinstance(data.columns, pd.MultiIndex): if isinstance(data.columns, pd.MultiIndex):
if "Close" in data.columns.get_level_values(0): if "Close" in data.columns.get_level_values(0):
close_data = data["Close"] close_data = data["Close"]
series = ( series = close_data.iloc[:, 0] if isinstance(close_data, pd.DataFrame) else close_data
close_data.iloc[:, 0]
if isinstance(close_data, pd.DataFrame)
else close_data
)
elif "Close" in data.columns: elif "Close" in data.columns:
series = data["Close"] series = data["Close"]

View File

@ -14,7 +14,6 @@ from tradingagents.default_config import DEFAULT_CONFIG
# Import tools from new registry-based system # Import tools from new registry-based system
from tradingagents.tools.generator import get_agent_tools from tradingagents.tools.generator import get_agent_tools
from tradingagents.utils.logger import get_logger from tradingagents.utils.logger import get_logger
from .conditional_logic import ConditionalLogic from .conditional_logic import ConditionalLogic

View File

@ -132,9 +132,7 @@ def compute_features_bulk(ohlcv: pd.DataFrame, market_cap: Optional[float] = Non
# 7. Position within Bollinger Bands (0 = lower band, 1 = upper band) # 7. Position within Bollinger Bands (0 = lower band, 1 = upper band)
bb_range = bb_upper - bb_lower bb_range = bb_upper - bb_lower
features["bb_position"] = np.where( features["bb_position"] = np.where(bb_range > 0, (close - bb_lower) / bb_range, 0.5)
bb_range > 0, (close - bb_lower) / bb_range, 0.5
)
# 8. ADX (trend strength) # 8. ADX (trend strength)
features["adx"] = ss["dx_14"] features["adx"] = ss["dx_14"]
@ -181,7 +179,9 @@ def compute_features_bulk(ohlcv: pd.DataFrame, market_cap: Optional[float] = Non
# 21. Momentum × Compression: strong trend direction + tight Bollinger = breakout setup # 21. Momentum × Compression: strong trend direction + tight Bollinger = breakout setup
# High absolute MACD + low BB width = coiled spring # High absolute MACD + low BB width = coiled spring
features["momentum_x_compression"] = features["macd_hist"].abs() / features["bb_width_pct"].replace(0, np.nan) features["momentum_x_compression"] = features["macd_hist"].abs() / features[
"bb_width_pct"
].replace(0, np.nan)
# 22. RSI momentum: 5-day rate of change of RSI (acceleration of momentum) # 22. RSI momentum: 5-day rate of change of RSI (acceleration of momentum)
features["rsi_momentum"] = features["rsi_14"] - features["rsi_14"].shift(5) features["rsi_momentum"] = features["rsi_14"] - features["rsi_14"].shift(5)
@ -190,7 +190,9 @@ def compute_features_bulk(ohlcv: pd.DataFrame, market_cap: Optional[float] = Non
features["volume_price_confirm"] = features["volume_ratio_5d"] * features["return_1d"] features["volume_price_confirm"] = features["volume_ratio_5d"] * features["return_1d"]
# 24. Trend alignment: both SMAs agree (1 = aligned bullish, -1 = aligned bearish) # 24. Trend alignment: both SMAs agree (1 = aligned bullish, -1 = aligned bearish)
features["trend_alignment"] = np.sign(features["sma50_distance"]) * np.sign(features["sma200_distance"]) features["trend_alignment"] = np.sign(features["sma50_distance"]) * np.sign(
features["sma200_distance"]
)
# 25. Volatility regime: ATR percentile within rolling 60-day window (0-1) # 25. Volatility regime: ATR percentile within rolling 60-day window (0-1)
atr_pct_series = features["atr_pct"] atr_pct_series = features["atr_pct"]
@ -202,18 +204,20 @@ def compute_features_bulk(ohlcv: pd.DataFrame, market_cap: Optional[float] = Non
# 26. Mean reversion signal: oversold RSI + price below lower Bollinger # 26. Mean reversion signal: oversold RSI + price below lower Bollinger
features["mean_reversion_signal"] = ( features["mean_reversion_signal"] = (
(100 - features["rsi_14"]) / 100 # inversed RSI (higher = more oversold) (100 - features["rsi_14"]) / 100 # inversed RSI (higher = more oversold)
) * (1 - features["bb_position"].clip(0, 1)) # below lower band amplifies signal ) * (
1 - features["bb_position"].clip(0, 1)
) # below lower band amplifies signal
# 27. Breakout signal: above upper BB + high volume ratio # 27. Breakout signal: above upper BB + high volume ratio
features["breakout_signal"] = ( features["breakout_signal"] = features["bb_position"].clip(0, 2) * features["volume_ratio_20d"]
features["bb_position"].clip(0, 2) * features["volume_ratio_20d"]
)
# 28. MACD strength: histogram normalized by volatility # 28. MACD strength: histogram normalized by volatility
features["macd_strength"] = features["macd_hist"] / features["atr_pct"].replace(0, np.nan) features["macd_strength"] = features["macd_hist"] / features["atr_pct"].replace(0, np.nan)
# 29. Return/Volatility ratio: Sharpe-like metric # 29. Return/Volatility ratio: Sharpe-like metric
features["return_volatility_ratio"] = features["return_5d"] / features["atr_pct"].replace(0, np.nan) features["return_volatility_ratio"] = features["return_5d"] / features["atr_pct"].replace(
0, np.nan
)
# 30. Trend-momentum composite score # 30. Trend-momentum composite score
features["trend_momentum_score"] = ( features["trend_momentum_score"] = (

View File

@ -9,7 +9,7 @@ from __future__ import annotations
import os import os
import pickle import pickle
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple from typing import Any, Dict, List, Optional
import numpy as np import numpy as np
import pandas as pd import pandas as pd

View File

@ -12,8 +12,6 @@ Adding a new tool: Just add one entry here, everything else is auto-generated.
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
from tradingagents.utils.logger import get_logger
from tradingagents.dataflows.alpha_vantage import ( from tradingagents.dataflows.alpha_vantage import (
get_balance_sheet as get_alpha_vantage_balance_sheet, get_balance_sheet as get_alpha_vantage_balance_sheet,
) )
@ -105,6 +103,7 @@ from tradingagents.dataflows.y_finance import (
from tradingagents.dataflows.y_finance import ( from tradingagents.dataflows.y_finance import (
validate_tickers_batch as validate_tickers_batch_yfinance, validate_tickers_batch as validate_tickers_batch_yfinance,
) )
from tradingagents.utils.logger import get_logger
logger = get_logger(__name__) logger = get_logger(__name__)

View File

@ -33,7 +33,9 @@ def render() -> None:
# Check if data is available # Check if data is available
if not strategy_metrics: if not strategy_metrics:
st.warning("No strategy performance data available. Run performance tracking to generate data.") st.warning(
"No strategy performance data available. Run performance tracking to generate data."
)
return return
# Strategy Performance section # Strategy Performance section

View File

@ -66,8 +66,7 @@ def render():
with col1: with col1:
pipelines = list( pipelines = list(
set( set(
(r.get("pipeline") or r.get("strategy_match") or "unknown") (r.get("pipeline") or r.get("strategy_match") or "unknown") for r in recommendations
for r in recommendations
) )
) )
pipeline_filter = st.multiselect("Pipeline", pipelines, default=pipelines) pipeline_filter = st.multiselect("Pipeline", pipelines, default=pipelines)