chore: linter formatting + ML scanner logging, prompt control, ranker reasoning

- Add ML signal scanner results table logging
- Add log_prompts_console config flag for prompt visibility control
- Expand ranker investment thesis to 4-6 sentence structured reasoning
- Linter auto-formatting across modified files

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 23:04:38 -08:00 · 2026-02-09 23:04:38 -08:00 · cb5ae49501
parent 43bdd6de11
commit cb5ae49501
29 changed files with 1368 additions and 126 deletions
--- a/requirements.txt
+++ b/requirements.txt
@ -1,3 +1,4 @@
 -e .
 typing-extensions
 langchain-openai
 langchain-experimental
--- a/scripts/analyze_insider_transactions.py
+++ b/scripts/analyze_insider_transactions.py
@ -166,7 +166,9 @@ def analyze_insider_transactions(ticker: str, save_csv: bool = False, output_dir
                    if pd.notna(row["Value"]) and row["Value"] > 0
                    else f"{'N/A':>16}"
                )
-                logger.info(f"  {row['Transaction']:15} | {row['Shares']:>12,.0f} shares | {value_str}")
+                logger.info(
                    f"  {row['Transaction']:15} | {row['Shares']:>12,.0f} shares | {value_str}"
                )
        # ============================================================
        # OVERALL SENTIMENT
@ -206,12 +208,16 @@ def analyze_insider_transactions(ticker: str, save_csv: bool = False, output_dir
        )
        logger.info(f"Total Sales:      {sales_count:>5} transactions | ${total_sales:>15,.0f}")
-        logger.info(f"Total Purchases:  {purchases_count:>5} transactions | ${total_purchases:>15,.0f}")
+        logger.info(
            f"Total Purchases:  {purchases_count:>5} transactions | ${total_purchases:>15,.0f}"
        )
        if sentiment == "BULLISH":
            logger.info(f"\n⚡ BULLISH: Insiders are net BUYERS (${net_value:,.0f} net buying)")
        elif sentiment == "BEARISH":
-            logger.info(f"\n⚠️  BEARISH: Significant insider SELLING (${-net_value:,.0f} net selling)")
+            logger.info(
                f"\n⚠️  BEARISH: Significant insider SELLING (${-net_value:,.0f} net selling)"
            )
        elif sentiment == "SLIGHTLY_BEARISH":
            logger.info(
                f"\n⚠️  SLIGHTLY BEARISH: More selling than buying (${-net_value:,.0f} net selling)"
@ -269,7 +275,9 @@ if __name__ == "__main__":
        )
        logger.info("Example: python analyze_insider_transactions.py AAPL TSLA NVDA")
        logger.info("         python analyze_insider_transactions.py AAPL --csv")
-        logger.info("         python analyze_insider_transactions.py AAPL --csv --output-dir ./output")
+        logger.info(
            "         python analyze_insider_transactions.py AAPL --csv --output-dir ./output"
        )
        sys.exit(1)
    # Parse arguments
--- a/scripts/build_ml_dataset.py
+++ b/scripts/build_ml_dataset.py
@ -18,7 +18,6 @@ import sys
 import time
 from pathlib import Path
 import numpy as np
 import pandas as pd
 # Add project root to path
@ -40,35 +39,210 @@ logger = get_logger(__name__)
 # Can be overridden via --ticker-file
 DEFAULT_TICKERS = [
    # Mega-cap tech
-    "AAPL", "MSFT", "GOOGL", "AMZN", "NVDA", "META", "TSLA", "AVGO", "ORCL", "CRM",
+    "AAPL",
-    "AMD", "INTC", "CSCO", "ADBE", "NFLX", "QCOM", "TXN", "AMAT", "MU", "LRCX",
+    "MSFT",
-    "KLAC", "MRVL", "SNPS", "CDNS", "PANW", "CRWD", "FTNT", "NOW", "UBER", "ABNB",
+    "GOOGL",
    "AMZN",
    "NVDA",
    "META",
    "TSLA",
    "AVGO",
    "ORCL",
    "CRM",
    "AMD",
    "INTC",
    "CSCO",
    "ADBE",
    "NFLX",
    "QCOM",
    "TXN",
    "AMAT",
    "MU",
    "LRCX",
    "KLAC",
    "MRVL",
    "SNPS",
    "CDNS",
    "PANW",
    "CRWD",
    "FTNT",
    "NOW",
    "UBER",
    "ABNB",
    # Financials
-    "JPM", "BAC", "WFC", "GS", "MS", "C", "SCHW", "BLK", "AXP", "USB",
+    "JPM",
-    "PNC", "TFC", "COF", "BK", "STT", "FITB", "HBAN", "RF", "CFG", "KEY",
+    "BAC",
    "WFC",
    "GS",
    "MS",
    "C",
    "SCHW",
    "BLK",
    "AXP",
    "USB",
    "PNC",
    "TFC",
    "COF",
    "BK",
    "STT",
    "FITB",
    "HBAN",
    "RF",
    "CFG",
    "KEY",
    # Healthcare
-    "UNH", "JNJ", "LLY", "PFE", "ABBV", "MRK", "TMO", "ABT", "DHR", "BMY",
+    "UNH",
-    "AMGN", "GILD", "ISRG", "VRTX", "REGN", "MDT", "SYK", "BSX", "EW", "ZTS",
+    "JNJ",
    "LLY",
    "PFE",
    "ABBV",
    "MRK",
    "TMO",
    "ABT",
    "DHR",
    "BMY",
    "AMGN",
    "GILD",
    "ISRG",
    "VRTX",
    "REGN",
    "MDT",
    "SYK",
    "BSX",
    "EW",
    "ZTS",
    # Consumer
-    "WMT", "PG", "KO", "PEP", "COST", "MCD", "NKE", "SBUX", "TGT", "LOW",
+    "WMT",
-    "HD", "TJX", "ROST", "DG", "DLTR", "EL", "CL", "KMB", "GIS", "K",
+    "PG",
    "KO",
    "PEP",
    "COST",
    "MCD",
    "NKE",
    "SBUX",
    "TGT",
    "LOW",
    "HD",
    "TJX",
    "ROST",
    "DG",
    "DLTR",
    "EL",
    "CL",
    "KMB",
    "GIS",
    "K",
    # Energy
-    "XOM", "CVX", "COP", "EOG", "SLB", "MPC", "PSX", "VLO", "OXY", "DVN",
+    "XOM",
-    "HAL", "FANG", "HES", "BKR", "KMI", "WMB", "OKE", "ET", "TRGP", "LNG",
+    "CVX",
    "COP",
    "EOG",
    "SLB",
    "MPC",
    "PSX",
    "VLO",
    "OXY",
    "DVN",
    "HAL",
    "FANG",
    "HES",
    "BKR",
    "KMI",
    "WMB",
    "OKE",
    "ET",
    "TRGP",
    "LNG",
    # Industrials
-    "CAT", "DE", "UNP", "UPS", "HON", "RTX", "BA", "LMT", "GD", "NOC",
+    "CAT",
-    "GE", "MMM", "EMR", "ITW", "PH", "ROK", "ETN", "SWK", "CMI", "PCAR",
+    "DE",
    "UNP",
    "UPS",
    "HON",
    "RTX",
    "BA",
    "LMT",
    "GD",
    "NOC",
    "GE",
    "MMM",
    "EMR",
    "ITW",
    "PH",
    "ROK",
    "ETN",
    "SWK",
    "CMI",
    "PCAR",
    # Materials & Utilities
-    "LIN", "APD", "ECL", "SHW", "DD", "NEM", "FCX", "VMC", "MLM", "NUE",
+    "LIN",
-    "NEE", "DUK", "SO", "D", "AEP", "EXC", "SRE", "XEL", "WEC", "ES",
+    "APD",
    "ECL",
    "SHW",
    "DD",
    "NEM",
    "FCX",
    "VMC",
    "MLM",
    "NUE",
    "NEE",
    "DUK",
    "SO",
    "D",
    "AEP",
    "EXC",
    "SRE",
    "XEL",
    "WEC",
    "ES",
    # REITs & Telecom
-    "AMT", "PLD", "CCI", "EQIX", "SPG", "O", "PSA", "DLR", "WELL", "AVB",
+    "AMT",
-    "T", "VZ", "TMUS", "CHTR", "CMCSA",
+    "PLD",
    "CCI",
    "EQIX",
    "SPG",
    "O",
    "PSA",
    "DLR",
    "WELL",
    "AVB",
    "T",
    "VZ",
    "TMUS",
    "CHTR",
    "CMCSA",
    # High-volatility / popular retail
-    "COIN", "MARA", "RIOT", "PLTR", "SOFI", "HOOD", "RBLX", "SNAP", "PINS", "SQ",
+    "COIN",
-    "SHOP", "SE", "ROKU", "DKNG", "PENN", "WYNN", "MGM", "LVS", "DASH", "TTD",
+    "MARA",
    "RIOT",
    "PLTR",
    "SOFI",
    "HOOD",
    "RBLX",
    "SNAP",
    "PINS",
    "SQ",
    "SHOP",
    "SE",
    "ROKU",
    "DKNG",
    "PENN",
    "WYNN",
    "MGM",
    "LVS",
    "DASH",
    "TTD",
    # Biotech
-    "MRNA", "BNTX", "BIIB", "SGEN", "ALNY", "BMRN", "EXAS", "DXCM", "HZNP", "INCY",
+    "MRNA",
    "BNTX",
    "BIIB",
    "SGEN",
    "ALNY",
    "BMRN",
    "EXAS",
    "DXCM",
    "HZNP",
    "INCY",
 ]
 OUTPUT_DIR = Path("data/ml")
@ -221,10 +395,16 @@ def build_dataset(
    logger.info(f"\n{'='*60}")
    logger.info(f"Dataset built: {len(dataset)} total samples from {len(all_data)} tickers")
-    logger.info(f"Label distribution:")
+    logger.info("Label distribution:")
-    logger.info(f"  WIN  (+1): {int((dataset['label'] == 1).sum()):>7} ({(dataset['label'] == 1).mean()*100:.1f}%)")
+    logger.info(
-    logger.info(f"  LOSS (-1): {int((dataset['label'] == -1).sum()):>7} ({(dataset['label'] == -1).mean()*100:.1f}%)")
+        f"  WIN  (+1): {int((dataset['label'] == 1).sum()):>7} ({(dataset['label'] == 1).mean()*100:.1f}%)"
-    logger.info(f"  TIMEOUT:   {int((dataset['label'] == 0).sum()):>7} ({(dataset['label'] == 0).mean()*100:.1f}%)")
+    )
    logger.info(
        f"  LOSS (-1): {int((dataset['label'] == -1).sum()):>7} ({(dataset['label'] == -1).mean()*100:.1f}%)"
    )
    logger.info(
        f"  TIMEOUT:   {int((dataset['label'] == 0).sum()):>7} ({(dataset['label'] == 0).mean()*100:.1f}%)"
    )
    logger.info(f"Features: {len(FEATURE_COLUMNS)}")
    logger.info(f"{'='*60}")
@ -233,12 +413,20 @@ def build_dataset(
 def main():
    parser = argparse.ArgumentParser(description="Build ML training dataset")
-    parser.add_argument("--stocks", type=int, default=None, help="Limit to N stocks from default universe")
+    parser.add_argument(
-    parser.add_argument("--ticker-file", type=str, default=None, help="File with tickers (one per line)")
+        "--stocks", type=int, default=None, help="Limit to N stocks from default universe"
    )
    parser.add_argument(
        "--ticker-file", type=str, default=None, help="File with tickers (one per line)"
    )
    parser.add_argument("--start", type=str, default="2022-01-01", help="Start date (YYYY-MM-DD)")
    parser.add_argument("--end", type=str, default="2025-12-31", help="End date (YYYY-MM-DD)")
-    parser.add_argument("--profit-target", type=float, default=0.05, help="Profit target fraction (default: 0.05)")
+    parser.add_argument(
-    parser.add_argument("--stop-loss", type=float, default=0.03, help="Stop loss fraction (default: 0.03)")
+        "--profit-target", type=float, default=0.05, help="Profit target fraction (default: 0.05)"
    )
    parser.add_argument(
        "--stop-loss", type=float, default=0.03, help="Stop loss fraction (default: 0.03)"
    )
    parser.add_argument("--holding-days", type=int, default=7, help="Max holding days (default: 7)")
    parser.add_argument("--output", type=str, default=None, help="Output parquet path")
    args = parser.parse_args()
@ -246,7 +434,9 @@ def main():
    # Determine ticker list
    if args.ticker_file:
        with open(args.ticker_file) as f:
-            tickers = [line.strip().upper() for line in f if line.strip() and not line.startswith("#")]
+            tickers = [
                line.strip().upper() for line in f if line.strip() and not line.startswith("#")
            ]
        logger.info(f"Loaded {len(tickers)} tickers from {args.ticker_file}")
    else:
        tickers = DEFAULT_TICKERS
--- a/scripts/build_strategy_specific_memories.py
+++ b/scripts/build_strategy_specific_memories.py
@ -11,7 +11,6 @@ This script creates memory sets optimized for:
 import os
 import sys
 from pathlib import Path
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
--- a/scripts/track_recommendation_performance.py
+++ b/scripts/track_recommendation_performance.py
@ -17,7 +17,6 @@ import json
 import os
 import sys
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, List
 # Add parent directory to path
--- a/scripts/train_ml_model.py
+++ b/scripts/train_ml_model.py
@ -80,13 +80,16 @@ def time_split(
    if max_train_samples is not None and len(train) > max_train_samples:
        train = train.sort_values("date").tail(max_train_samples)
        logger.info(
-            f"Limiting training samples to most recent {max_train_samples} "
+            f"Limiting training samples to most recent {max_train_samples} " f"before {val_start}"
            f"before {val_start}"
        )
    logger.info(f"Time-based split at {val_start}:")
-    logger.info(f"  Train: {len(train)} samples ({train['date'].min().date()} to {train['date'].max().date()})")
+    logger.info(
-    logger.info(f"  Val:   {len(val)} samples ({val['date'].min().date()} to {val['date'].max().date()})")
+        f"  Train: {len(train)} samples ({train['date'].min().date()} to {train['date'].max().date()})"
    )
    logger.info(
        f"  Val:   {len(val)} samples ({val['date'].min().date()} to {val['date'].max().date()})"
    )
    X_train = train[FEATURE_COLUMNS].values
    y_train = train["label"].values.astype(int)
@ -152,8 +155,12 @@ def train_lightgbm(X_train, y_train, X_val, y_val):
    class_weight = {c: total / (n_classes * count) for c, count in class_counts.items()}
    sample_weights = np.array([class_weight[y] for y in y_train_mapped])
-    train_data = lgb.Dataset(X_train, label=y_train_mapped, weight=sample_weights, feature_name=FEATURE_COLUMNS)
+    train_data = lgb.Dataset(
-    val_data = lgb.Dataset(X_val, label=y_val_mapped, feature_name=FEATURE_COLUMNS, reference=train_data)
+        X_train, label=y_train_mapped, weight=sample_weights, feature_name=FEATURE_COLUMNS
    )
    val_data = lgb.Dataset(
        X_val, label=y_val_mapped, feature_name=FEATURE_COLUMNS, reference=train_data
    )
    params = {
        "objective": "multiclass",
@ -209,7 +216,8 @@ def evaluate(model, X_val, y_val, model_type: str) -> dict:
    accuracy = accuracy_score(y_val, y_pred)
    report = classification_report(
-        y_val, y_pred,
+        y_val,
        y_pred,
        target_names=["LOSS (-1)", "TIMEOUT (0)", "WIN (+1)"],
        output_dict=True,
    )
@ -253,13 +261,21 @@ def evaluate(model, X_val, y_val, model_type: str) -> dict:
    # Top decile (top 10% by P(WIN)) — most actionable metric
    top_decile_threshold = np.percentile(win_probs_all, 90)
    top_decile_mask = win_probs_all >= top_decile_threshold
-    top_decile_win_rate = float((y_val[top_decile_mask] == 1).mean()) if top_decile_mask.sum() > 0 else 0.0
+    top_decile_win_rate = (
-    top_decile_loss_rate = float((y_val[top_decile_mask] == -1).mean()) if top_decile_mask.sum() > 0 else 0.0
+        float((y_val[top_decile_mask] == 1).mean()) if top_decile_mask.sum() > 0 else 0.0
    )
    top_decile_loss_rate = (
        float((y_val[top_decile_mask] == -1).mean()) if top_decile_mask.sum() > 0 else 0.0
    )
    metrics = {
        "model_type": model_type,
        "accuracy": round(accuracy, 4),
-        "per_class": {k: {kk: round(vv, 4) for kk, vv in v.items()} for k, v in report.items() if isinstance(v, dict)},
+        "per_class": {
            k: {kk: round(vv, 4) for kk, vv in v.items()}
            for k, v in report.items()
            if isinstance(v, dict)
        },
        "confusion_matrix": cm.tolist(),
        "avg_win_prob_for_actual_wins": round(avg_win_prob_for_actual_wins, 4),
        "high_confidence_win_precision": round(high_conf_precision, 4),
@ -276,25 +292,31 @@ def evaluate(model, X_val, y_val, model_type: str) -> dict:
    logger.info(f"\n{'='*60}")
    logger.info(f"Model: {model_type}")
    logger.info(f"Overall Accuracy: {accuracy:.1%}")
-    logger.info(f"\nPer-class metrics:")
+    logger.info("\nPer-class metrics:")
    logger.info(f"{'':>15} {'Precision':>10} {'Recall':>10} {'F1':>10} {'Support':>10}")
    for label, name in [(-1, "LOSS"), (0, "TIMEOUT"), (1, "WIN")]:
        key = f"{name} ({label:+d})"
        if key in report:
            r = report[key]
-            logger.info(f"{name:>15} {r['precision']:>10.3f} {r['recall']:>10.3f} {r['f1-score']:>10.3f} {r['support']:>10.0f}")
+            logger.info(
                f"{name:>15} {r['precision']:>10.3f} {r['recall']:>10.3f} {r['f1-score']:>10.3f} {r['support']:>10.0f}"
            )
-    logger.info(f"\nConfusion Matrix (rows=actual, cols=predicted):")
+    logger.info("\nConfusion Matrix (rows=actual, cols=predicted):")
    logger.info(f"{'':>10} {'LOSS':>8} {'TIMEOUT':>8} {'WIN':>8}")
    for i, name in enumerate(["LOSS", "TIMEOUT", "WIN"]):
        logger.info(f"{name:>10} {cm[i][0]:>8} {cm[i][1]:>8} {cm[i][2]:>8}")
-    logger.info(f"\nWin-class insights:")
+    logger.info("\nWin-class insights:")
    logger.info(f"  Avg P(WIN) for actual winners: {avg_win_prob_for_actual_wins:.1%}")
-    logger.info(f"  High-confidence (>60%) precision: {high_conf_precision:.1%} ({high_conf_count} samples)")
+    logger.info(
        f"  High-confidence (>60%) precision: {high_conf_precision:.1%} ({high_conf_count} samples)"
    )
    logger.info("\nCalibration (does higher P(WIN) = more actual wins?):")
-    logger.info(f"{'Quintile':>10} {'Avg P(WIN)':>12} {'Actual WIN%':>12} {'Actual LOSS%':>13} {'Count':>8}")
+    logger.info(
        f"{'Quintile':>10} {'Avg P(WIN)':>12} {'Actual WIN%':>12} {'Actual LOSS%':>13} {'Count':>8}"
    )
    for q_name, q_data in calibration.items():
        logger.info(
            f"{q_name:>10} {q_data['mean_predicted_win_prob']:>12.1%} "
@ -304,7 +326,9 @@ def evaluate(model, X_val, y_val, model_type: str) -> dict:
    logger.info("\nTop decile (top 10% by P(WIN)):")
    logger.info(f"  Threshold: P(WIN) >= {top_decile_threshold:.1%}")
-    logger.info(f"  Actual win rate: {top_decile_win_rate:.1%} ({int(top_decile_mask.sum())} samples)")
+    logger.info(
        f"  Actual win rate: {top_decile_win_rate:.1%} ({int(top_decile_mask.sum())} samples)"
    )
    logger.info(f"  Actual loss rate: {top_decile_loss_rate:.1%}")
    baseline_win = float((y_val == 1).mean())
    logger.info(f"  Baseline win rate: {baseline_win:.1%}")
@ -318,12 +342,25 @@ def evaluate(model, X_val, y_val, model_type: str) -> dict:
 def main():
    parser = argparse.ArgumentParser(description="Train ML model for win probability")
    parser.add_argument("--dataset", type=str, default="data/ml/training_dataset.parquet")
-    parser.add_argument("--model", type=str, choices=["tabpfn", "lightgbm", "auto"], default="auto",
+    parser.add_argument(
-                        help="Model type (auto tries TabPFN first, falls back to LightGBM)")
+        "--model",
-    parser.add_argument("--val-start", type=str, default="2024-07-01",
+        type=str,
-                        help="Validation split date (default: 2024-07-01)")
+        choices=["tabpfn", "lightgbm", "auto"],
-    parser.add_argument("--max-train-samples", type=int, default=None,
+        default="auto",
-                        help="Limit training samples to the most recent N before val-start")
+        help="Model type (auto tries TabPFN first, falls back to LightGBM)",
    )
    parser.add_argument(
        "--val-start",
        type=str,
        default="2024-07-01",
        help="Validation split date (default: 2024-07-01)",
    )
    parser.add_argument(
        "--max-train-samples",
        type=int,
        default=None,
        help="Limit training samples to the most recent N before val-start",
    )
    parser.add_argument("--output-dir", type=str, default="data/ml")
    args = parser.parse_args()
--- a/tools_testing.ipynb
+++ b/tools_testing.ipynb
--- a/tradingagents/agents/risk_mgmt/neutral_debator.py
+++ b/tradingagents/agents/risk_mgmt/neutral_debator.py
@ -68,6 +68,8 @@ Choose BUY or SELL (no HOLD). If the edge is unclear, pick the less-bad side and
        response_text = parse_llm_response(response.content)
        argument = f"Neutral Analyst: {response_text}"
-        return {"risk_debate_state": update_risk_debate_state(risk_debate_state, argument, "Neutral")}
+        return {
            "risk_debate_state": update_risk_debate_state(risk_debate_state, argument, "Neutral")
        }
    return neutral_node
--- a/tradingagents/agents/utils/agent_utils.py
+++ b/tradingagents/agents/utils/agent_utils.py
@ -1,4 +1,4 @@
-from typing import Any, Callable, Dict, List
+from typing import Any, Callable, Dict
 from langchain_core.messages import HumanMessage, RemoveMessage
@ -95,9 +95,7 @@ def update_risk_debate_state(
        "count": debate_state["count"] + 1,
    }
    # Append to the speaker's own history and set their current response
-    new_state[f"{role_key}_history"] = (
+    new_state[f"{role_key}_history"] = debate_state.get(f"{role_key}_history", "") + "\n" + argument
        debate_state.get(f"{role_key}_history", "") + "\n" + argument
    )
    new_state[f"current_{role_key}_response"] = argument
    return new_state
--- a/tradingagents/agents/utils/historical_memory_builder.py
+++ b/tradingagents/agents/utils/historical_memory_builder.py
@ -203,7 +203,9 @@ class HistoricalMemoryBuilder:
                        except (IndexError, KeyError):
                            continue
-                    logger.info(f"Found {len([m for m in high_movers if m['ticker'] == ticker])} moves for {ticker}")
+                    logger.info(
                        f"Found {len([m for m in high_movers if m['ticker'] == ticker])} moves for {ticker}"
                    )
                else:
                    logger.debug(f"{ticker}: No significant moves")
@ -440,7 +442,9 @@ class HistoricalMemoryBuilder:
        high_movers = self.find_high_movers(tickers, start_date, end_date, min_move_pct)
        if not high_movers:
-            logger.warning("⚠️  No high movers found. Try a different date range or lower threshold.")
+            logger.warning(
                "⚠️  No high movers found. Try a different date range or lower threshold."
            )
            return {}
        # Step 1.5: Sample/filter high movers based on strategy
@ -449,7 +453,9 @@ class HistoricalMemoryBuilder:
        logger.info(f"📊 Sampling Strategy: {sample_strategy}")
        logger.info(f"Total high movers found: {len(high_movers)}")
        logger.info(f"Samples to analyze: {len(sampled_movers)}")
-        logger.info(f"Estimated runtime: ~{len(sampled_movers) * len(analysis_windows) * 2} minutes")
+        logger.info(
            f"Estimated runtime: ~{len(sampled_movers) * len(analysis_windows) * 2} minutes"
        )
        # Initialize memory stores
        agent_memories = {
--- a/tradingagents/dataflows/alpha_vantage_volume.py
+++ b/tradingagents/dataflows/alpha_vantage_volume.py
@ -11,6 +11,7 @@ from pathlib import Path
 from typing import Annotated, Dict, List, Optional, Union
 import pandas as pd
 from tradingagents.dataflows.y_finance import _get_ticker_universe, get_ticker_history
 from tradingagents.utils.logger import get_logger
@ -460,7 +461,9 @@ def download_volume_data(
        logger.info("Skipping cache (use_cache=False), forcing fresh download...")
    # Download fresh data
-    logger.info(f"Downloading {history_period_days} days of volume data for {len(tickers)} tickers...")
+    logger.info(
        f"Downloading {history_period_days} days of volume data for {len(tickers)} tickers..."
    )
    raw_data = {}
    with ThreadPoolExecutor(max_workers=15) as executor:
--- a/tradingagents/dataflows/discovery/analytics.py
+++ b/tradingagents/dataflows/discovery/analytics.py
@ -349,7 +349,9 @@ class DiscoveryAnalytics:
                indent=2,
            )
-        logger.info(f"   📊 Saved {len(enriched_rankings)} recommendations for tracking: {output_file}")
+        logger.info(
            f"   📊 Saved {len(enriched_rankings)} recommendations for tracking: {output_file}"
        )
    def save_discovery_results(self, state: dict, trade_date: str, config: Dict[str, Any]):
        """Save full discovery results and tool logs."""
--- a/tradingagents/dataflows/discovery/discovery_config.py
+++ b/tradingagents/dataflows/discovery/discovery_config.py
@ -158,9 +158,7 @@ class DiscoveryConfig:
            max_candidates_to_analyze=disc.get(
                "max_candidates_to_analyze", _rd.max_candidates_to_analyze
            ),
-            analyze_all_candidates=disc.get(
+            analyze_all_candidates=disc.get("analyze_all_candidates", _rd.analyze_all_candidates),
                "analyze_all_candidates", _rd.analyze_all_candidates
            ),
            final_recommendations=disc.get("final_recommendations", _rd.final_recommendations),
            truncate_ranking_context=disc.get(
                "truncate_ranking_context", _rd.truncate_ranking_context
@ -189,12 +187,8 @@ class DiscoveryConfig:
        # Logging
        logging_cfg = LoggingConfig(
            log_tool_calls=disc.get("log_tool_calls", _ld.log_tool_calls),
-            log_tool_calls_console=disc.get(
+            log_tool_calls_console=disc.get("log_tool_calls_console", _ld.log_tool_calls_console),
-                "log_tool_calls_console", _ld.log_tool_calls_console
+            log_prompts_console=disc.get("log_prompts_console", _ld.log_prompts_console),
            ),
            log_prompts_console=disc.get(
                "log_prompts_console", _ld.log_prompts_console
            ),
            tool_log_max_chars=disc.get("tool_log_max_chars", _ld.tool_log_max_chars),
            tool_log_exclude=disc.get("tool_log_exclude", _ld.tool_log_exclude),
        )
--- a/tradingagents/dataflows/discovery/filter.py
+++ b/tradingagents/dataflows/discovery/filter.py
@ -185,7 +185,9 @@ class CandidateFilter:
        # Print consolidated list of failed tickers
        if failed_tickers:
-            logger.warning(f"⚠️  {len(failed_tickers)} tickers failed data fetch (possibly delisted)")
+            logger.warning(
                f"⚠️  {len(failed_tickers)} tickers failed data fetch (possibly delisted)"
            )
            if len(failed_tickers) <= 10:
                logger.warning(f"{', '.join(failed_tickers)}")
            else:
@ -501,7 +503,9 @@ class CandidateFilter:
                                )
                        # Extract short interest from fundamentals (no extra API call)
-                        short_pct_raw = fund.get("ShortPercentOfFloat", fund.get("ShortPercentFloat"))
+                        short_pct_raw = fund.get(
                            "ShortPercentOfFloat", fund.get("ShortPercentFloat")
                        )
                        short_interest_pct = None
                        if short_pct_raw and short_pct_raw != "N/A":
                            try:
@ -747,9 +751,7 @@ class CandidateFilter:
            logger.info(f"      ❌ No data available: {filtered_reasons['no_data']}")
        logger.info(f"      ✅ Passed filters: {len(filtered_candidates)}")
-    def _predict_ml(
+    def _predict_ml(self, cand: Dict[str, Any], ticker: str, end_date: str) -> Any:
        self, cand: Dict[str, Any], ticker: str, end_date: str
    ) -> Any:
        """Run ML win probability prediction for a candidate."""
        # Lazy-load predictor on first call
        if not self._ml_predictor_loaded:
@ -767,10 +769,10 @@ class CandidateFilter:
            return None
        try:
            from tradingagents.dataflows.y_finance import download_history
            from tradingagents.ml.feature_engineering import (
                compute_features_single,
            )
            from tradingagents.dataflows.y_finance import download_history
            # Fetch OHLCV for feature computation (needs ~210 rows of history)
            ohlcv = download_history(
--- a/tradingagents/dataflows/discovery/ranker.py
+++ b/tradingagents/dataflows/discovery/ranker.py
@ -52,7 +52,9 @@ class StockRanking(BaseModel):
    strategy_match: str = Field(description="Strategy that matched")
    final_score: int = Field(description="Score 0-100")
    confidence: int = Field(description="Confidence 1-10")
-    reason: str = Field(description="Detailed investment thesis (4-6 sentences) defending the trade with specific catalysts, risk/reward, and timing")
+    reason: str = Field(
        description="Detailed investment thesis (4-6 sentences) defending the trade with specific catalysts, risk/reward, and timing"
    )
    description: str = Field(description="Company description")
--- a/tradingagents/dataflows/discovery/scanners/__init__.py
+++ b/tradingagents/dataflows/discovery/scanners/__init__.py
@ -5,10 +5,10 @@ from . import (
    earnings_calendar,  # noqa: F401
    insider_buying,  # noqa: F401
    market_movers,  # noqa: F401
    ml_signal,  # noqa: F401
    options_flow,  # noqa: F401
    reddit_dd,  # noqa: F401
    reddit_trending,  # noqa: F401
    semantic_news,  # noqa: F401
    volume_accumulation,  # noqa: F401
    ml_signal,  # noqa: F401
 )
--- a/tradingagents/dataflows/discovery/scanners/ml_signal.py
+++ b/tradingagents/dataflows/discovery/scanners/ml_signal.py
@ -7,7 +7,6 @@ Default: data/tickers.txt. Override via config: discovery.scanners.ml_signal.tic
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Any, Dict, List, Optional
 import numpy as np
 import pandas as pd
 from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner
@ -109,7 +108,9 @@ class MLSignalScanner(BaseScanner):
        # Log individual candidate results
        if candidates:
-            header = f"{'Ticker':<8} {'P(WIN)':>8} {'P(LOSS)':>9} {'Prediction':>12} {'Priority':>10}"
+            header = (
                f"{'Ticker':<8} {'P(WIN)':>8} {'P(LOSS)':>9} {'Prediction':>12} {'Priority':>10}"
            )
            separator = "-" * len(header)
            lines = ["\n  ML Signal Scanner Results:", f"  {header}", f"  {separator}"]
            for c in candidates:
@ -143,7 +144,9 @@ class MLSignalScanner(BaseScanner):
        try:
            from tradingagents.dataflows.y_finance import download_history
-            logger.info(f"Batch-downloading {len(self.universe)} tickers ({self.lookback_period})...")
+            logger.info(
                f"Batch-downloading {len(self.universe)} tickers ({self.lookback_period})..."
            )
            # yfinance batch download — single HTTP request for all tickers
            raw = download_history(
--- a/tradingagents/dataflows/finnhub_api.py
+++ b/tradingagents/dataflows/finnhub_api.py
@ -4,9 +4,8 @@ from typing import Annotated, Any, Dict
 import finnhub
 from dotenv import load_dotenv
 from tradingagents.utils.logger import get_logger
 from tradingagents.config import config
 from tradingagents.utils.logger import get_logger
 load_dotenv()
--- a/tradingagents/dataflows/local.py
+++ b/tradingagents/dataflows/local.py
@ -7,11 +7,11 @@ import pandas as pd
 from dateutil.relativedelta import relativedelta
 from tqdm import tqdm
 from tradingagents.utils.logger import get_logger
 from .config import DATA_DIR
 from .reddit_utils import fetch_top_from_category
 from tradingagents.utils.logger import get_logger
 logger = get_logger(__name__)
--- a/tradingagents/dataflows/news_semantic_scanner.py
+++ b/tradingagents/dataflows/news_semantic_scanner.py
@ -807,11 +807,15 @@ Return as JSON with "news" array."""
            logger.info(f"Found {len(google_news)} items from Google News")
            min_date, max_date = self._publish_date_range(google_news)
            if min_date:
-                logger.debug(f"Min publish date (Google News): {min_date.strftime('%Y-%m-%d %H:%M')}")
+                logger.debug(
                    f"Min publish date (Google News): {min_date.strftime('%Y-%m-%d %H:%M')}"
                )
            else:
                logger.debug("Min publish date (Google News): N/A")
            if max_date:
-                logger.debug(f"Max publish date (Google News): {max_date.strftime('%Y-%m-%d %H:%M')}")
+                logger.debug(
                    f"Max publish date (Google News): {max_date.strftime('%Y-%m-%d %H:%M')}"
                )
            else:
                logger.debug("Max publish date (Google News): N/A")
@ -837,11 +841,15 @@ Return as JSON with "news" array."""
            logger.info(f"Found {len(av_news)} items from Alpha Vantage")
            min_date, max_date = self._publish_date_range(av_news)
            if min_date:
-                logger.debug(f"Min publish date (Alpha Vantage): {min_date.strftime('%Y-%m-%d %H:%M')}")
+                logger.debug(
                    f"Min publish date (Alpha Vantage): {min_date.strftime('%Y-%m-%d %H:%M')}"
                )
            else:
                logger.debug("Min publish date (Alpha Vantage): N/A")
            if max_date:
-                logger.debug(f"Max publish date (Alpha Vantage): {max_date.strftime('%Y-%m-%d %H:%M')}")
+                logger.debug(
                    f"Max publish date (Alpha Vantage): {max_date.strftime('%Y-%m-%d %H:%M')}"
                )
            else:
                logger.debug("Max publish date (Alpha Vantage): N/A")
--- a/tradingagents/dataflows/reddit_api.py
+++ b/tradingagents/dataflows/reddit_api.py
@ -493,7 +493,9 @@ Extract all stock ticker symbols mentioned in the post or comments."""
                    # Handle None result (Gemini blocked content despite safety settings)
                    if result is None:
-                        logger.warning(f"⚠️  Content blocked for '{post['title'][:50]}...' - Skipping")
+                        logger.warning(
                            f"⚠️  Content blocked for '{post['title'][:50]}...' - Skipping"
                        )
                        post["quality_score"] = 0
                        post["quality_reason"] = (
                            "Content blocked by LLM safety filter. "
--- a/tradingagents/graph/discovery_graph.py
+++ b/tradingagents/graph/discovery_graph.py
@ -286,9 +286,7 @@ class DiscoveryGraph:
                else:
                    self._add_context(incoming_context, existing, prepend=False)
-    def _add_context(
+    def _add_context(self, new_context: str, candidate: Dict[str, Any], *, prepend: bool) -> None:
        self, new_context: str, candidate: Dict[str, Any], *, prepend: bool
    ) -> None:
        """
        Add context string to a candidate's context fields.
@ -492,7 +490,9 @@ class DiscoveryGraph:
                try:
                    # Get result with per-scanner timeout
-                    name, pipeline, candidates, error, scanner_logs = future.result(timeout=timeout_seconds)
+                    name, pipeline, candidates, error, scanner_logs = future.result(
                        timeout=timeout_seconds
                    )
                    # Initialize pipeline list if needed
                    if pipeline not in pipeline_candidates:
--- a/tradingagents/graph/price_charts.py
+++ b/tradingagents/graph/price_charts.py
@ -324,11 +324,7 @@ def _extract_close_series(data: Any) -> Any:
    if isinstance(data.columns, pd.MultiIndex):
        if "Close" in data.columns.get_level_values(0):
            close_data = data["Close"]
-            series = (
+            series = close_data.iloc[:, 0] if isinstance(close_data, pd.DataFrame) else close_data
                close_data.iloc[:, 0]
                if isinstance(close_data, pd.DataFrame)
                else close_data
            )
    elif "Close" in data.columns:
        series = data["Close"]
--- a/tradingagents/graph/trading_graph.py
+++ b/tradingagents/graph/trading_graph.py
@ -14,7 +14,6 @@ from tradingagents.default_config import DEFAULT_CONFIG
 # Import tools from new registry-based system
 from tradingagents.tools.generator import get_agent_tools
 from tradingagents.utils.logger import get_logger
 from .conditional_logic import ConditionalLogic
--- a/tradingagents/ml/feature_engineering.py
+++ b/tradingagents/ml/feature_engineering.py
@ -132,9 +132,7 @@ def compute_features_bulk(ohlcv: pd.DataFrame, market_cap: Optional[float] = Non
    # 7. Position within Bollinger Bands (0 = lower band, 1 = upper band)
    bb_range = bb_upper - bb_lower
-    features["bb_position"] = np.where(
+    features["bb_position"] = np.where(bb_range > 0, (close - bb_lower) / bb_range, 0.5)
        bb_range > 0, (close - bb_lower) / bb_range, 0.5
    )
    # 8. ADX (trend strength)
    features["adx"] = ss["dx_14"]
@ -181,7 +179,9 @@ def compute_features_bulk(ohlcv: pd.DataFrame, market_cap: Optional[float] = Non
    # 21. Momentum × Compression: strong trend direction + tight Bollinger = breakout setup
    #     High absolute MACD + low BB width = coiled spring
-    features["momentum_x_compression"] = features["macd_hist"].abs() / features["bb_width_pct"].replace(0, np.nan)
+    features["momentum_x_compression"] = features["macd_hist"].abs() / features[
        "bb_width_pct"
    ].replace(0, np.nan)
    # 22. RSI momentum: 5-day rate of change of RSI (acceleration of momentum)
    features["rsi_momentum"] = features["rsi_14"] - features["rsi_14"].shift(5)
@ -190,7 +190,9 @@ def compute_features_bulk(ohlcv: pd.DataFrame, market_cap: Optional[float] = Non
    features["volume_price_confirm"] = features["volume_ratio_5d"] * features["return_1d"]
    # 24. Trend alignment: both SMAs agree (1 = aligned bullish, -1 = aligned bearish)
-    features["trend_alignment"] = np.sign(features["sma50_distance"]) * np.sign(features["sma200_distance"])
+    features["trend_alignment"] = np.sign(features["sma50_distance"]) * np.sign(
        features["sma200_distance"]
    )
    # 25. Volatility regime: ATR percentile within rolling 60-day window (0-1)
    atr_pct_series = features["atr_pct"]
@ -202,18 +204,20 @@ def compute_features_bulk(ohlcv: pd.DataFrame, market_cap: Optional[float] = Non
    # 26. Mean reversion signal: oversold RSI + price below lower Bollinger
    features["mean_reversion_signal"] = (
        (100 - features["rsi_14"]) / 100  # inversed RSI (higher = more oversold)
-    ) * (1 - features["bb_position"].clip(0, 1))  # below lower band amplifies signal
+    ) * (
        1 - features["bb_position"].clip(0, 1)
    )  # below lower band amplifies signal
    # 27. Breakout signal: above upper BB + high volume ratio
-    features["breakout_signal"] = (
+    features["breakout_signal"] = features["bb_position"].clip(0, 2) * features["volume_ratio_20d"]
        features["bb_position"].clip(0, 2) * features["volume_ratio_20d"]
    )
    # 28. MACD strength: histogram normalized by volatility
    features["macd_strength"] = features["macd_hist"] / features["atr_pct"].replace(0, np.nan)
    # 29. Return/Volatility ratio: Sharpe-like metric
-    features["return_volatility_ratio"] = features["return_5d"] / features["atr_pct"].replace(0, np.nan)
+    features["return_volatility_ratio"] = features["return_5d"] / features["atr_pct"].replace(
        0, np.nan
    )
    # 30. Trend-momentum composite score
    features["trend_momentum_score"] = (
--- a/tradingagents/ml/predictor.py
+++ b/tradingagents/ml/predictor.py
@ -9,7 +9,7 @@ from __future__ import annotations
 import os
 import pickle
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional
 import numpy as np
 import pandas as pd
--- a/tradingagents/tools/registry.py
+++ b/tradingagents/tools/registry.py
@ -12,8 +12,6 @@ Adding a new tool: Just add one entry here, everything else is auto-generated.
 from typing import Any, Dict, List, Optional
 from tradingagents.utils.logger import get_logger
 from tradingagents.dataflows.alpha_vantage import (
    get_balance_sheet as get_alpha_vantage_balance_sheet,
 )
@ -105,6 +103,7 @@ from tradingagents.dataflows.y_finance import (
 from tradingagents.dataflows.y_finance import (
    validate_tickers_batch as validate_tickers_batch_yfinance,
 )
 from tradingagents.utils.logger import get_logger
 logger = get_logger(__name__)
--- a/tradingagents/ui/pages/performance.py
+++ b/tradingagents/ui/pages/performance.py
@ -33,7 +33,9 @@ def render() -> None:
    # Check if data is available
    if not strategy_metrics:
-        st.warning("No strategy performance data available. Run performance tracking to generate data.")
+        st.warning(
            "No strategy performance data available. Run performance tracking to generate data."
        )
        return
    # Strategy Performance section
--- a/tradingagents/ui/pages/todays_picks.py
+++ b/tradingagents/ui/pages/todays_picks.py
@ -66,8 +66,7 @@ def render():
    with col1:
        pipelines = list(
            set(
-                (r.get("pipeline") or r.get("strategy_match") or "unknown")
+                (r.get("pipeline") or r.get("strategy_match") or "unknown") for r in recommendations
                for r in recommendations
            )
        )
        pipeline_filter = st.multiselect("Pipeline", pipelines, default=pipelines)