chore: linter formatting + ML scanner logging, prompt control, ranker reasoning

- Add ML signal scanner results table logging
- Add log_prompts_console config flag for prompt visibility control
- Expand ranker investment thesis to 4-6 sentence structured reasoning
- Linter auto-formatting across modified files

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Youssef Aitousarrah 2026-02-09 23:04:38 -08:00
parent 43bdd6de11
commit cb5ae49501
29 changed files with 1368 additions and 126 deletions

View File

@ -1,3 +1,4 @@
-e .
typing-extensions
langchain-openai
langchain-experimental

View File

@ -166,7 +166,9 @@ def analyze_insider_transactions(ticker: str, save_csv: bool = False, output_dir
if pd.notna(row["Value"]) and row["Value"] > 0
else f"{'N/A':>16}"
)
logger.info(f" {row['Transaction']:15} | {row['Shares']:>12,.0f} shares | {value_str}")
logger.info(
f" {row['Transaction']:15} | {row['Shares']:>12,.0f} shares | {value_str}"
)
# ============================================================
# OVERALL SENTIMENT
@ -206,12 +208,16 @@ def analyze_insider_transactions(ticker: str, save_csv: bool = False, output_dir
)
logger.info(f"Total Sales: {sales_count:>5} transactions | ${total_sales:>15,.0f}")
logger.info(f"Total Purchases: {purchases_count:>5} transactions | ${total_purchases:>15,.0f}")
logger.info(
f"Total Purchases: {purchases_count:>5} transactions | ${total_purchases:>15,.0f}"
)
if sentiment == "BULLISH":
logger.info(f"\n⚡ BULLISH: Insiders are net BUYERS (${net_value:,.0f} net buying)")
elif sentiment == "BEARISH":
logger.info(f"\n⚠️ BEARISH: Significant insider SELLING (${-net_value:,.0f} net selling)")
logger.info(
f"\n⚠️ BEARISH: Significant insider SELLING (${-net_value:,.0f} net selling)"
)
elif sentiment == "SLIGHTLY_BEARISH":
logger.info(
f"\n⚠️ SLIGHTLY BEARISH: More selling than buying (${-net_value:,.0f} net selling)"
@ -269,7 +275,9 @@ if __name__ == "__main__":
)
logger.info("Example: python analyze_insider_transactions.py AAPL TSLA NVDA")
logger.info(" python analyze_insider_transactions.py AAPL --csv")
logger.info(" python analyze_insider_transactions.py AAPL --csv --output-dir ./output")
logger.info(
" python analyze_insider_transactions.py AAPL --csv --output-dir ./output"
)
sys.exit(1)
# Parse arguments

View File

@ -18,7 +18,6 @@ import sys
import time
from pathlib import Path
import numpy as np
import pandas as pd
# Add project root to path
@ -40,35 +39,210 @@ logger = get_logger(__name__)
# Can be overridden via --ticker-file
DEFAULT_TICKERS = [
# Mega-cap tech
"AAPL", "MSFT", "GOOGL", "AMZN", "NVDA", "META", "TSLA", "AVGO", "ORCL", "CRM",
"AMD", "INTC", "CSCO", "ADBE", "NFLX", "QCOM", "TXN", "AMAT", "MU", "LRCX",
"KLAC", "MRVL", "SNPS", "CDNS", "PANW", "CRWD", "FTNT", "NOW", "UBER", "ABNB",
"AAPL",
"MSFT",
"GOOGL",
"AMZN",
"NVDA",
"META",
"TSLA",
"AVGO",
"ORCL",
"CRM",
"AMD",
"INTC",
"CSCO",
"ADBE",
"NFLX",
"QCOM",
"TXN",
"AMAT",
"MU",
"LRCX",
"KLAC",
"MRVL",
"SNPS",
"CDNS",
"PANW",
"CRWD",
"FTNT",
"NOW",
"UBER",
"ABNB",
# Financials
"JPM", "BAC", "WFC", "GS", "MS", "C", "SCHW", "BLK", "AXP", "USB",
"PNC", "TFC", "COF", "BK", "STT", "FITB", "HBAN", "RF", "CFG", "KEY",
"JPM",
"BAC",
"WFC",
"GS",
"MS",
"C",
"SCHW",
"BLK",
"AXP",
"USB",
"PNC",
"TFC",
"COF",
"BK",
"STT",
"FITB",
"HBAN",
"RF",
"CFG",
"KEY",
# Healthcare
"UNH", "JNJ", "LLY", "PFE", "ABBV", "MRK", "TMO", "ABT", "DHR", "BMY",
"AMGN", "GILD", "ISRG", "VRTX", "REGN", "MDT", "SYK", "BSX", "EW", "ZTS",
"UNH",
"JNJ",
"LLY",
"PFE",
"ABBV",
"MRK",
"TMO",
"ABT",
"DHR",
"BMY",
"AMGN",
"GILD",
"ISRG",
"VRTX",
"REGN",
"MDT",
"SYK",
"BSX",
"EW",
"ZTS",
# Consumer
"WMT", "PG", "KO", "PEP", "COST", "MCD", "NKE", "SBUX", "TGT", "LOW",
"HD", "TJX", "ROST", "DG", "DLTR", "EL", "CL", "KMB", "GIS", "K",
"WMT",
"PG",
"KO",
"PEP",
"COST",
"MCD",
"NKE",
"SBUX",
"TGT",
"LOW",
"HD",
"TJX",
"ROST",
"DG",
"DLTR",
"EL",
"CL",
"KMB",
"GIS",
"K",
# Energy
"XOM", "CVX", "COP", "EOG", "SLB", "MPC", "PSX", "VLO", "OXY", "DVN",
"HAL", "FANG", "HES", "BKR", "KMI", "WMB", "OKE", "ET", "TRGP", "LNG",
"XOM",
"CVX",
"COP",
"EOG",
"SLB",
"MPC",
"PSX",
"VLO",
"OXY",
"DVN",
"HAL",
"FANG",
"HES",
"BKR",
"KMI",
"WMB",
"OKE",
"ET",
"TRGP",
"LNG",
# Industrials
"CAT", "DE", "UNP", "UPS", "HON", "RTX", "BA", "LMT", "GD", "NOC",
"GE", "MMM", "EMR", "ITW", "PH", "ROK", "ETN", "SWK", "CMI", "PCAR",
"CAT",
"DE",
"UNP",
"UPS",
"HON",
"RTX",
"BA",
"LMT",
"GD",
"NOC",
"GE",
"MMM",
"EMR",
"ITW",
"PH",
"ROK",
"ETN",
"SWK",
"CMI",
"PCAR",
# Materials & Utilities
"LIN", "APD", "ECL", "SHW", "DD", "NEM", "FCX", "VMC", "MLM", "NUE",
"NEE", "DUK", "SO", "D", "AEP", "EXC", "SRE", "XEL", "WEC", "ES",
"LIN",
"APD",
"ECL",
"SHW",
"DD",
"NEM",
"FCX",
"VMC",
"MLM",
"NUE",
"NEE",
"DUK",
"SO",
"D",
"AEP",
"EXC",
"SRE",
"XEL",
"WEC",
"ES",
# REITs & Telecom
"AMT", "PLD", "CCI", "EQIX", "SPG", "O", "PSA", "DLR", "WELL", "AVB",
"T", "VZ", "TMUS", "CHTR", "CMCSA",
"AMT",
"PLD",
"CCI",
"EQIX",
"SPG",
"O",
"PSA",
"DLR",
"WELL",
"AVB",
"T",
"VZ",
"TMUS",
"CHTR",
"CMCSA",
# High-volatility / popular retail
"COIN", "MARA", "RIOT", "PLTR", "SOFI", "HOOD", "RBLX", "SNAP", "PINS", "SQ",
"SHOP", "SE", "ROKU", "DKNG", "PENN", "WYNN", "MGM", "LVS", "DASH", "TTD",
"COIN",
"MARA",
"RIOT",
"PLTR",
"SOFI",
"HOOD",
"RBLX",
"SNAP",
"PINS",
"SQ",
"SHOP",
"SE",
"ROKU",
"DKNG",
"PENN",
"WYNN",
"MGM",
"LVS",
"DASH",
"TTD",
# Biotech
"MRNA", "BNTX", "BIIB", "SGEN", "ALNY", "BMRN", "EXAS", "DXCM", "HZNP", "INCY",
"MRNA",
"BNTX",
"BIIB",
"SGEN",
"ALNY",
"BMRN",
"EXAS",
"DXCM",
"HZNP",
"INCY",
]
OUTPUT_DIR = Path("data/ml")
@ -221,10 +395,16 @@ def build_dataset(
logger.info(f"\n{'='*60}")
logger.info(f"Dataset built: {len(dataset)} total samples from {len(all_data)} tickers")
logger.info(f"Label distribution:")
logger.info(f" WIN (+1): {int((dataset['label'] == 1).sum()):>7} ({(dataset['label'] == 1).mean()*100:.1f}%)")
logger.info(f" LOSS (-1): {int((dataset['label'] == -1).sum()):>7} ({(dataset['label'] == -1).mean()*100:.1f}%)")
logger.info(f" TIMEOUT: {int((dataset['label'] == 0).sum()):>7} ({(dataset['label'] == 0).mean()*100:.1f}%)")
logger.info("Label distribution:")
logger.info(
f" WIN (+1): {int((dataset['label'] == 1).sum()):>7} ({(dataset['label'] == 1).mean()*100:.1f}%)"
)
logger.info(
f" LOSS (-1): {int((dataset['label'] == -1).sum()):>7} ({(dataset['label'] == -1).mean()*100:.1f}%)"
)
logger.info(
f" TIMEOUT: {int((dataset['label'] == 0).sum()):>7} ({(dataset['label'] == 0).mean()*100:.1f}%)"
)
logger.info(f"Features: {len(FEATURE_COLUMNS)}")
logger.info(f"{'='*60}")
@ -233,12 +413,20 @@ def build_dataset(
def main():
parser = argparse.ArgumentParser(description="Build ML training dataset")
parser.add_argument("--stocks", type=int, default=None, help="Limit to N stocks from default universe")
parser.add_argument("--ticker-file", type=str, default=None, help="File with tickers (one per line)")
parser.add_argument(
"--stocks", type=int, default=None, help="Limit to N stocks from default universe"
)
parser.add_argument(
"--ticker-file", type=str, default=None, help="File with tickers (one per line)"
)
parser.add_argument("--start", type=str, default="2022-01-01", help="Start date (YYYY-MM-DD)")
parser.add_argument("--end", type=str, default="2025-12-31", help="End date (YYYY-MM-DD)")
parser.add_argument("--profit-target", type=float, default=0.05, help="Profit target fraction (default: 0.05)")
parser.add_argument("--stop-loss", type=float, default=0.03, help="Stop loss fraction (default: 0.03)")
parser.add_argument(
"--profit-target", type=float, default=0.05, help="Profit target fraction (default: 0.05)"
)
parser.add_argument(
"--stop-loss", type=float, default=0.03, help="Stop loss fraction (default: 0.03)"
)
parser.add_argument("--holding-days", type=int, default=7, help="Max holding days (default: 7)")
parser.add_argument("--output", type=str, default=None, help="Output parquet path")
args = parser.parse_args()
@ -246,7 +434,9 @@ def main():
# Determine ticker list
if args.ticker_file:
with open(args.ticker_file) as f:
tickers = [line.strip().upper() for line in f if line.strip() and not line.startswith("#")]
tickers = [
line.strip().upper() for line in f if line.strip() and not line.startswith("#")
]
logger.info(f"Loaded {len(tickers)} tickers from {args.ticker_file}")
else:
tickers = DEFAULT_TICKERS

View File

@ -11,7 +11,6 @@ This script creates memory sets optimized for:
import os
import sys
from pathlib import Path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

View File

@ -17,7 +17,6 @@ import json
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List
# Add parent directory to path

View File

@ -80,13 +80,16 @@ def time_split(
if max_train_samples is not None and len(train) > max_train_samples:
train = train.sort_values("date").tail(max_train_samples)
logger.info(
f"Limiting training samples to most recent {max_train_samples} "
f"before {val_start}"
f"Limiting training samples to most recent {max_train_samples} " f"before {val_start}"
)
logger.info(f"Time-based split at {val_start}:")
logger.info(f" Train: {len(train)} samples ({train['date'].min().date()} to {train['date'].max().date()})")
logger.info(f" Val: {len(val)} samples ({val['date'].min().date()} to {val['date'].max().date()})")
logger.info(
f" Train: {len(train)} samples ({train['date'].min().date()} to {train['date'].max().date()})"
)
logger.info(
f" Val: {len(val)} samples ({val['date'].min().date()} to {val['date'].max().date()})"
)
X_train = train[FEATURE_COLUMNS].values
y_train = train["label"].values.astype(int)
@ -152,8 +155,12 @@ def train_lightgbm(X_train, y_train, X_val, y_val):
class_weight = {c: total / (n_classes * count) for c, count in class_counts.items()}
sample_weights = np.array([class_weight[y] for y in y_train_mapped])
train_data = lgb.Dataset(X_train, label=y_train_mapped, weight=sample_weights, feature_name=FEATURE_COLUMNS)
val_data = lgb.Dataset(X_val, label=y_val_mapped, feature_name=FEATURE_COLUMNS, reference=train_data)
train_data = lgb.Dataset(
X_train, label=y_train_mapped, weight=sample_weights, feature_name=FEATURE_COLUMNS
)
val_data = lgb.Dataset(
X_val, label=y_val_mapped, feature_name=FEATURE_COLUMNS, reference=train_data
)
params = {
"objective": "multiclass",
@ -209,7 +216,8 @@ def evaluate(model, X_val, y_val, model_type: str) -> dict:
accuracy = accuracy_score(y_val, y_pred)
report = classification_report(
y_val, y_pred,
y_val,
y_pred,
target_names=["LOSS (-1)", "TIMEOUT (0)", "WIN (+1)"],
output_dict=True,
)
@ -253,13 +261,21 @@ def evaluate(model, X_val, y_val, model_type: str) -> dict:
# Top decile (top 10% by P(WIN)) — most actionable metric
top_decile_threshold = np.percentile(win_probs_all, 90)
top_decile_mask = win_probs_all >= top_decile_threshold
top_decile_win_rate = float((y_val[top_decile_mask] == 1).mean()) if top_decile_mask.sum() > 0 else 0.0
top_decile_loss_rate = float((y_val[top_decile_mask] == -1).mean()) if top_decile_mask.sum() > 0 else 0.0
top_decile_win_rate = (
float((y_val[top_decile_mask] == 1).mean()) if top_decile_mask.sum() > 0 else 0.0
)
top_decile_loss_rate = (
float((y_val[top_decile_mask] == -1).mean()) if top_decile_mask.sum() > 0 else 0.0
)
metrics = {
"model_type": model_type,
"accuracy": round(accuracy, 4),
"per_class": {k: {kk: round(vv, 4) for kk, vv in v.items()} for k, v in report.items() if isinstance(v, dict)},
"per_class": {
k: {kk: round(vv, 4) for kk, vv in v.items()}
for k, v in report.items()
if isinstance(v, dict)
},
"confusion_matrix": cm.tolist(),
"avg_win_prob_for_actual_wins": round(avg_win_prob_for_actual_wins, 4),
"high_confidence_win_precision": round(high_conf_precision, 4),
@ -276,25 +292,31 @@ def evaluate(model, X_val, y_val, model_type: str) -> dict:
logger.info(f"\n{'='*60}")
logger.info(f"Model: {model_type}")
logger.info(f"Overall Accuracy: {accuracy:.1%}")
logger.info(f"\nPer-class metrics:")
logger.info("\nPer-class metrics:")
logger.info(f"{'':>15} {'Precision':>10} {'Recall':>10} {'F1':>10} {'Support':>10}")
for label, name in [(-1, "LOSS"), (0, "TIMEOUT"), (1, "WIN")]:
key = f"{name} ({label:+d})"
if key in report:
r = report[key]
logger.info(f"{name:>15} {r['precision']:>10.3f} {r['recall']:>10.3f} {r['f1-score']:>10.3f} {r['support']:>10.0f}")
logger.info(
f"{name:>15} {r['precision']:>10.3f} {r['recall']:>10.3f} {r['f1-score']:>10.3f} {r['support']:>10.0f}"
)
logger.info(f"\nConfusion Matrix (rows=actual, cols=predicted):")
logger.info("\nConfusion Matrix (rows=actual, cols=predicted):")
logger.info(f"{'':>10} {'LOSS':>8} {'TIMEOUT':>8} {'WIN':>8}")
for i, name in enumerate(["LOSS", "TIMEOUT", "WIN"]):
logger.info(f"{name:>10} {cm[i][0]:>8} {cm[i][1]:>8} {cm[i][2]:>8}")
logger.info(f"\nWin-class insights:")
logger.info("\nWin-class insights:")
logger.info(f" Avg P(WIN) for actual winners: {avg_win_prob_for_actual_wins:.1%}")
logger.info(f" High-confidence (>60%) precision: {high_conf_precision:.1%} ({high_conf_count} samples)")
logger.info(
f" High-confidence (>60%) precision: {high_conf_precision:.1%} ({high_conf_count} samples)"
)
logger.info("\nCalibration (does higher P(WIN) = more actual wins?):")
logger.info(f"{'Quintile':>10} {'Avg P(WIN)':>12} {'Actual WIN%':>12} {'Actual LOSS%':>13} {'Count':>8}")
logger.info(
f"{'Quintile':>10} {'Avg P(WIN)':>12} {'Actual WIN%':>12} {'Actual LOSS%':>13} {'Count':>8}"
)
for q_name, q_data in calibration.items():
logger.info(
f"{q_name:>10} {q_data['mean_predicted_win_prob']:>12.1%} "
@ -304,7 +326,9 @@ def evaluate(model, X_val, y_val, model_type: str) -> dict:
logger.info("\nTop decile (top 10% by P(WIN)):")
logger.info(f" Threshold: P(WIN) >= {top_decile_threshold:.1%}")
logger.info(f" Actual win rate: {top_decile_win_rate:.1%} ({int(top_decile_mask.sum())} samples)")
logger.info(
f" Actual win rate: {top_decile_win_rate:.1%} ({int(top_decile_mask.sum())} samples)"
)
logger.info(f" Actual loss rate: {top_decile_loss_rate:.1%}")
baseline_win = float((y_val == 1).mean())
logger.info(f" Baseline win rate: {baseline_win:.1%}")
@ -318,12 +342,25 @@ def evaluate(model, X_val, y_val, model_type: str) -> dict:
def main():
parser = argparse.ArgumentParser(description="Train ML model for win probability")
parser.add_argument("--dataset", type=str, default="data/ml/training_dataset.parquet")
parser.add_argument("--model", type=str, choices=["tabpfn", "lightgbm", "auto"], default="auto",
help="Model type (auto tries TabPFN first, falls back to LightGBM)")
parser.add_argument("--val-start", type=str, default="2024-07-01",
help="Validation split date (default: 2024-07-01)")
parser.add_argument("--max-train-samples", type=int, default=None,
help="Limit training samples to the most recent N before val-start")
parser.add_argument(
"--model",
type=str,
choices=["tabpfn", "lightgbm", "auto"],
default="auto",
help="Model type (auto tries TabPFN first, falls back to LightGBM)",
)
parser.add_argument(
"--val-start",
type=str,
default="2024-07-01",
help="Validation split date (default: 2024-07-01)",
)
parser.add_argument(
"--max-train-samples",
type=int,
default=None,
help="Limit training samples to the most recent N before val-start",
)
parser.add_argument("--output-dir", type=str, default="data/ml")
args = parser.parse_args()

File diff suppressed because one or more lines are too long

View File

@ -68,6 +68,8 @@ Choose BUY or SELL (no HOLD). If the edge is unclear, pick the less-bad side and
response_text = parse_llm_response(response.content)
argument = f"Neutral Analyst: {response_text}"
return {"risk_debate_state": update_risk_debate_state(risk_debate_state, argument, "Neutral")}
return {
"risk_debate_state": update_risk_debate_state(risk_debate_state, argument, "Neutral")
}
return neutral_node

View File

@ -1,4 +1,4 @@
from typing import Any, Callable, Dict, List
from typing import Any, Callable, Dict
from langchain_core.messages import HumanMessage, RemoveMessage
@ -95,9 +95,7 @@ def update_risk_debate_state(
"count": debate_state["count"] + 1,
}
# Append to the speaker's own history and set their current response
new_state[f"{role_key}_history"] = (
debate_state.get(f"{role_key}_history", "") + "\n" + argument
)
new_state[f"{role_key}_history"] = debate_state.get(f"{role_key}_history", "") + "\n" + argument
new_state[f"current_{role_key}_response"] = argument
return new_state

View File

@ -203,7 +203,9 @@ class HistoricalMemoryBuilder:
except (IndexError, KeyError):
continue
logger.info(f"Found {len([m for m in high_movers if m['ticker'] == ticker])} moves for {ticker}")
logger.info(
f"Found {len([m for m in high_movers if m['ticker'] == ticker])} moves for {ticker}"
)
else:
logger.debug(f"{ticker}: No significant moves")
@ -440,7 +442,9 @@ class HistoricalMemoryBuilder:
high_movers = self.find_high_movers(tickers, start_date, end_date, min_move_pct)
if not high_movers:
logger.warning("⚠️ No high movers found. Try a different date range or lower threshold.")
logger.warning(
"⚠️ No high movers found. Try a different date range or lower threshold."
)
return {}
# Step 1.5: Sample/filter high movers based on strategy
@ -449,7 +453,9 @@ class HistoricalMemoryBuilder:
logger.info(f"📊 Sampling Strategy: {sample_strategy}")
logger.info(f"Total high movers found: {len(high_movers)}")
logger.info(f"Samples to analyze: {len(sampled_movers)}")
logger.info(f"Estimated runtime: ~{len(sampled_movers) * len(analysis_windows) * 2} minutes")
logger.info(
f"Estimated runtime: ~{len(sampled_movers) * len(analysis_windows) * 2} minutes"
)
# Initialize memory stores
agent_memories = {

View File

@ -11,6 +11,7 @@ from pathlib import Path
from typing import Annotated, Dict, List, Optional, Union
import pandas as pd
from tradingagents.dataflows.y_finance import _get_ticker_universe, get_ticker_history
from tradingagents.utils.logger import get_logger
@ -460,7 +461,9 @@ def download_volume_data(
logger.info("Skipping cache (use_cache=False), forcing fresh download...")
# Download fresh data
logger.info(f"Downloading {history_period_days} days of volume data for {len(tickers)} tickers...")
logger.info(
f"Downloading {history_period_days} days of volume data for {len(tickers)} tickers..."
)
raw_data = {}
with ThreadPoolExecutor(max_workers=15) as executor:

View File

@ -349,7 +349,9 @@ class DiscoveryAnalytics:
indent=2,
)
logger.info(f" 📊 Saved {len(enriched_rankings)} recommendations for tracking: {output_file}")
logger.info(
f" 📊 Saved {len(enriched_rankings)} recommendations for tracking: {output_file}"
)
def save_discovery_results(self, state: dict, trade_date: str, config: Dict[str, Any]):
"""Save full discovery results and tool logs."""

View File

@ -158,9 +158,7 @@ class DiscoveryConfig:
max_candidates_to_analyze=disc.get(
"max_candidates_to_analyze", _rd.max_candidates_to_analyze
),
analyze_all_candidates=disc.get(
"analyze_all_candidates", _rd.analyze_all_candidates
),
analyze_all_candidates=disc.get("analyze_all_candidates", _rd.analyze_all_candidates),
final_recommendations=disc.get("final_recommendations", _rd.final_recommendations),
truncate_ranking_context=disc.get(
"truncate_ranking_context", _rd.truncate_ranking_context
@ -189,12 +187,8 @@ class DiscoveryConfig:
# Logging
logging_cfg = LoggingConfig(
log_tool_calls=disc.get("log_tool_calls", _ld.log_tool_calls),
log_tool_calls_console=disc.get(
"log_tool_calls_console", _ld.log_tool_calls_console
),
log_prompts_console=disc.get(
"log_prompts_console", _ld.log_prompts_console
),
log_tool_calls_console=disc.get("log_tool_calls_console", _ld.log_tool_calls_console),
log_prompts_console=disc.get("log_prompts_console", _ld.log_prompts_console),
tool_log_max_chars=disc.get("tool_log_max_chars", _ld.tool_log_max_chars),
tool_log_exclude=disc.get("tool_log_exclude", _ld.tool_log_exclude),
)

View File

@ -185,7 +185,9 @@ class CandidateFilter:
# Print consolidated list of failed tickers
if failed_tickers:
logger.warning(f"⚠️ {len(failed_tickers)} tickers failed data fetch (possibly delisted)")
logger.warning(
f"⚠️ {len(failed_tickers)} tickers failed data fetch (possibly delisted)"
)
if len(failed_tickers) <= 10:
logger.warning(f"{', '.join(failed_tickers)}")
else:
@ -501,7 +503,9 @@ class CandidateFilter:
)
# Extract short interest from fundamentals (no extra API call)
short_pct_raw = fund.get("ShortPercentOfFloat", fund.get("ShortPercentFloat"))
short_pct_raw = fund.get(
"ShortPercentOfFloat", fund.get("ShortPercentFloat")
)
short_interest_pct = None
if short_pct_raw and short_pct_raw != "N/A":
try:
@ -747,9 +751,7 @@ class CandidateFilter:
logger.info(f" ❌ No data available: {filtered_reasons['no_data']}")
logger.info(f" ✅ Passed filters: {len(filtered_candidates)}")
def _predict_ml(
self, cand: Dict[str, Any], ticker: str, end_date: str
) -> Any:
def _predict_ml(self, cand: Dict[str, Any], ticker: str, end_date: str) -> Any:
"""Run ML win probability prediction for a candidate."""
# Lazy-load predictor on first call
if not self._ml_predictor_loaded:
@ -767,10 +769,10 @@ class CandidateFilter:
return None
try:
from tradingagents.dataflows.y_finance import download_history
from tradingagents.ml.feature_engineering import (
compute_features_single,
)
from tradingagents.dataflows.y_finance import download_history
# Fetch OHLCV for feature computation (needs ~210 rows of history)
ohlcv = download_history(

View File

@ -52,7 +52,9 @@ class StockRanking(BaseModel):
strategy_match: str = Field(description="Strategy that matched")
final_score: int = Field(description="Score 0-100")
confidence: int = Field(description="Confidence 1-10")
reason: str = Field(description="Detailed investment thesis (4-6 sentences) defending the trade with specific catalysts, risk/reward, and timing")
reason: str = Field(
description="Detailed investment thesis (4-6 sentences) defending the trade with specific catalysts, risk/reward, and timing"
)
description: str = Field(description="Company description")

View File

@ -5,10 +5,10 @@ from . import (
earnings_calendar, # noqa: F401
insider_buying, # noqa: F401
market_movers, # noqa: F401
ml_signal, # noqa: F401
options_flow, # noqa: F401
reddit_dd, # noqa: F401
reddit_trending, # noqa: F401
semantic_news, # noqa: F401
volume_accumulation, # noqa: F401
ml_signal, # noqa: F401
)

View File

@ -7,7 +7,6 @@ Default: data/tickers.txt. Override via config: discovery.scanners.ml_signal.tic
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any, Dict, List, Optional
import numpy as np
import pandas as pd
from tradingagents.dataflows.discovery.scanner_registry import SCANNER_REGISTRY, BaseScanner
@ -109,7 +108,9 @@ class MLSignalScanner(BaseScanner):
# Log individual candidate results
if candidates:
header = f"{'Ticker':<8} {'P(WIN)':>8} {'P(LOSS)':>9} {'Prediction':>12} {'Priority':>10}"
header = (
f"{'Ticker':<8} {'P(WIN)':>8} {'P(LOSS)':>9} {'Prediction':>12} {'Priority':>10}"
)
separator = "-" * len(header)
lines = ["\n ML Signal Scanner Results:", f" {header}", f" {separator}"]
for c in candidates:
@ -143,7 +144,9 @@ class MLSignalScanner(BaseScanner):
try:
from tradingagents.dataflows.y_finance import download_history
logger.info(f"Batch-downloading {len(self.universe)} tickers ({self.lookback_period})...")
logger.info(
f"Batch-downloading {len(self.universe)} tickers ({self.lookback_period})..."
)
# yfinance batch download — single HTTP request for all tickers
raw = download_history(

View File

@ -4,9 +4,8 @@ from typing import Annotated, Any, Dict
import finnhub
from dotenv import load_dotenv
from tradingagents.utils.logger import get_logger
from tradingagents.config import config
from tradingagents.utils.logger import get_logger
load_dotenv()

View File

@ -7,11 +7,11 @@ import pandas as pd
from dateutil.relativedelta import relativedelta
from tqdm import tqdm
from tradingagents.utils.logger import get_logger
from .config import DATA_DIR
from .reddit_utils import fetch_top_from_category
from tradingagents.utils.logger import get_logger
logger = get_logger(__name__)

View File

@ -807,11 +807,15 @@ Return as JSON with "news" array."""
logger.info(f"Found {len(google_news)} items from Google News")
min_date, max_date = self._publish_date_range(google_news)
if min_date:
logger.debug(f"Min publish date (Google News): {min_date.strftime('%Y-%m-%d %H:%M')}")
logger.debug(
f"Min publish date (Google News): {min_date.strftime('%Y-%m-%d %H:%M')}"
)
else:
logger.debug("Min publish date (Google News): N/A")
if max_date:
logger.debug(f"Max publish date (Google News): {max_date.strftime('%Y-%m-%d %H:%M')}")
logger.debug(
f"Max publish date (Google News): {max_date.strftime('%Y-%m-%d %H:%M')}"
)
else:
logger.debug("Max publish date (Google News): N/A")
@ -837,11 +841,15 @@ Return as JSON with "news" array."""
logger.info(f"Found {len(av_news)} items from Alpha Vantage")
min_date, max_date = self._publish_date_range(av_news)
if min_date:
logger.debug(f"Min publish date (Alpha Vantage): {min_date.strftime('%Y-%m-%d %H:%M')}")
logger.debug(
f"Min publish date (Alpha Vantage): {min_date.strftime('%Y-%m-%d %H:%M')}"
)
else:
logger.debug("Min publish date (Alpha Vantage): N/A")
if max_date:
logger.debug(f"Max publish date (Alpha Vantage): {max_date.strftime('%Y-%m-%d %H:%M')}")
logger.debug(
f"Max publish date (Alpha Vantage): {max_date.strftime('%Y-%m-%d %H:%M')}"
)
else:
logger.debug("Max publish date (Alpha Vantage): N/A")

View File

@ -493,7 +493,9 @@ Extract all stock ticker symbols mentioned in the post or comments."""
# Handle None result (Gemini blocked content despite safety settings)
if result is None:
logger.warning(f"⚠️ Content blocked for '{post['title'][:50]}...' - Skipping")
logger.warning(
f"⚠️ Content blocked for '{post['title'][:50]}...' - Skipping"
)
post["quality_score"] = 0
post["quality_reason"] = (
"Content blocked by LLM safety filter. "

View File

@ -286,9 +286,7 @@ class DiscoveryGraph:
else:
self._add_context(incoming_context, existing, prepend=False)
def _add_context(
self, new_context: str, candidate: Dict[str, Any], *, prepend: bool
) -> None:
def _add_context(self, new_context: str, candidate: Dict[str, Any], *, prepend: bool) -> None:
"""
Add context string to a candidate's context fields.
@ -492,7 +490,9 @@ class DiscoveryGraph:
try:
# Get result with per-scanner timeout
name, pipeline, candidates, error, scanner_logs = future.result(timeout=timeout_seconds)
name, pipeline, candidates, error, scanner_logs = future.result(
timeout=timeout_seconds
)
# Initialize pipeline list if needed
if pipeline not in pipeline_candidates:

View File

@ -324,11 +324,7 @@ def _extract_close_series(data: Any) -> Any:
if isinstance(data.columns, pd.MultiIndex):
if "Close" in data.columns.get_level_values(0):
close_data = data["Close"]
series = (
close_data.iloc[:, 0]
if isinstance(close_data, pd.DataFrame)
else close_data
)
series = close_data.iloc[:, 0] if isinstance(close_data, pd.DataFrame) else close_data
elif "Close" in data.columns:
series = data["Close"]

View File

@ -14,7 +14,6 @@ from tradingagents.default_config import DEFAULT_CONFIG
# Import tools from new registry-based system
from tradingagents.tools.generator import get_agent_tools
from tradingagents.utils.logger import get_logger
from .conditional_logic import ConditionalLogic

View File

@ -132,9 +132,7 @@ def compute_features_bulk(ohlcv: pd.DataFrame, market_cap: Optional[float] = Non
# 7. Position within Bollinger Bands (0 = lower band, 1 = upper band)
bb_range = bb_upper - bb_lower
features["bb_position"] = np.where(
bb_range > 0, (close - bb_lower) / bb_range, 0.5
)
features["bb_position"] = np.where(bb_range > 0, (close - bb_lower) / bb_range, 0.5)
# 8. ADX (trend strength)
features["adx"] = ss["dx_14"]
@ -181,7 +179,9 @@ def compute_features_bulk(ohlcv: pd.DataFrame, market_cap: Optional[float] = Non
# 21. Momentum × Compression: strong trend direction + tight Bollinger = breakout setup
# High absolute MACD + low BB width = coiled spring
features["momentum_x_compression"] = features["macd_hist"].abs() / features["bb_width_pct"].replace(0, np.nan)
features["momentum_x_compression"] = features["macd_hist"].abs() / features[
"bb_width_pct"
].replace(0, np.nan)
# 22. RSI momentum: 5-day rate of change of RSI (acceleration of momentum)
features["rsi_momentum"] = features["rsi_14"] - features["rsi_14"].shift(5)
@ -190,7 +190,9 @@ def compute_features_bulk(ohlcv: pd.DataFrame, market_cap: Optional[float] = Non
features["volume_price_confirm"] = features["volume_ratio_5d"] * features["return_1d"]
# 24. Trend alignment: both SMAs agree (1 = aligned bullish, -1 = aligned bearish)
features["trend_alignment"] = np.sign(features["sma50_distance"]) * np.sign(features["sma200_distance"])
features["trend_alignment"] = np.sign(features["sma50_distance"]) * np.sign(
features["sma200_distance"]
)
# 25. Volatility regime: ATR percentile within rolling 60-day window (0-1)
atr_pct_series = features["atr_pct"]
@ -202,18 +204,20 @@ def compute_features_bulk(ohlcv: pd.DataFrame, market_cap: Optional[float] = Non
# 26. Mean reversion signal: oversold RSI + price below lower Bollinger
features["mean_reversion_signal"] = (
(100 - features["rsi_14"]) / 100 # inversed RSI (higher = more oversold)
) * (1 - features["bb_position"].clip(0, 1)) # below lower band amplifies signal
) * (
1 - features["bb_position"].clip(0, 1)
) # below lower band amplifies signal
# 27. Breakout signal: above upper BB + high volume ratio
features["breakout_signal"] = (
features["bb_position"].clip(0, 2) * features["volume_ratio_20d"]
)
features["breakout_signal"] = features["bb_position"].clip(0, 2) * features["volume_ratio_20d"]
# 28. MACD strength: histogram normalized by volatility
features["macd_strength"] = features["macd_hist"] / features["atr_pct"].replace(0, np.nan)
# 29. Return/Volatility ratio: Sharpe-like metric
features["return_volatility_ratio"] = features["return_5d"] / features["atr_pct"].replace(0, np.nan)
features["return_volatility_ratio"] = features["return_5d"] / features["atr_pct"].replace(
0, np.nan
)
# 30. Trend-momentum composite score
features["trend_momentum_score"] = (

View File

@ -9,7 +9,7 @@ from __future__ import annotations
import os
import pickle
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional
import numpy as np
import pandas as pd

View File

@ -12,8 +12,6 @@ Adding a new tool: Just add one entry here, everything else is auto-generated.
from typing import Any, Dict, List, Optional
from tradingagents.utils.logger import get_logger
from tradingagents.dataflows.alpha_vantage import (
get_balance_sheet as get_alpha_vantage_balance_sheet,
)
@ -105,6 +103,7 @@ from tradingagents.dataflows.y_finance import (
from tradingagents.dataflows.y_finance import (
validate_tickers_batch as validate_tickers_batch_yfinance,
)
from tradingagents.utils.logger import get_logger
logger = get_logger(__name__)

View File

@ -33,7 +33,9 @@ def render() -> None:
# Check if data is available
if not strategy_metrics:
st.warning("No strategy performance data available. Run performance tracking to generate data.")
st.warning(
"No strategy performance data available. Run performance tracking to generate data."
)
return
# Strategy Performance section

View File

@ -66,8 +66,7 @@ def render():
with col1:
pipelines = list(
set(
(r.get("pipeline") or r.get("strategy_match") or "unknown")
for r in recommendations
(r.get("pipeline") or r.get("strategy_match") or "unknown") for r in recommendations
)
)
pipeline_filter = st.multiselect("Pipeline", pipelines, default=pipelines)