Modified evaluation logic for TradingAgents to align with rule-based methods

This commit is contained in:
Quanliang Liu 2025-11-06 16:57:23 -06:00
parent e42a7edea3
commit ffff3050c8
5 changed files with 172 additions and 88 deletions

View File

@ -1,8 +1,16 @@
"""
Backtesting engine for TradingAgents and baseline strategies.
Both TradingAgents and rule-based strategies use identical return calculation logic:
1. Generate signals/actions: 1 (BUY), 0 (HOLD), -1 (SELL)
2. Convert actions to positions: 1 (long), 0 (flat)
3. Calculate returns: strategy_return = position.shift(1) * market_return
This ensures apples-to-apples comparison across all strategies.
"""
import pandas as pd
import numpy as np
from typing import Dict, List
from pathlib import Path
import json
@ -14,36 +22,37 @@ STD_FIELDS = {"Open", "High", "Low", "Close", "Adj Close", "Volume"}
class TradingAgentsBacktester:
"""Backtest engine for TradingAgents framework."""
def __init__(self, trading_agents_graph, initial_capital=100000, output_dir=None):
    """
    Set up the backtester around an existing TradingAgents graph.

    Args:
        trading_agents_graph: Graph object; `backtest` calls its
            `propagate(ticker, date_str)` once per trading day.
        initial_capital: Starting portfolio value; stored as a float.
        output_dir: Optional root directory for the decisions log. When
            falsy, `_save_decisions_log` writes under `eval_results/`.
    """
    self.graph = trading_agents_graph
    self.initial_capital = float(initial_capital)
    self.name = "TradingAgents"
    self.output_dir = output_dir
def backtest(self, ticker: str, start_date: str, end_date: str, data: pd.DataFrame) -> pd.DataFrame:
"""
Backtest TradingAgents using the same return calculation logic as rule-based strategies.
Process:
1. Collect signals (actions: 1=BUY, 0=HOLD, -1=SELL) for all dates
2. Convert actions to positions (0=flat, 1=long) using same logic as baselines
3. Calculate returns as: strategy_return = position.shift(1) * market_return
"""
# Restrict to window
df = data.loc[start_date:end_date].copy()
portfolio = pd.DataFrame(index=df.index)
portfolio["close"] = df["Close"]
if "Volume" in df.columns:
portfolio["Volume"] = df["Volume"]
portfolio["signal"] = 0
portfolio["position"] = 0.0
portfolio["cash"] = self.initial_capital
portfolio["shares"] = 0.0
portfolio["portfolio_value"] = self.initial_capital
decisions: List[Dict] = []
signals = pd.Series(0, index=df.index, dtype=float)
print(f"\nRunning TradingAgents backtest on {ticker} from {start_date} to {end_date}")
print(f"Total trading days: {len(df)}")
print("-" * 80)
# Step 1: Collect all signals/decisions
for i, (date, row) in enumerate(df.iterrows()):
date_str = date.strftime("%Y-%m-%d")
price = float(row["Close"])
# Get decision
# Get decision from TradingAgents graph
try:
print(f"\n[{i+1}/{len(df)}] {date_str} ... ", end="")
final_state, decision = self.graph.propagate(ticker, date_str)
@ -56,51 +65,60 @@ class TradingAgentsBacktester:
signal = 0
decisions.append({"date": date_str, "decision": "ERROR", "signal": 0, "price": price, "error": str(e)})
# Previous day state
if i > 0:
prev_cash = float(portfolio["cash"].iloc[i - 1])
prev_shares = float(portfolio["shares"].iloc[i - 1])
prev_pos = float(portfolio["position"].iloc[i - 1])
else:
prev_cash = self.initial_capital
prev_shares = 0.0
prev_pos = 0.0
signals.loc[date] = signal
cash, shares, position = prev_cash, prev_shares, prev_pos
# Execute: BUY opens/keeps long with all cash; SELL closes to cash; HOLD keeps.
if signal == 1 and prev_pos <= 0:
# Go long full notional
shares = cash / price if price > 0 else 0.0
cash = 0.0
position = 1.0
elif signal == -1 and prev_pos > 0:
# Exit long to cash (no shorting here; paper's figs show short arrows,
# but transactions table is still long/flat in our public code)
cash = shares * price
shares = 0.0
position = 0.0
else:
# Hold current stance
position = prev_pos
portval = cash + shares * price
portfolio.loc[date, "signal"] = signal
portfolio.loc[date, "position"] = position
portfolio.loc[date, "cash"] = cash
portfolio.loc[date, "shares"] = shares
portfolio.loc[date, "portfolio_value"] = portval
# Returns
portfolio["market_return"] = portfolio["close"].pct_change().fillna(0.0)
portfolio["portfolio_return"] = portfolio["portfolio_value"].pct_change().fillna(0.0)
portfolio["strategy_return"] = portfolio["portfolio_return"]
portfolio["cumulative_return"] = (1.0 + portfolio["strategy_return"]).cumprod()
# Step 2: Convert actions to positions (same logic as baseline strategies)
position = self._actions_to_position(signals)
# Step 3: Calculate returns using standardized logic
close = pd.to_numeric(df["Close"], errors="coerce")
market_ret = close.pct_change().fillna(0.0)
exposure = position.shift(1).fillna(0.0) # Yesterday's position determines today's exposure
strat_ret = (exposure * market_ret).astype(float)
cumret = (1.0 + strat_ret).cumprod()
portval = self.initial_capital * cumret
# Build portfolio DataFrame with same structure as baseline strategies
portfolio = pd.DataFrame(index=df.index)
portfolio["action"] = signals # 1=BUY, 0=HOLD, -1=SELL
portfolio["position"] = position # 1=long, 0=flat
portfolio["close"] = close
if "Volume" in df.columns:
vol = df["Volume"]
if isinstance(vol, pd.DataFrame) and vol.shape[1] == 1:
vol = vol.iloc[:, 0]
if isinstance(vol, pd.Series):
portfolio["Volume"] = vol
portfolio["market_return"] = market_ret
portfolio["strategy_return"] = strat_ret
portfolio["cumulative_return"] = cumret
portfolio["portfolio_value"] = portval
portfolio["trade_delta"] = portfolio["position"].diff().fillna(0.0) # +1=buy, -1=sell
self._save_decisions_log(ticker, decisions, start_date, end_date)
return portfolio
@staticmethod
def _actions_to_position(actions: pd.Series) -> pd.Series:
    """
    Turn a series of trade actions into a long-only holding state.

    Each action is coerced to float, missing values count as HOLD, and
    values are clipped to [-1, 1]. A positive action puts the strategy
    long (1.0), a negative action makes it flat (0.0), and a zero action
    carries the previous state forward. The state before the first
    action is flat.

    Args:
        actions: Per-date actions (1=BUY, 0=HOLD, -1=SELL).

    Returns:
        Float series named "position" with values in {0.0, 1.0}, sharing
        the index of `actions`.
    """
    cleaned = actions.astype(float).fillna(0.0).clip(-1, 1).to_numpy()

    # BUY pins the state to 1 and SELL pins it to 0; HOLD is left as NaN
    # so that it can inherit the most recent pinned state.
    pinned = np.where(cleaned > 0, 1.0, np.where(cleaned < 0, 0.0, np.nan))

    # Forward-fill the pinned states; leading HOLDs have nothing to
    # inherit, so they fall back to flat.
    carried = pd.Series(pinned, dtype=float).ffill().fillna(0.0).to_numpy()

    return pd.Series(carried, index=actions.index, name="position")
def _parse_decision(self, decision: str) -> int:
"""
Parse decision to signal.
@ -117,12 +135,23 @@ class TradingAgentsBacktester:
return 0
def _save_decisions_log(self, ticker: str, decisions: List[Dict], start_date: str, end_date: str):
    """
    Write the per-day decision records of one backtest to a JSON file.

    The file is a single JSON object holding run metadata ("strategy",
    "ticker", "start_date", "end_date", "total_days") plus the raw
    records under "decisions". Exactly one document is written so the
    file can be read back with one `json.load` call.

    Args:
        ticker: Ticker symbol; used as a sub-directory name.
        decisions: Decision records collected by `backtest`; must be
            JSON-serialisable.
        start_date: Window start, used verbatim in the file name.
        end_date: Window end, used verbatim in the file name.
    """
    # Use output_dir if provided, otherwise use default
    root = Path(self.output_dir) if self.output_dir else Path("eval_results")
    out = root / ticker / "TradingAgents"
    out.mkdir(parents=True, exist_ok=True)

    fp = out / f"decisions_{start_date}_to_{end_date}.json"
    payload = {
        "strategy": "TradingAgents",
        "ticker": ticker,
        "start_date": start_date,
        "end_date": end_date,
        "total_days": len(decisions),
        "decisions": decisions
    }
    with open(fp, "w") as f:
        json.dump(payload, f, indent=2)
    print(f" ✓ Saved TradingAgents detailed decisions to: {fp}")
class BacktestEngine:

View File

@ -8,6 +8,7 @@ import sys
from pathlib import Path
from datetime import datetime
import pandas as pd
import json
# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))
@ -15,7 +16,6 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
from evaluation.baseline_strategies import get_all_baseline_strategies
from evaluation.backtest import BacktestEngine, TradingAgentsBacktester, load_stock_data, standardize_single_ticker
from evaluation.metrics import calculate_all_metrics, create_comparison_table, print_metrics
from evaluation.visualize import create_summary_report
from tradingagents.graph.trading_graph import TradingAgentsGraph
from tradingagents.default_config import DEFAULT_CONFIG
@ -28,6 +28,58 @@ def is_debugging() -> bool:
return False
def save_strategy_actions_to_json(
    portfolio: pd.DataFrame,
    strategy_name: str,
    ticker: str,
    start_date: str,
    end_date: str,
    output_dir: str
) -> None:
    """
    Dump one strategy's day-by-day backtest rows to a JSON file.

    The file is written to
    `<output_dir>/<ticker>/<strategy_name>/actions_<start>_to_<end>.json`
    (directories are created as needed) and contains run metadata plus
    one record per row of `portfolio`.

    Args:
        portfolio: Backtest result whose index entries support
            `strftime` and which has the columns "action", "position",
            "close", "portfolio_value", "strategy_return" and
            "cumulative_return".
        strategy_name: Strategy label; also used as a directory name.
        ticker: Stock ticker symbol; also used as a directory name.
        start_date: Start date of the backtest (used in the file name).
        end_date: End date of the backtest (used in the file name).
        output_dir: Root directory for the output tree.
    """
    def _cell(row, column, cast, fallback):
        # Missing cells are replaced by a per-field fallback instead of
        # being cast, so no NaN ends up in the JSON output.
        value = row[column]
        return fallback if pd.isna(value) else cast(value)

    target_dir = Path(output_dir) / ticker / strategy_name
    target_dir.mkdir(parents=True, exist_ok=True)

    # One record per trading day.
    daily_records = [
        {
            "date": stamp.strftime("%Y-%m-%d"),
            "action": _cell(row, "action", int, 0),  # 1=BUY, 0=HOLD, -1=SELL
            "position": _cell(row, "position", int, 0),  # 1=long, 0=flat
            "close_price": _cell(row, "close", float, None),
            "portfolio_value": _cell(row, "portfolio_value", float, None),
            "strategy_return": _cell(row, "strategy_return", float, 0.0),
            "cumulative_return": _cell(row, "cumulative_return", float, 1.0)
        }
        for stamp, row in portfolio.iterrows()
    ]

    payload = {
        "strategy": strategy_name,
        "ticker": ticker,
        "start_date": start_date,
        "end_date": end_date,
        "total_days": len(daily_records),
        "actions": daily_records
    }

    fp = target_dir / f"actions_{start_date}_to_{end_date}.json"
    with open(fp, "w") as handle:
        json.dump(payload, handle, indent=2)
    print(f" ✓ Saved {strategy_name} actions to: {fp}")
def run_evaluation(
ticker: str,
start_date: str,
@ -72,6 +124,8 @@ def run_evaluation(
print(f"\nRunning {name}...", end=" ")
portfolio = engine.run_strategy(strategy, start_date, end_date)
print("✓ Complete")
# Save actions to JSON
save_strategy_actions_to_json(portfolio, name, ticker, start_date, end_date, output_dir)
except Exception as e:
print(f"✗ Failed: {e}")
@ -100,11 +154,14 @@ def run_evaluation(
debug=False,
config=cfg
)
ta_backtester = TradingAgentsBacktester(graph, initial_capital)
ta_backtester = TradingAgentsBacktester(graph, initial_capital, output_dir)
ta_portfolio = ta_backtester.backtest(ticker, start_date, end_date, data)
engine.results["TradingAgents"] = ta_portfolio
print("\n✓ TradingAgents backtest complete")
# Save TradingAgents actions to JSON (in consistent format with baselines)
save_strategy_actions_to_json(ta_portfolio, "TradingAgents", ticker, start_date, end_date, output_dir)
except Exception as e:
print(f"\n✗ TradingAgents failed: {e}")
@ -121,35 +178,15 @@ def run_evaluation(
all_metrics[name] = metrics
print_metrics(metrics, name)
comparison_df = create_comparison_table(all_metrics)
print("\n" + "="*80)
print("PERFORMANCE COMPARISON TABLE")
print("="*80)
print(comparison_df.to_string())
print("\n")
comparison_df.to_csv(out / f"{ticker}_comparison.csv")
print(f"Comparison table saved to: {out / f'{ticker}_comparison.csv'}")
# Visuals
print("\n" + "="*80)
print("STEP 5: Generating Visualizations")
print("="*80)
create_summary_report(ticker, engine.results, comparison_df, output_dir)
print("\n" + "="*80)
print("EVALUATION COMPLETE")
print("="*80)
print(f"\nResults saved to: {out}")
print(f" - Comparison table: {ticker}_comparison.csv")
print(f" - Cumulative returns plot: {ticker}_cumulative_returns.png")
print(f" - Metrics comparison: {ticker}_metrics_comparison.png")
if include_tradingagents and "TradingAgents" in engine.results:
print(f" - Transaction history: {ticker}_TradingAgents_transactions.png")
print(f" - Drawdown analysis: {ticker}_drawdown.png")
print(f"\nDaily actions JSON files saved for:")
for name in engine.results.keys():
print(f"{name}")
return engine.results, comparison_df
return engine.results, all_metrics
def main():
@ -158,10 +195,10 @@ def main():
parser.add_argument("--start-date", type=str, required=True, help="Start date (YYYY-MM-DD)")
parser.add_argument("--end-date", type=str, required=True, help="End date (YYYY-MM-DD)")
parser.add_argument("--capital", type=float, default=100000, help="Initial capital (default: 100000)")
parser.add_argument("--no-tradingagents", action="store_true", help="Skip TradingAgents")
parser.add_argument("--skip-tradingagents", action="store_true", help="Skip TradingAgents evaluation")
parser.add_argument("--output-dir", type=str, default=None, help="Output directory for results")
parser.add_argument("--deep-llm", type=str, default="gpt-4o-mini", help="Deep thinking LLM model")
parser.add_argument("--quick-llm", type=str, default="gpt-5-nano", help="Quick thinking LLM model")
parser.add_argument("--deep-llm", type=str, default="o4-mini", help="Deep thinking LLM model")
parser.add_argument("--quick-llm", type=str, default="gpt-4o-mini", help="Quick thinking LLM model")
parser.add_argument("--debate-rounds", type=int, default=1, help="Number of debate rounds (default: 1)")
# Used for debugging
@ -200,7 +237,7 @@ def main():
start_date=args.start_date,
end_date=args.end_date,
initial_capital=args.capital,
include_tradingagents=not args.no_tradingagents,
include_tradingagents=not args.skip_tradingagents,
output_dir=args.output_dir,
config=config
)

View File

@ -27,3 +27,4 @@ langchain-google-genai
matplotlib
seaborn
numpy
python-dotenv

View File

@ -1,4 +1,12 @@
import os
from pathlib import Path
from dotenv import load_dotenv
# Load environment variables from .env file
# Look for .env in the project root (parent of tradingagents directory)
project_root = Path(__file__).parent.parent
dotenv_path = project_root / ".env"
load_dotenv(dotenv_path=dotenv_path)
DEFAULT_CONFIG = {
"project_dir": os.path.abspath(os.path.join(os.path.dirname(__file__), ".")),
@ -13,6 +21,7 @@ DEFAULT_CONFIG = {
"deep_think_llm": "o4-mini",
"quick_think_llm": "gpt-4o-mini",
"backend_url": "https://api.openai.com/v1",
"openai_api_key": os.getenv("OPENAI_API_KEY"), # Load from .env file
# Debate and discussion settings
"max_debate_rounds": 1,
"max_risk_discuss_rounds": 1,

View File

@ -73,8 +73,16 @@ class TradingAgentsGraph:
# Initialize LLMs
if self.config["llm_provider"].lower() == "openai" or self.config["llm_provider"] == "ollama" or self.config["llm_provider"] == "openrouter":
self.deep_thinking_llm = ChatOpenAI(model=self.config["deep_think_llm"], base_url=self.config["backend_url"])
self.quick_thinking_llm = ChatOpenAI(model=self.config["quick_think_llm"], base_url=self.config["backend_url"])
self.deep_thinking_llm = ChatOpenAI(
model=self.config["deep_think_llm"],
base_url=self.config["backend_url"],
api_key=self.config.get("openai_api_key")
)
self.quick_thinking_llm = ChatOpenAI(
model=self.config["quick_think_llm"],
base_url=self.config["backend_url"],
api_key=self.config.get("openai_api_key")
)
elif self.config["llm_provider"].lower() == "anthropic":
self.deep_thinking_llm = ChatAnthropic(model=self.config["deep_think_llm"], base_url=self.config["backend_url"])
self.quick_thinking_llm = ChatAnthropic(model=self.config["quick_think_llm"], base_url=self.config["backend_url"])