"""Tests for the hypothesis comparison script."""
import json
import sys
from datetime import date, timedelta
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
sys.path.insert(0, str(Path(__file__).parent.parent))
from scripts.compare_hypothesis import (
compute_metrics,
compute_7d_return,
load_baseline_metrics,
make_decision,
)

# ── compute_metrics ──────────────────────────────────────────────────────────

def test_compute_metrics_empty():
    result = compute_metrics([])
    assert result == {"count": 0, "evaluated": 0, "win_rate": None, "avg_return": None}


def test_compute_metrics_all_wins():
    picks = [
        {"return_7d": 5.0, "win_7d": True},
        {"return_7d": 3.0, "win_7d": True},
    ]
    result = compute_metrics(picks)
    assert result["win_rate"] == 100.0
    assert result["avg_return"] == 4.0
    assert result["evaluated"] == 2


def test_compute_metrics_mixed():
    picks = [
        {"return_7d": 10.0, "win_7d": True},
        {"return_7d": -5.0, "win_7d": False},
        {"return_7d": None, "win_7d": None},  # pending — excluded
    ]
    result = compute_metrics(picks)
    assert result["win_rate"] == 50.0
    assert result["avg_return"] == 2.5
    assert result["evaluated"] == 2
    assert result["count"] == 3

# ── compute_7d_return ────────────────────────────────────────────────────────

def test_compute_7d_return_positive():
    import pandas as pd

    # First close is the entry price (100.0), last close the exit (110.0),
    # so the expected 7-day return is +10%.
    close_data = [100.0, 101.0, 102.0, 103.0, 104.0, 110.0]
    mock_df = pd.DataFrame({"Close": close_data})
    with patch("scripts.compare_hypothesis.download_history", return_value=mock_df):
        ret, win = compute_7d_return("AAPL", "2026-03-01")
        assert ret == pytest.approx(10.0, rel=0.01)
        assert win is True


def test_compute_7d_return_empty_data():
    import pandas as pd

    mock_df = pd.DataFrame()
    with patch("scripts.compare_hypothesis.download_history", return_value=mock_df):
        ret, win = compute_7d_return("AAPL", "2026-03-01")
        assert ret is None
        assert win is None
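
# A sketch of the behaviour exercised above, under the assumption that
# download_history(ticker, pick_date) returns a price DataFrame for the window
# starting at the pick date (it is patched out in both tests, so the exact
# signature is an assumption here):
#
#     def compute_7d_return(ticker, pick_date):
#         df = download_history(ticker, pick_date)
#         if df.empty:
#             return None, None
#         entry, exit_ = df["Close"].iloc[0], df["Close"].iloc[-1]
#         ret = (exit_ - entry) / entry * 100.0
#         return ret, ret > 0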

# ── load_baseline_metrics ────────────────────────────────────────────────────

def test_load_baseline_metrics(tmp_path):
    db = {
        "recommendations_by_date": {
            "2026-03-01": [
                {"strategy_match": "options_flow", "return_7d": 5.0, "win_7d": True},
                {"strategy_match": "options_flow", "return_7d": -2.0, "win_7d": False},
                {"strategy_match": "reddit_dd", "return_7d": 3.0, "win_7d": True},
            ]
        }
    }
    db_file = tmp_path / "performance_database.json"
    db_file.write_text(json.dumps(db))
    result = load_baseline_metrics("options_flow", str(db_file))
    # Only the two options_flow picks count: one win, one loss.
    assert result["win_rate"] == 50.0
    assert result["avg_return"] == 1.5
    assert result["count"] == 2


def test_load_baseline_metrics_missing_file(tmp_path):
    result = load_baseline_metrics("options_flow", str(tmp_path / "missing.json"))
    assert result == {"count": 0, "win_rate": None, "avg_return": None}

# ── make_decision ─────────────────────────────────────────────────────────────

def test_make_decision_accepted_by_win_rate():
    hyp = {"win_rate": 60.0, "avg_return": 0.5, "evaluated": 10}
    baseline = {"win_rate": 50.0, "avg_return": 0.5}
    decision, reason = make_decision(hyp, baseline)
    assert decision == "accepted"
    assert "win rate" in reason.lower()


def test_make_decision_accepted_by_return():
    hyp = {"win_rate": 52.0, "avg_return": 3.0, "evaluated": 10}
    baseline = {"win_rate": 50.0, "avg_return": 1.5}
    decision, reason = make_decision(hyp, baseline)
    assert decision == "accepted"
    assert "return" in reason.lower()


def test_make_decision_rejected():
    hyp = {"win_rate": 48.0, "avg_return": 0.2, "evaluated": 10}
    baseline = {"win_rate": 50.0, "avg_return": 1.0}
    decision, reason = make_decision(hyp, baseline)
    assert decision == "rejected"


def test_make_decision_insufficient_data():
    hyp = {"win_rate": 80.0, "avg_return": 5.0, "evaluated": 2}
    baseline = {"win_rate": 50.0, "avg_return": 1.0}
    decision, reason = make_decision(hyp, baseline)
    assert decision == "rejected"
    assert "insufficient" in reason.lower()