feat(hypotheses): add daily hypothesis runner workflow

2026-04-10 09:49:10 -07:00 · 2026-04-10 09:49:10 -07:00 · 1b782b1cd6
parent 38b9cef41c
commit 1b782b1cd6
2 changed files with 357 additions and 0 deletions
--- a/.github/workflows/hypothesis-runner.yml
+++ b/.github/workflows/hypothesis-runner.yml
@ -0,0 +1,74 @@
+# Runs scripts/run_hypothesis_runner.py once a day and commits any resulting
+# changes under docs/iterations/hypotheses/ (active.json and concluded/).
+name: Hypothesis Runner
+
+on:
+  schedule:
+    # 8:00 AM UTC daily — runs after iterate (06:00 UTC)
+    - cron: "0 8 * * *"
+  # Manual trigger; hypothesis_id is handed to the script as FILTER_ID.
+  workflow_dispatch:
+    inputs:
+      hypothesis_id:
+        description: "Run a specific hypothesis ID only (blank = all running)"
+        required: false
+        default: ""
+
+env:
+  # Interpreter version passed to actions/setup-python below.
+  PYTHON_VERSION: "3.10"
+
+jobs:
+  run-hypotheses:
+    runs-on: ubuntu-latest
+    # NOTE(review): presumably the environment that defines the secrets used below — confirm.
+    environment: TradingAgent
+    timeout-minutes: 60
+    # The job writes to the repository (commits/pushes) and manages pull requests.
+    permissions:
+      contents: write
+      pull-requests: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          # 0 = full history instead of a shallow clone; the runner script
+          # checks out other branches as git worktrees.
+          fetch-depth: 0
+          # Check out with the GH_TOKEN secret rather than the default token.
+          token: ${{ secrets.GH_TOKEN }}
+
+      # Both the runner script and the final step create commits, which need an author.
+      - name: Set up git identity
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+          cache: pip
+
+      - name: Install dependencies
+        run: pip install --upgrade pip && pip install -e .
+
+      - name: Run hypothesis experiments
+        env:
+          # GH_TOKEN is exported for the `gh` CLI calls made by the script.
+          GH_TOKEN: ${{ secrets.GH_TOKEN }}
+          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          FINNHUB_API_KEY: ${{ secrets.FINNHUB_API_KEY }}
+          ALPHA_VANTAGE_API_KEY: ${{ secrets.ALPHA_VANTAGE_API_KEY }}
+          FMP_API_KEY: ${{ secrets.FMP_API_KEY }}
+          REDDIT_CLIENT_ID: ${{ secrets.REDDIT_CLIENT_ID }}
+          REDDIT_CLIENT_SECRET: ${{ secrets.REDDIT_CLIENT_SECRET }}
+          TRADIER_API_KEY: ${{ secrets.TRADIER_API_KEY }}
+          # Manual-dispatch input; when empty the script processes every running hypothesis.
+          FILTER_ID: ${{ inputs.hypothesis_id }}
+        run: |
+          python scripts/run_hypothesis_runner.py
+
+      - name: Commit active.json updates
+        # Also run when the runner step failed, so registry changes that were
+        # already written to disk are still committed. With nothing changed
+        # this step is a no-op.
+        if: ${{ !cancelled() }}
+        env:
+          GH_TOKEN: ${{ secrets.GH_TOKEN }}
+        run: |
+          # Stage the two paths separately. A single `git add a b` stages
+          # nothing at all when one pathspec matches no files, so a missing
+          # concluded/ directory used to drop the active.json update silently.
+          git add docs/iterations/hypotheses/active.json || true
+          git add docs/iterations/hypotheses/concluded/ || true
+          if git diff --cached --quiet; then
+            echo "No registry changes"
+          else
+            git commit -m "chore(hypotheses): update registry $(date -u +%Y-%m-%d)"
+            git pull --rebase origin main
+            git push origin main
+          fi
--- a/scripts/run_hypothesis_runner.py
+++ b/scripts/run_hypothesis_runner.py
@ -0,0 +1,283 @@
+#!/usr/bin/env python3
+"""
+Hypothesis Runner — orchestrates daily experiment cycles.
+
+For each running hypothesis in active.json:
+  1. Creates a git worktree for the hypothesis branch
+  2. Runs the daily discovery pipeline in that worktree
+  3. Extracts picks from the discovery result, appends to picks.json
+  4. Commits and pushes picks to hypothesis branch
+  5. Removes worktree
+  6. Updates active.json (days_elapsed, picks_log)
+  7. If days_elapsed >= min_days: concludes the hypothesis
+
+After all hypotheses: promotes highest-priority pending → running if a slot opened.
+
+Environment variables:
+  FILTER_ID — if set, only run the hypothesis with this ID
+"""
+
+import json
+import os
+import subprocess
+import sys
+from datetime import datetime
+from pathlib import Path
+
+# Repository root: two levels up from this file (<root>/scripts/<this file>).
+_THIS_FILE = Path(__file__).resolve()
+ROOT = _THIS_FILE.parent.parent
+# Put the repository root first on the import path.
+sys.path.insert(0, str(ROOT))
+
+# Hypothesis registry, directory for conclusion write-ups, and the database
+# handed to scripts/compare_hypothesis.py via --db-path.
+ACTIVE_JSON = ROOT / "docs" / "iterations" / "hypotheses" / "active.json"
+CONCLUDED_DIR = ROOT / "docs" / "iterations" / "hypotheses" / "concluded"
+DB_PATH = ROOT / "data" / "recommendations" / "performance_database.json"
+# UTC calendar date (YYYY-MM-DD), computed once at import time.
+TODAY = f"{datetime.utcnow():%Y-%m-%d}"
+
+
+def load_registry() -> dict:
+    """Read ACTIVE_JSON and return its parsed JSON content."""
+    raw_text = ACTIVE_JSON.read_text()
+    return json.loads(raw_text)
+
+
+def save_registry(registry: dict) -> None:
+    """Write *registry* to ACTIVE_JSON as JSON indented by two spaces.
+
+    The document is serialised completely before the file is opened, so a
+    value that JSON cannot encode raises ``TypeError`` without truncating
+    the registry already on disk.
+    """
+    serialised = json.dumps(registry, indent=2)
+    with open(ACTIVE_JSON, "w") as f:
+        f.write(serialised)
+
+
+def run(cmd: list, cwd: str = None, check: bool = True) -> subprocess.CompletedProcess:
+    """Echo *cmd*, then execute it without capturing its output.
+
+    The command runs in *cwd*, or in the repository root when *cwd* is empty
+    or None. With ``check=True`` a non-zero exit status raises
+    ``subprocess.CalledProcessError``.
+    """
+    command_line = " ".join(cmd)
+    print(f"  $ {command_line}", flush=True)
+    working_dir = cwd if cwd else str(ROOT)
+    return subprocess.run(cmd, cwd=working_dir, check=check, capture_output=False)
+
+
+def extract_picks(worktree: str, scanner: str) -> list:
+    """Collect today's picks for *scanner* from the discovery results in *worktree*.
+
+    Every ``results/discovery/<TODAY>/<run>/discovery_result.json`` is read in
+    sorted run order. Entries of ``final_ranking`` whose ``strategy_match``
+    equals *scanner* become picks. When several runs list the same ticker,
+    the entry from the run that sorts last replaces the earlier one, so the
+    result holds at most one pick per ticker.
+
+    Unreadable files, files with an unexpected layout and entries without a
+    ticker are reported and skipped; they never abort the extraction.
+
+    Returns:
+        A list of pick dicts with the keys ``date``, ``ticker``, ``score``,
+        ``confidence``, ``scanner``, ``return_7d`` and ``win_7d`` (the last
+        two are always None here). Empty when there are no results for today.
+    """
+    results_dir = Path(worktree) / "results" / "discovery" / TODAY
+    if not results_dir.exists():
+        print(f"    No discovery results for {TODAY} in worktree", flush=True)
+        return []
+
+    # Keyed by ticker so a later run overrides an earlier one instead of
+    # producing a duplicate (date, ticker) pick.
+    picks_by_ticker = {}
+    for run_dir in sorted(results_dir.iterdir()):
+        result_file = run_dir / "discovery_result.json"
+        if not result_file.exists():
+            continue
+        try:
+            with open(result_file) as f:
+                data = json.load(f)
+        except (OSError, ValueError) as e:
+            # ValueError covers json.JSONDecodeError and decoding errors.
+            print(f"    Warning: could not read {result_file}: {e}", flush=True)
+            continue
+
+        ranking = data.get("final_ranking", []) if isinstance(data, dict) else None
+        if not isinstance(ranking, list):
+            print(f"    Warning: could not read {result_file}: unexpected structure", flush=True)
+            continue
+
+        for item in ranking:
+            if not isinstance(item, dict) or item.get("strategy_match") != scanner:
+                continue
+            ticker = item.get("ticker")
+            if not isinstance(ticker, str) or not ticker:
+                # Skip only the broken entry; the rest of the file is still usable.
+                print(f"    Warning: ranking entry without ticker in {result_file}", flush=True)
+                continue
+            picks_by_ticker[ticker] = {
+                "date": TODAY,
+                "ticker": ticker,
+                "score": item.get("final_score"),
+                "confidence": item.get("confidence"),
+                "scanner": scanner,
+                "return_7d": None,
+                "win_7d": None,
+            }
+    return list(picks_by_ticker.values())
+
+
+def load_picks_from_branch(hypothesis_id: str, branch: str) -> list:
+    """Return the ``picks`` list stored in picks.json on *branch*.
+
+    The file is read with ``git show <branch>:<path>`` from the repository
+    root, so no checkout is needed.
+
+    Returns:
+        The stored list of picks, or ``[]`` when ``git show`` fails (for
+        example because the file does not exist on the branch yet), when the
+        content is not valid JSON, or when it holds no ``picks`` list. The
+        last two cases print a warning because they indicate a damaged file.
+    """
+    picks_path = f"docs/iterations/hypotheses/{hypothesis_id}/picks.json"
+    result = subprocess.run(
+        ["git", "show", f"{branch}:{picks_path}"],
+        cwd=str(ROOT),
+        capture_output=True,
+        text=True,
+    )
+    if result.returncode != 0:
+        return []
+
+    try:
+        payload = json.loads(result.stdout)
+    except ValueError as e:
+        print(f"    Warning: {picks_path} on {branch} is not valid JSON: {e}", flush=True)
+        return []
+
+    if not isinstance(payload, dict):
+        print(f"    Warning: {picks_path} on {branch} has an unexpected structure", flush=True)
+        return []
+
+    picks = payload.get("picks", [])
+    if not isinstance(picks, list):
+        # e.g. "picks": null — callers iterate the result, so never return None.
+        print(f"    Warning: 'picks' in {picks_path} on {branch} is not a list", flush=True)
+        return []
+    return picks
+
+
+def save_picks_to_worktree(worktree: str, hypothesis_id: str, scanner: str, picks: list) -> None:
+    """Store *picks* as picks.json inside *worktree* and commit the file.
+
+    The file is staged with ``git add``; a commit is created only when the
+    index then differs from HEAD.
+    """
+    target_dir = Path(worktree).joinpath("docs", "iterations", "hypotheses", hypothesis_id)
+    target_dir.mkdir(parents=True, exist_ok=True)
+    target = target_dir / "picks.json"
+
+    document = json.dumps(
+        {"hypothesis_id": hypothesis_id, "scanner": scanner, "picks": picks},
+        indent=2,
+    )
+    target.write_text(document)
+
+    run(["git", "add", str(target)], cwd=worktree)
+    # `git diff --cached --quiet` exits 0 when nothing is staged.
+    staged = subprocess.run(["git", "diff", "--cached", "--quiet"], cwd=worktree)
+    if staged.returncode == 0:
+        return
+    commit_message = f"chore(hypotheses): picks {TODAY} for {hypothesis_id}"
+    run(["git", "commit", "-m", commit_message], cwd=worktree)
+
+
+def run_hypothesis(hyp: dict) -> bool:
+    """Run one daily experiment cycle for the registry entry *hyp*.
+
+    Steps: check out the hypothesis branch as a worktree under /tmp, run
+    scripts/run_daily_discovery.py there, merge today's picks into the
+    branch's picks.json and push, record TODAY in ``hyp["picks_log"]`` and
+    refresh ``hyp["days_elapsed"]``, then remove the worktree. Once
+    ``days_elapsed`` reaches ``hyp["min_days"]`` the hypothesis is concluded.
+
+    A failed discovery run is logged and skips only the picks update; the
+    day is still counted.
+
+    Args:
+        hyp: Registry entry with at least ``id``, ``branch``, ``scanner`` and
+            ``min_days``. It is updated in place.
+
+    Returns:
+        True if the hypothesis was concluded in this cycle, otherwise False.
+
+    Raises:
+        subprocess.CalledProcessError: if creating the worktree, committing
+            or pushing the picks fails.
+    """
+    hid = hyp["id"]
+    branch = hyp["branch"]
+    scanner = hyp["scanner"]
+    worktree = f"/tmp/hyp-{hid}"
+
+    print(f"\n── Hypothesis: {hid} ──", flush=True)
+
+    run(["git", "fetch", "origin", branch], check=False)
+    run(["git", "worktree", "add", worktree, branch])
+
+    try:
+        result = subprocess.run(
+            [sys.executable, "scripts/run_daily_discovery.py", "--date", TODAY, "--no-update-positions"],
+            cwd=worktree,
+            check=False,
+        )
+        if result.returncode != 0:
+            print(f"    Discovery failed for {hid}, skipping picks update", flush=True)
+        else:
+            new_picks = extract_picks(worktree, scanner)
+            existing_picks = load_picks_from_branch(hid, branch)
+            merged = list(existing_picks)
+            seen = {(p["date"], p["ticker"]) for p in existing_picks}
+            for pick in new_picks:
+                key = (pick["date"], pick["ticker"])
+                if key in seen:
+                    continue
+                # Track keys as they are added so a ticker that occurs twice
+                # in today's batch is stored only once.
+                seen.add(key)
+                merged.append(pick)
+            save_picks_to_worktree(worktree, hid, scanner, merged)
+            run(["git", "push", "origin", f"HEAD:{branch}"], cwd=worktree)
+
+        picks_log = hyp.setdefault("picks_log", [])
+        if TODAY not in picks_log:
+            picks_log.append(TODAY)
+        hyp["days_elapsed"] = len(picks_log)
+    finally:
+        run(["git", "worktree", "remove", "--force", worktree], check=False)
+
+    # Conclude only after the worktree is gone: concluding asks `gh` to delete
+    # the branch, and git refuses to delete a branch that is still checked
+    # out in a worktree.
+    if hyp["days_elapsed"] >= hyp["min_days"]:
+        return conclude_hypothesis(hyp)
+    return False
+
+
+def _format_pct(value) -> str:
+    """Render a percentage cell for the results table ("—" when the metric is missing)."""
+    # Compare against None explicitly: 0 and 0.0 are real results, not gaps.
+    return "—" if value is None else f"{value}%"
+
+
+def conclude_hypothesis(hyp: dict) -> bool:
+    """Evaluate a finished hypothesis, write its conclusion doc and settle its PR.
+
+    With no picks on the branch the hypothesis is rejected outright.
+    Otherwise scripts/compare_hypothesis.py is run and its JSON output
+    (``decision``, ``reason``, ``hypothesis``, ``baseline``) is used. The
+    markdown summary is written to CONCLUDED_DIR and staged with ``git add``.
+    If the entry has a ``pr_number`` the PR is squash-merged when accepted and
+    closed otherwise; both delete the branch. Finally ``hyp["status"]`` and
+    ``hyp["conclusion"]`` are set.
+
+    Args:
+        hyp: Registry entry; reads ``id``, ``scanner``, ``branch``, ``title``,
+            ``days_elapsed`` and optionally ``created_at``, ``description``
+            and ``pr_number``. It is updated in place.
+
+    Returns:
+        True when the hypothesis was concluded; False when the comparison
+        script failed or printed invalid JSON, in which case *hyp* is left
+        untouched so a later run can retry.
+    """
+    hid = hyp["id"]
+    scanner = hyp["scanner"]
+    branch = hyp["branch"]
+
+    print(f"\n  Concluding {hid}...", flush=True)
+
+    picks = load_picks_from_branch(hid, branch)
+    if not picks:
+        conclusion = {
+            "decision": "rejected",
+            "reason": "No picks were collected during the experiment period",
+            "hypothesis": {"count": 0, "evaluated": 0, "win_rate": None, "avg_return": None},
+            "baseline": {"count": 0, "win_rate": None, "avg_return": None},
+        }
+    else:
+        result = subprocess.run(
+            [
+                sys.executable, "scripts/compare_hypothesis.py",
+                "--hypothesis-id", hid,
+                "--picks-json", json.dumps(picks),
+                "--scanner", scanner,
+                "--db-path", str(DB_PATH),
+            ],
+            cwd=str(ROOT),
+            capture_output=True,
+            text=True,
+        )
+        if result.returncode != 0:
+            print(f"    compare_hypothesis.py failed: {result.stderr}", flush=True)
+            return False
+        try:
+            conclusion = json.loads(result.stdout)
+        except ValueError as e:
+            print(f"    compare_hypothesis.py printed invalid JSON: {e}", flush=True)
+            return False
+
+    decision = conclusion["decision"]
+    hyp_metrics = conclusion["hypothesis"]
+    base_metrics = conclusion["baseline"]
+
+    period_start = hyp.get("created_at", TODAY)
+    # The directory is not guaranteed to exist before the first conclusion.
+    CONCLUDED_DIR.mkdir(parents=True, exist_ok=True)
+    concluded_doc = CONCLUDED_DIR / f"{TODAY}-{hid}.md"
+    concluded_doc.write_text(
+        f"# Hypothesis: {hyp['title']}\n\n"
+        f"**Scanner:** {scanner}\n"
+        f"**Branch:** {branch}\n"
+        f"**Period:** {period_start} → {TODAY} ({hyp['days_elapsed']} days)\n"
+        f"**Outcome:** {'accepted ✅' if decision == 'accepted' else 'rejected ❌'}\n\n"
+        f"## Hypothesis\n{hyp.get('description', hyp['title'])}\n\n"
+        f"## Results\n\n"
+        f"| Metric | Baseline | Experiment | Delta |\n"
+        f"|---|---|---|---|\n"
+        f"| 7d win rate | {_format_pct(base_metrics.get('win_rate'))} | "
+        f"{_format_pct(hyp_metrics.get('win_rate'))} | "
+        f"{_delta_str(hyp_metrics.get('win_rate'), base_metrics.get('win_rate'), 'pp')} |\n"
+        f"| Avg return | {_format_pct(base_metrics.get('avg_return'))} | "
+        f"{_format_pct(hyp_metrics.get('avg_return'))} | "
+        f"{_delta_str(hyp_metrics.get('avg_return'), base_metrics.get('avg_return'), '%')} |\n"
+        f"| Picks | {base_metrics.get('count', '—')} | {hyp_metrics.get('count', '—')} | — |\n\n"
+        f"## Decision\n{conclusion['reason']}\n\n"
+        f"## Action\n"
+        f"{'Branch merged into main.' if decision == 'accepted' else 'Branch closed without merging.'}\n",
+        # The document contains non-ASCII characters; do not depend on the locale.
+        encoding="utf-8",
+    )
+
+    run(["git", "add", str(concluded_doc)], check=False)
+
+    pr = hyp.get("pr_number")
+    if pr:
+        if decision == "accepted":
+            gh_cmd = ["gh", "pr", "merge", str(pr), "--squash", "--delete-branch"]
+        else:
+            gh_cmd = ["gh", "pr", "close", str(pr), "--delete-branch"]
+        gh_result = subprocess.run(gh_cmd, cwd=str(ROOT), check=False)
+        if gh_result.returncode != 0:
+            # Still best-effort, but leave a trace so a PR that stays open is noticed.
+            print(
+                f"    Warning: `{' '.join(gh_cmd)}` exited with status {gh_result.returncode}",
+                flush=True,
+            )
+
+    hyp["status"] = "concluded"
+    hyp["conclusion"] = decision
+
+    print(f"  {hid}: {decision} — {conclusion['reason']}", flush=True)
+    return True
+
+
+def _delta_str(hyp_val, base_val, unit: str) -> str:
+    """Format ``hyp_val - base_val`` with one decimal place followed by *unit*.
+
+    Non-negative differences get a leading "+". Returns "—" when either
+    value is None.
+    """
+    if base_val is None or hyp_val is None:
+        return "—"
+    difference = hyp_val - base_val
+    if difference >= 0:
+        return f"+{difference:.1f}{unit}"
+    return f"{difference:.1f}{unit}"
+
+
+def promote_pending(registry: dict) -> None:
+    """Switch one pending hypothesis to running when capacity allows.
+
+    Nothing happens if the number of running hypotheses has reached
+    ``registry["max_active"]`` (default 5) or if none is pending. Otherwise
+    the pending entry with the greatest ``priority`` (default 0) is promoted;
+    on ties the entry listed first wins.
+    """
+    def priority_of(entry):
+        return entry.get("priority", 0)
+
+    active = len([h for h in registry["hypotheses"] if h["status"] == "running"])
+    if active >= registry.get("max_active", 5):
+        return
+
+    waiting = [h for h in registry["hypotheses"] if h["status"] == "pending"]
+    if waiting:
+        chosen = max(waiting, key=priority_of)
+        chosen["status"] = "running"
+        print(f"\n  Promoted pending hypothesis to running: {chosen['id']}", flush=True)
+
+
+def main():
+    """Process every running hypothesis, promote a pending one, save the registry.
+
+    When the FILTER_ID environment variable is set, only the running
+    hypothesis with that ID is processed. A failure in one hypothesis is
+    logged and does not stop the others, and the registry is always saved so
+    updates already made (day counters, conclusions) are not lost.
+
+    Raises:
+        SystemExit: with status 1, after the registry has been saved, if any
+            hypothesis failed.
+    """
+    import traceback
+
+    registry = load_registry()
+    filter_id = os.environ.get("FILTER_ID", "").strip()
+
+    hypotheses = registry.get("hypotheses", [])
+    running = [
+        h for h in hypotheses
+        if h["status"] == "running" and (not filter_id or h["id"] == filter_id)
+    ]
+
+    failed = []
+    if not running:
+        print("No running hypotheses to process.", flush=True)
+    else:
+        for hyp in running:
+            try:
+                run_hypothesis(hyp)
+            except Exception:
+                # Top-level boundary: report the failure and carry on, so one
+                # broken branch cannot discard the work done for the others.
+                hyp_id = str(hyp.get("id", "<unknown>"))
+                traceback.print_exc()
+                print(f"  Hypothesis {hyp_id} failed; continuing", flush=True)
+                failed.append(hyp_id)
+
+    promote_pending(registry)
+    save_registry(registry)
+    print("\nRegistry updated.", flush=True)
+
+    if failed:
+        print(f"Failed hypotheses: {', '.join(failed)}", flush=True)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()