feat(hypotheses): uncap statistical hypotheses from max_active limit
Statistical hypotheses now conclude immediately on the next runner cycle without counting toward max_active. Only implementation hypotheses occupy runner slots. Added conclude_statistical_hypothesis() for instant analysis against existing performance data with Gemini LLM enrichment. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
79a58a540c
commit
662fdb5753
|
|
@ -35,13 +35,41 @@ without any code change. Examples:
|
||||||
|
|
||||||
If statistical: run the analysis now against `data/recommendations/performance_database.json`.
|
If statistical: run the analysis now against `data/recommendations/performance_database.json`.
|
||||||
Write the finding to the relevant scanner domain file under **Evidence Log**. Print a summary.
|
Write the finding to the relevant scanner domain file under **Evidence Log**. Print a summary.
|
||||||
Done — no branch needed.
|
|
||||||
|
Then register the hypothesis in `docs/iterations/hypotheses/active.json` as `status: "pending"`
|
||||||
|
so the runner picks it up on the next cycle and attaches LLM analysis to the report:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "<scanner>-<slug>",
|
||||||
|
"scanner": "<scanner>",
|
||||||
|
"title": "<title>",
|
||||||
|
"description": "<description>",
|
||||||
|
"branch": null,
|
||||||
|
"pr_number": null,
|
||||||
|
"status": "pending",
|
||||||
|
"priority": 0,
|
||||||
|
"expected_impact": "low",
|
||||||
|
"hypothesis_type": "statistical",
|
||||||
|
"created_at": "<YYYY-MM-DD>",
|
||||||
|
"min_days": 0,
|
||||||
|
"days_elapsed": 0,
|
||||||
|
"picks_log": [],
|
||||||
|
"baseline_scanner": "<scanner>",
|
||||||
|
"conclusion": null
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Commit and push the updated `active.json` to `main`. Done — no branch or worktree needed.
|
||||||
|
|
||||||
## Step 3b: Implementation Path
|
## Step 3b: Implementation Path
|
||||||
|
|
||||||
### 3b-i: Capacity check
|
### 3b-i: Capacity check
|
||||||
|
|
||||||
Count running hypotheses from `active.json`. If fewer than `max_active` running, proceed.
|
Count running hypotheses where `hypothesis_type == "implementation"` from `active.json`.
|
||||||
|
Statistical hypotheses do not consume runner slots and are excluded from this count.
|
||||||
|
|
||||||
|
If fewer than `max_active` implementation hypotheses are running, proceed.
|
||||||
If at capacity: add the new hypothesis as `status: "pending"` — running experiments are NEVER
|
If at capacity: add the new hypothesis as `status: "pending"` — running experiments are NEVER
|
||||||
paused mid-streak. Inform the user which slot it is queued behind and when it will likely start.
|
paused mid-streak. Inform the user which slot it is queued behind and when it will likely start.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -409,9 +409,89 @@ def _delta_str(hyp_val, base_val, unit: str) -> str:
|
||||||
return f"{sign}{delta:.1f}{unit}"
|
return f"{sign}{delta:.1f}{unit}"
|
||||||
|
|
||||||
|
|
||||||
|
def conclude_statistical_hypothesis(hyp: dict) -> None:
    """
    Conclude a statistical hypothesis immediately using existing performance data.

    Statistical hypotheses don't require worktrees or code changes — they answer
    a question against already-collected pick data. This runs synchronously and
    writes a markdown report to docs/iterations/hypotheses/concluded/.

    Args:
        hyp: Hypothesis record from active.json. Must contain "id" and
            "scanner"; "title" and "description" are optional.

    Side effects:
        Mutates ``hyp`` in place (sets status="concluded",
        conclusion="statistical", days_elapsed=0), creates CONCLUDED_DIR if
        missing, and writes ``<id>.md`` into it. The caller is responsible
        for persisting the updated registry back to active.json.
    """
    hid = hyp["id"]
    scanner = hyp["scanner"]
    print(f"\n── Statistical hypothesis: {hid} ──", flush=True)

    # Load performance database (best-effort: a missing or unreadable database
    # yields an empty pick list rather than aborting the conclusion).
    picks = []
    if DB_PATH.exists():
        try:
            # Explicit UTF-8: the database may contain non-ASCII names and the
            # platform default encoding is not guaranteed to handle them.
            with open(DB_PATH, encoding="utf-8") as f:
                db = json.load(f)
            picks = [p for p in db if p.get("scanner") == scanner or p.get("strategy_match") == scanner]
        except Exception as e:
            print(f" Could not read performance database: {e}", flush=True)

    n = len(picks)
    print(f" Found {n} picks for scanner '{scanner}'", flush=True)

    # Compute basic stats. Each aggregate degrades to None (rendered as '—')
    # when no data points exist, so an empty database never divides by zero.
    scores = [p["final_score"] for p in picks if p.get("final_score") is not None]
    avg_score = round(sum(scores) / len(scores), 1) if scores else None

    returns_7d = [p["return_7d"] for p in picks if p.get("return_7d") is not None]
    # A "win" is a strictly positive 7-day return; zero counts as a loss.
    win_rate = round(100 * sum(1 for r in returns_7d if r > 0) / len(returns_7d), 1) if returns_7d else None
    avg_return = round(sum(returns_7d) / len(returns_7d), 2) if returns_7d else None

    stats_block = (
        f"- Total picks: {n}\n"
        f"- Avg score: {avg_score if avg_score is not None else '—'}\n"
        f"- 7d win rate: {win_rate if win_rate is not None else '—'}%\n"
        f"- Avg 7d return: {avg_return if avg_return is not None else '—'}%\n"
    )

    # Read scanner domain notes for LLM context, capped to stay within the
    # prompt budget.
    scanner_domain = ""
    domain_file = ROOT / "docs" / "iterations" / "scanners" / f"{scanner}.md"
    if domain_file.exists():
        scanner_domain = domain_file.read_text(encoding="utf-8")[:3000]

    # LLM analysis — reuse llm_analysis() with a synthetic conclusion dict
    # shaped like the one implementation hypotheses produce.
    conclusion = {
        "decision": "statistical",
        "reason": hyp.get("description", "Statistical analysis of existing pick data"),
        "hypothesis": {"count": n, "win_rate": win_rate, "avg_return": avg_return},
        "baseline": {},
    }
    llm_insight = llm_analysis(hyp, conclusion, scanner_domain)

    # Write the concluded report. Explicit UTF-8 is required: the body embeds
    # em dashes ('—') which can fail under a non-UTF-8 locale default.
    CONCLUDED_DIR.mkdir(parents=True, exist_ok=True)
    report_path = CONCLUDED_DIR / f"{hid}.md"
    insight_block = f"\n## LLM Analysis\n\n{llm_insight}\n" if llm_insight else ""
    report_path.write_text(
        f"# Statistical Hypothesis: {hyp.get('title', hid)}\n\n"
        f"**ID:** {hid}\n"
        f"**Scanner:** {scanner}\n"
        f"**Description:** {hyp.get('description', '')}\n"
        f"**Concluded:** {TODAY}\n\n"
        f"## Data Summary\n\n{stats_block}"
        f"{insight_block}",
        encoding="utf-8",
    )
    print(f" Report written to {report_path}", flush=True)

    # Mark concluded in the in-memory registry; caller persists active.json.
    hyp["status"] = "concluded"
    hyp["conclusion"] = "statistical"
    hyp["days_elapsed"] = 0
|
||||||
|
|
||||||
|
|
||||||
def promote_pending(registry: dict) -> None:
|
def promote_pending(registry: dict) -> None:
|
||||||
"""Promote the highest-priority pending hypothesis to running if a slot is open."""
|
"""Promote the highest-priority pending implementation hypothesis to running if a slot is open."""
|
||||||
running_count = sum(1 for h in registry["hypotheses"] if h["status"] == "running")
|
# Only implementation/forward_test hypotheses count toward max_active.
|
||||||
|
# Statistical hypotheses are concluded immediately and never occupy runner slots.
|
||||||
|
running_count = sum(
|
||||||
|
1 for h in registry["hypotheses"]
|
||||||
|
if h["status"] == "running" and h.get("hypothesis_type", "implementation") == "implementation"
|
||||||
|
)
|
||||||
max_active = registry.get("max_active", 5)
|
max_active = registry.get("max_active", 5)
|
||||||
if running_count >= max_active:
|
if running_count >= max_active:
|
||||||
return
|
return
|
||||||
|
|
@ -435,6 +515,19 @@ def main():
|
||||||
registry = load_registry()
|
registry = load_registry()
|
||||||
filter_id = os.environ.get("FILTER_ID", "").strip()
|
filter_id = os.environ.get("FILTER_ID", "").strip()
|
||||||
|
|
||||||
|
# Fast-path: conclude all pending statistical hypotheses immediately.
|
||||||
|
# They answer questions from existing data — no cap, no worktree, no waiting.
|
||||||
|
statistical_pending = [
|
||||||
|
h for h in registry.get("hypotheses", [])
|
||||||
|
if h["status"] == "pending" and h.get("hypothesis_type") == "statistical"
|
||||||
|
and (not filter_id or h["id"] == filter_id)
|
||||||
|
]
|
||||||
|
for hyp in statistical_pending:
|
||||||
|
try:
|
||||||
|
conclude_statistical_hypothesis(hyp)
|
||||||
|
except Exception as e:
|
||||||
|
print(f" Error concluding statistical hypothesis {hyp['id']}: {e}", flush=True)
|
||||||
|
|
||||||
hypotheses = registry.get("hypotheses", [])
|
hypotheses = registry.get("hypotheses", [])
|
||||||
running = [
|
running = [
|
||||||
h
|
h
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue