# TradingAgents/agent_os/backend/routes/runs.py
# 385 lines, 14 KiB, Python

from fastapi import APIRouter, Depends, BackgroundTasks, HTTPException
from typing import Dict, Any, List, AsyncGenerator
import logging
import uuid
import time
import os
from agent_os.backend.store import runs
from agent_os.backend.dependencies import get_current_user
from agent_os.backend.services.langgraph_engine import LangGraphEngine, NODE_TO_PHASE
from agent_os.backend.services.mock_engine import MockEngine
from tradingagents.report_paths import generate_flow_id, generate_run_id
logger = logging.getLogger("agent_os.runs")
router = APIRouter(prefix="/api/run", tags=["runs"])
engine = LangGraphEngine()
mock_engine = MockEngine()
def _persist_run_to_disk(run_id: str) -> None:
    """Write a finished run's metadata and cached events to the report store.

    Best-effort: every failure is logged and swallowed so persistence never
    breaks the caller's control flow.  Runs without a ``date`` param are
    skipped because the store files runs by date.
    """
    run = runs.get(run_id)
    if not run:
        return
    try:
        from tradingagents.portfolio.store_factory import create_report_store

        flow_id = run.get("flow_id") or run.get("short_rid") or run_id[:8]
        store = create_report_store(flow_id=flow_id)
        date = (run.get("params") or {}).get("date", "")
        if not date:
            # No date — nowhere to file this run on disk.
            return
        meta = {
            "id": run_id,
            "flow_id": flow_id,
            "short_rid": flow_id,  # backward compat alias
            "type": run.get("type", ""),
            "status": run.get("status", ""),
            "created_at": run.get("created_at", 0),
            "completed_at": time.time(),
            "user_id": run.get("user_id", "anonymous"),
            "date": date,
            "params": run.get("params", {}),
            "rerun_seq": run.get("rerun_seq", 0),
        }
        store.save_run_meta(date, meta)
        store.save_run_events(date, run.get("events", []))
        logger.info("Persisted run to disk run=%s flow_id=%s", run_id, flow_id)
    except Exception:
        logger.exception("Failed to persist run to disk run=%s", run_id)
async def _run_and_store(run_id: str, gen: AsyncGenerator[Dict[str, Any], None]) -> None:
    """Drive an engine generator to completion, caching its events on the run.

    Marks the run ``running`` up front, ``completed`` when the generator is
    exhausted normally, or ``failed`` (with the error message) if it raises.
    The run is persisted to disk in every case.
    """
    entry = runs[run_id]
    entry["status"] = "running"
    entry["events"] = []
    try:
        async for event in gen:
            entry["events"].append(event)
    except Exception as exc:
        entry["status"] = "failed"
        entry["error"] = str(exc)
        logger.exception("Run failed run=%s", run_id)
    else:
        entry["status"] = "completed"
    finally:
        _persist_run_to_disk(run_id)
@router.post("/scan")
async def trigger_scan(
    background_tasks: BackgroundTasks,
    params: Dict[str, Any] = None,
    user: dict = Depends(get_current_user)
):
    """Queue a market-scan run and return its identifiers immediately."""
    payload = params or {}
    run_id = str(uuid.uuid4())
    flow_id = payload.get("flow_id") or generate_flow_id()
    run_params = {**payload, "flow_id": flow_id}
    runs[run_id] = {
        "id": run_id,
        "flow_id": flow_id,
        "short_rid": flow_id,  # backward compat alias
        "type": "scan",
        "status": "queued",
        "created_at": time.time(),
        "user_id": user["user_id"],
        "params": run_params,
        "rerun_seq": 0,
    }
    logger.info("Queued SCAN run=%s flow_id=%s user=%s", run_id, flow_id, user["user_id"])
    background_tasks.add_task(_run_and_store, run_id, engine.run_scan(run_id, run_params))
    return {"run_id": run_id, "flow_id": flow_id, "status": "queued"}
@router.post("/pipeline")
async def trigger_pipeline(
    background_tasks: BackgroundTasks,
    params: Dict[str, Any] = None,
    user: dict = Depends(get_current_user)
):
    """Queue a single-ticker analysis pipeline run."""
    payload = params or {}
    run_id = str(uuid.uuid4())
    flow_id = payload.get("flow_id") or generate_flow_id()
    run_params = {**payload, "flow_id": flow_id}
    runs[run_id] = {
        "id": run_id,
        "flow_id": flow_id,
        "short_rid": flow_id,  # backward compat alias
        "type": "pipeline",
        "status": "queued",
        "created_at": time.time(),
        "user_id": user["user_id"],
        "params": run_params,
        "rerun_seq": 0,
    }
    logger.info("Queued PIPELINE run=%s flow_id=%s user=%s", run_id, flow_id, user["user_id"])
    background_tasks.add_task(_run_and_store, run_id, engine.run_pipeline(run_id, run_params))
    return {"run_id": run_id, "flow_id": flow_id, "status": "queued"}
@router.post("/portfolio")
async def trigger_portfolio(
    background_tasks: BackgroundTasks,
    params: Dict[str, Any] = None,
    user: dict = Depends(get_current_user)
):
    """Queue a portfolio-management run."""
    payload = params or {}
    run_id = str(uuid.uuid4())
    flow_id = payload.get("flow_id") or generate_flow_id()
    run_params = {**payload, "flow_id": flow_id}
    runs[run_id] = {
        "id": run_id,
        "flow_id": flow_id,
        "short_rid": flow_id,  # backward compat alias
        "type": "portfolio",
        "status": "queued",
        "created_at": time.time(),
        "user_id": user["user_id"],
        "params": run_params,
        "rerun_seq": 0,
    }
    logger.info("Queued PORTFOLIO run=%s flow_id=%s user=%s", run_id, flow_id, user["user_id"])
    background_tasks.add_task(_run_and_store, run_id, engine.run_portfolio(run_id, run_params))
    return {"run_id": run_id, "flow_id": flow_id, "status": "queued"}
@router.post("/auto")
async def trigger_auto(
    background_tasks: BackgroundTasks,
    params: Dict[str, Any] = None,
    user: dict = Depends(get_current_user)
):
    """Queue a fully automatic end-to-end run."""
    payload = params or {}
    run_id = str(uuid.uuid4())
    flow_id = payload.get("flow_id") or generate_flow_id()
    run_params = {**payload, "flow_id": flow_id}
    runs[run_id] = {
        "id": run_id,
        "flow_id": flow_id,
        "short_rid": flow_id,  # backward compat alias
        "type": "auto",
        "status": "queued",
        "created_at": time.time(),
        "user_id": user["user_id"],
        "params": run_params,
        "rerun_seq": 0,
    }
    logger.info("Queued AUTO run=%s flow_id=%s user=%s", run_id, flow_id, user["user_id"])
    background_tasks.add_task(_run_and_store, run_id, engine.run_auto(run_id, run_params))
    return {"run_id": run_id, "flow_id": flow_id, "status": "queued"}
@router.post("/mock")
async def trigger_mock(
    background_tasks: BackgroundTasks,
    params: Dict[str, Any] = None,
    user: dict = Depends(get_current_user),
):
    """Start a mock run that streams scripted events — no real LLM calls.

    Accepted params:
        mock_type : "pipeline" | "scan" | "auto" (default: "pipeline")
        ticker    : ticker symbol for pipeline / auto (default: "AAPL")
        tickers   : list of tickers for auto mock
        date      : analysis date (default: today)
        speed     : delay divisor — 1.0 = realistic, 5.0 = fast (default: 1.0)
    """
    payload = params or {}
    run_id = str(uuid.uuid4())
    flow_id = payload.get("flow_id") or generate_flow_id()
    run_params = {**payload, "flow_id": flow_id}
    runs[run_id] = {
        "id": run_id,
        "flow_id": flow_id,
        "short_rid": flow_id,  # backward compat alias
        "type": "mock",
        "status": "queued",
        "created_at": time.time(),
        "user_id": user["user_id"],
        "params": run_params,
        "rerun_seq": 0,
    }
    logger.info(
        "Queued MOCK run=%s mock_type=%s flow_id=%s user=%s",
        run_id, payload.get("mock_type", "pipeline"), flow_id, user["user_id"],
    )
    background_tasks.add_task(
        _run_and_store, run_id, mock_engine.run_mock(run_id, run_params)
    )
    return {"run_id": run_id, "flow_id": flow_id, "status": "queued"}
async def _append_and_store(run_id: str, gen) -> None:
    """Append events from a re-run generator to an existing run entry.

    Bumps the run's ``rerun_seq`` so appended events can be distinguished
    from the original pass, then mirrors the status handling of
    ``_run_and_store`` and persists the run at the end.
    """
    run = runs.get(run_id)
    if not run:
        return
    run["rerun_seq"] = run.get("rerun_seq", 0) + 1
    run["status"] = "running"
    try:
        async for event in gen:
            # Tag each event with the sequence number of this re-run.
            event["rerun_seq"] = run["rerun_seq"]
            run.setdefault("events", []).append(event)
    except Exception as exc:
        run["status"] = "failed"
        run["error"] = str(exc)
        logger.exception("Rerun failed run=%s", run_id)
    else:
        run["status"] = "completed"
    finally:
        _persist_run_to_disk(run_id)
@router.post("/rerun-node")
async def trigger_rerun_node(
    background_tasks: BackgroundTasks,
    params: Dict[str, Any],
    user: dict = Depends(get_current_user),
):
    """Re-run a phase of the trading pipeline for a specific ticker.

    Body: { run_id, node_id, identifier, date, portfolio_id }
    """
    run_id = params.get("run_id", "")
    node_id = params.get("node_id", "")
    identifier = params.get("identifier", "")
    date = params.get("date", "")
    portfolio_id = params.get("portfolio_id", "main_portfolio")

    # Validate everything before queuing any work.
    if run_id not in runs:
        raise HTTPException(status_code=404, detail="Run not found")
    if node_id not in NODE_TO_PHASE:
        raise HTTPException(status_code=422, detail=f"Unknown node_id: {node_id}")
    if not identifier:
        raise HTTPException(status_code=422, detail="identifier (ticker) is required")

    phase = NODE_TO_PHASE[node_id]
    if not date:
        # Fall back to the date of the original run.
        date = (runs[run_id].get("params") or {}).get("date", "")
    rerun_params = {
        "ticker": identifier,
        "date": date,
        "portfolio_id": portfolio_id,
    }
    logger.info(
        "Queued RERUN run=%s node=%s phase=%s ticker=%s user=%s",
        run_id, node_id, phase, identifier, user["user_id"],
    )
    background_tasks.add_task(
        _append_and_store,
        run_id,
        engine.run_pipeline_from_phase(f"{run_id}_rerun_{phase}", rerun_params, phase),
    )
    return {"run_id": run_id, "phase": phase, "status": "queued"}
@router.delete("/portfolio-stage")
async def reset_portfolio_stage(
    params: Dict[str, Any],
    user: dict = Depends(get_current_user),
):
    """Delete the PM decision and execution result for a date/portfolio pair.

    After calling this, an auto run will re-run Phase 3 from scratch
    (Phases 1 & 2 are skipped if their cached results still exist).
    """
    from tradingagents.portfolio.store_factory import create_report_store

    date = params.get("date")
    portfolio_id = params.get("portfolio_id")
    if not (date and portfolio_id):
        raise HTTPException(status_code=422, detail="date and portfolio_id are required")
    store = create_report_store()
    deleted = store.clear_portfolio_stage(date, portfolio_id)
    logger.info(
        "reset_portfolio_stage date=%s portfolio=%s deleted=%s user=%s",
        date, portfolio_id, deleted, user["user_id"],
    )
    return {"deleted": deleted, "date": date, "portfolio_id": portfolio_id}
def _get_mongo_col():
"""Return the run_events collection if MongoDB is configured."""
uri = os.getenv("TRADINGAGENTS_MONGO_URI")
db_name = os.getenv("TRADINGAGENTS_MONGO_DB", "tradingagents")
if uri:
try:
from pymongo import MongoClient
client = MongoClient(uri)
return client[db_name]["run_events"]
except Exception:
logger.warning("Failed to connect to MongoDB for historical events")
return None
@router.get("/")
async def list_runs(user: dict = Depends(get_current_user)):
    """List all known runs: in-memory entries plus historical ones from MongoDB.

    TODO(review): results are not filtered by `user` yet — every caller sees
    every run; restrict by user_id in production.
    """
    all_runs = dict(runs)  # snapshot so background tasks can't mutate mid-merge
    col = _get_mongo_col()
    if col is not None:
        try:
            # There is no dedicated 'runs' metadata collection, so run
            # summaries are reconstructed by grouping the events collection
            # on run_id, anchored on the SYSTEM start-log events.
            pipeline = [
                {"$match": {"type": "log", "agent": "SYSTEM"}},  # start logs only
                {"$sort": {"ts": -1}},
                {"$group": {
                    "_id": "$run_id",
                    "id": {"$first": "$run_id"},
                    "type": {"$first": "$type"},
                    "created_at": {"$first": "$ts"},
                    # Status cannot be derived from events without a meta doc.
                }},
                {"$limit": 50},
            ]
            for doc in col.aggregate(pipeline):
                rid = doc["id"]
                # In-memory entries are authoritative; only fill in gaps.
                if rid not in all_runs:
                    all_runs[rid] = {
                        "id": rid,
                        "type": doc.get("type", "unknown"),
                        "status": "historical",
                        "created_at": doc.get("created_at", 0),
                        "user_id": "anonymous",
                    }
        except Exception:
            # Keep the traceback — the bare warning previously hid the cause.
            logger.warning("Failed to fetch historical runs from MongoDB", exc_info=True)
    return list(all_runs.values())
@router.get("/{run_id}")
async def get_run_status(run_id: str, user: dict = Depends(get_current_user)):
    """Return a run's status and events.

    Resolution order:
      1. In-memory ``runs`` store, lazy-loading events from disk for
         finished runs whose events were not kept in memory.
      2. MongoDB ``run_events`` collection, returned with status "historical".

    Raises:
        HTTPException: 404 when the run is unknown to both stores.
    """
    if run_id in runs:
        run = runs[run_id]
        # Lazy-load events from disk if they were not kept in memory.
        if not run.get("events") and run.get("status") in ("completed", "failed"):
            try:
                from tradingagents.portfolio.store_factory import create_report_store

                flow_id = run.get("flow_id") or run.get("short_rid") or run_id[:8]
                store = create_report_store(flow_id=flow_id)
                date = (run.get("params") or {}).get("date", "")
                if date:
                    events = store.load_run_events(date)
                    if events:
                        run["events"] = events
            except Exception:
                # Best-effort: a missing events file must not hide the run;
                # keep the traceback so the failure is diagnosable.
                logger.warning("Failed to lazy-load events for run=%s", run_id, exc_info=True)
        return run

    # Not in memory — try MongoDB.
    col = _get_mongo_col()
    if col is not None:
        try:
            events = list(col.find({"run_id": run_id}).sort("ts", 1))
            if events:
                for e in events:
                    e.pop("_id", None)  # ObjectId is not JSON-serializable
                return {
                    "id": run_id,
                    "status": "historical",
                    "events": events,
                    # `events` is known non-empty here, so index 0 is safe
                    # (the old `if events else` guards were dead code).
                    "type": events[0].get("type", "unknown"),
                    "created_at": events[0].get("ts", 0),
                }
        except Exception:
            logger.warning("Failed to fetch historical run %s from MongoDB", run_id, exc_info=True)
    raise HTTPException(status_code=404, detail="Run not found")