// open-multi-agent/tests/delegation-budget.test.ts

import { describe, it, expect } from 'vitest'
import { z } from 'zod'
import { AgentRunner } from '../src/agent/runner.js'
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
import { ToolExecutor } from '../src/tool/executor.js'
import type { LLMAdapter, LLMMessage, LLMResponse, StreamEvent, ToolUseBlock, ToolResultBlock } from '../src/types.js'

/**
 * Builds a mock LLM response whose only content block is a `tool_use` request.
 *
 * Both the response id and the tool-use id get a random base-36 suffix, so
 * every call produces fresh identifiers.
 *
 * @param toolName - Name of the tool the mock LLM asks to invoke.
 * @param input - Arguments attached to the tool call (passed through by reference).
 * @returns A response with `stop_reason: 'tool_use'` and fixed 5/5 token usage.
 */
function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse {
  const randomSuffix = (): string => Math.random().toString(36).slice(2)

  // Generate the response id first, then the tool-use id.
  const responseId = `resp-${randomSuffix()}`
  const toolUseId = `tu-${randomSuffix()}`

  return {
    id: responseId,
    content: [{ type: 'tool_use', id: toolUseId, name: toolName, input }],
    model: 'mock-model',
    stop_reason: 'tool_use',
    usage: { input_tokens: 5, output_tokens: 5 },
  }
}

/**
 * Builds a mock LLM response consisting of a single text block.
 *
 * @param text - The text the mock LLM "says".
 * @returns A response with `stop_reason: 'end_turn'`, a random `resp-` id and
 *   fixed 5/5 token usage.
 */
function textResponse(text: string): LLMResponse {
  const responseId = `resp-${Math.random().toString(36).slice(2)}`
  const response: LLMResponse = {
    id: responseId,
    content: [{ type: 'text', text }],
    model: 'mock-model',
    stop_reason: 'end_turn',
    usage: { input_tokens: 5, output_tokens: 5 },
  }
  return response
}

describe('delegation-triggered budget_exceeded', () => {
  it('yields tool_result events and appends tool_result message before break', async () => {
    // Scenario under test:
    //   - On its first turn the mock LLM requests a `delegate_to_agent` call.
    //   - The fake tool reports `metadata.tokenUsage` of 500 + 500 tokens,
    //     which the test expects to push the run over its 100-token budget.
    // What must hold: the stream emits both the tool_use and its tool_result,
    // and the final `messages` ends with the user tool_result message, so a
    // consumer resuming the conversation never sees a tool_use lacking its
    // tool_result.

    // Local aliases for the narrowed event shapes inspected below.
    type ToolUseEvent = StreamEvent & { type: 'tool_use'; data: ToolUseBlock }
    type ToolResultEvent = StreamEvent & { type: 'tool_result'; data: ToolResultBlock }
    type DoneEvent = StreamEvent & { type: 'done'; data: { messages: LLMMessage[]; budgetExceeded?: boolean } }

    const isToolUse = (e: StreamEvent): e is ToolUseEvent => e.type === 'tool_use'
    const isToolResult = (e: StreamEvent): e is ToolResultEvent => e.type === 'tool_result'
    const isDone = (e: StreamEvent): e is DoneEvent => e.type === 'done'

    // Scripted LLM replies; the second one exists only to detect an
    // unexpected extra turn.
    const scriptedReplies = [
      toolUseResponse('delegate_to_agent', { target_agent: 'bob', prompt: 'work' }),
      textResponse('should not be reached'),
    ]
    let chatCallCount = 0
    const mockAdapter: LLMAdapter = {
      name: 'mock',
      async chat() {
        const reply = scriptedReplies[chatCallCount]!
        chatCallCount += 1
        return reply
      },
      async *stream() { /* unused */ },
    }

    // Registry holding a single fake delegation tool that reports heavy usage.
    const tools = new ToolRegistry()
    const fakeDelegateTool = defineTool({
      name: 'delegate_to_agent',
      description: 'Fake delegation for test',
      inputSchema: z.object({ target_agent: z.string(), prompt: z.string() }),
      async execute() {
        const tokenUsage = { input_tokens: 500, output_tokens: 500 }
        return { data: 'delegated output', metadata: { tokenUsage } }
      },
    })
    tools.register(fakeDelegateTool)

    const runner = new AgentRunner(mockAdapter, tools, new ToolExecutor(tools), {
      model: 'mock-model',
      allowedTools: ['delegate_to_agent'],
      maxTurns: 5,
      // Parent LLM usage (5 + 5) plus delegated usage (500 + 500) dwarfs this.
      maxTokenBudget: 100,
      agentName: 'parent',
    })

    // Drain the stream, recording every event in emission order.
    const seen: StreamEvent[] = []
    for await (const event of runner.stream([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])) {
      seen.push(event)
    }

    const toolUses = seen.filter(isToolUse)
    const toolResults = seen.filter(isToolResult)
    const budgetExceededEvents = seen.filter(e => e.type === 'budget_exceeded')
    const doneEvents = seen.filter(isDone)

    // 1. The single tool_use is paired with exactly one tool_result.
    expect(toolUses).toHaveLength(1)
    expect(toolResults).toHaveLength(1)
    const delegatedToolUseId = toolUses[0]!.data.id
    expect(toolResults[0]!.data.tool_use_id).toBe(delegatedToolUseId)

    // 2. The budget event fires once and the run finishes flagged as over budget.
    expect(budgetExceededEvents).toHaveLength(1)
    expect(doneEvents).toHaveLength(1)
    const finalData = doneEvents[0]!.data
    expect(finalData.budgetExceeded).toBe(true)

    // 3. The last returned message is the user message carrying the matching
    //    tool_result, keeping the conversation API-resumable.
    const finalMessages = finalData.messages
    const trailingMessage = finalMessages[finalMessages.length - 1]!
    expect(trailingMessage.role).toBe('user')
    const carriesMatchingResult = trailingMessage.content.some(
      block => block.type === 'tool_result' && block.tool_use_id === delegatedToolUseId,
    )
    expect(carriesMatchingResult).toBe(true)

    // 4. Ordering: tool_result is emitted ahead of budget_exceeded.
    const firstToolResultAt = seen.findIndex(e => e.type === 'tool_result')
    const firstBudgetExceededAt = seen.findIndex(e => e.type === 'budget_exceeded')
    expect(firstToolResultAt).toBeLessThan(firstBudgetExceededAt)

    // 5. Only one LLM call happened: the loop stopped before a second turn.
    expect(chatCallCount).toBe(1)
  })
})