115 lines
4.5 KiB
TypeScript
115 lines
4.5 KiB
TypeScript
import { describe, it, expect } from 'vitest'
|
|
import { z } from 'zod'
|
|
import { AgentRunner } from '../src/agent/runner.js'
|
|
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
|
|
import { ToolExecutor } from '../src/tool/executor.js'
|
|
import type { LLMAdapter, LLMMessage, LLMResponse, StreamEvent, ToolUseBlock, ToolResultBlock } from '../src/types.js'
|
|
|
|
/**
 * Builds a mock LLM response whose single content block asks for a tool call.
 *
 * @param toolName - Name written onto the `tool_use` block.
 * @param input - Arguments written onto the `tool_use` block (passed through as-is).
 * @returns A response for model `mock-model` with `stop_reason: 'tool_use'`,
 *   a usage of 5 input / 5 output tokens, and randomly suffixed `resp-` / `tu-` ids.
 */
function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse {
  // Random base-36 suffix so each response and each block gets its own id.
  const randomSuffix = (): string => Math.random().toString(36).slice(2)

  const responseId = `resp-${randomSuffix()}`
  const toolUseBlock = {
    type: 'tool_use' as const,
    id: `tu-${randomSuffix()}`,
    name: toolName,
    input,
  }

  return {
    id: responseId,
    content: [toolUseBlock],
    model: 'mock-model',
    stop_reason: 'tool_use',
    usage: { input_tokens: 5, output_tokens: 5 },
  }
}
|
|
|
|
/**
 * Builds a mock LLM response that carries one plain text block and ends the turn.
 *
 * @param text - Text placed in the single `text` content block.
 * @returns A response for model `mock-model` with `stop_reason: 'end_turn'`,
 *   a usage of 5 input / 5 output tokens, and a randomly suffixed `resp-` id.
 */
function textResponse(text: string): LLMResponse {
  const idSuffix = Math.random().toString(36).slice(2)
  const textBlock = { type: 'text' as const, text }

  return {
    id: 'resp-' + idSuffix,
    content: [textBlock],
    model: 'mock-model',
    stop_reason: 'end_turn',
    usage: { input_tokens: 5, output_tokens: 5 },
  }
}
|
|
|
|
describe('delegation-triggered budget_exceeded', () => {
  it('yields tool_result events and appends tool_result message before break', async () => {
    // Scenario: on the first parent turn the mocked LLM requests a delegation.
    // The fake tool reports metadata.tokenUsage that is far larger than the
    // configured budget. The stream is expected to emit both the tool_use and
    // the tool_result, and the final `messages` must end with the user
    // tool_result message so a consumer can resume the conversation without
    // hitting a "tool_use without tool_result" API error.

    // Event shapes this test inspects, narrowed from the generic StreamEvent.
    type ToolUseEvent = StreamEvent & { type: 'tool_use'; data: ToolUseBlock }
    type ToolResultEvent = StreamEvent & { type: 'tool_result'; data: ToolResultBlock }
    type DoneEvent = StreamEvent & {
      type: 'done'
      data: { messages: LLMMessage[]; budgetExceeded?: boolean }
    }

    const isToolUse = (e: StreamEvent): e is ToolUseEvent => e.type === 'tool_use'
    const isToolResult = (e: StreamEvent): e is ToolResultEvent => e.type === 'tool_result'
    const isDone = (e: StreamEvent): e is DoneEvent => e.type === 'done'

    // Scripted LLM replies; the second one exists only to detect an unwanted extra turn.
    const scriptedResponses = [
      toolUseResponse('delegate_to_agent', { target_agent: 'bob', prompt: 'work' }),
      textResponse('should not be reached'),
    ]
    let chatCallCount = 0
    const adapter: LLMAdapter = {
      name: 'mock',
      async chat() {
        const next = scriptedResponses[chatCallCount]!
        chatCallCount += 1
        return next
      },
      async *stream() { /* unused */ },
    }

    const registry = new ToolRegistry()
    // Stand-in delegation tool: returns fixed output plus a large token usage report.
    const fakeDelegationTool = defineTool({
      name: 'delegate_to_agent',
      description: 'Fake delegation for test',
      inputSchema: z.object({ target_agent: z.string(), prompt: z.string() }),
      async execute() {
        const tokenUsage = { input_tokens: 500, output_tokens: 500 }
        return { data: 'delegated output', metadata: { tokenUsage } }
      },
    })
    registry.register(fakeDelegationTool)

    const executor = new ToolExecutor(registry)
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['delegate_to_agent'],
      maxTurns: 5,
      // Parent LLM call reports 10 tokens and the delegation reports 1000, both far above 100.
      maxTokenBudget: 100,
      agentName: 'parent',
    })

    const initialMessages: LLMMessage[] = [
      { role: 'user', content: [{ type: 'text', text: 'start' }] },
    ]
    const events: StreamEvent[] = []
    for await (const event of runner.stream(initialMessages)) {
      events.push(event)
    }

    const toolUseEvents = events.filter(isToolUse)
    const toolResultEvents = events.filter(isToolResult)
    const budgetEvents = events.filter(e => e.type === 'budget_exceeded')
    const doneEvents = events.filter(isDone)

    // 1. The single tool_use event is paired with exactly one tool_result event.
    expect(toolUseEvents).toHaveLength(1)
    expect(toolResultEvents).toHaveLength(1)
    const toolUse = toolUseEvents[0]!
    const toolResult = toolResultEvents[0]!
    expect(toolResult.data.tool_use_id).toBe(toolUse.data.id)

    // 2. One budget event is emitted and the run finishes flagged as budgetExceeded.
    expect(budgetEvents).toHaveLength(1)
    expect(doneEvents).toHaveLength(1)
    const done = doneEvents[0]!
    expect(done.data.budgetExceeded).toBe(true)

    // 3. The final message is the user tool_result message, keeping the
    //    conversation API-resumable.
    const { messages } = done.data
    const lastMsg = messages[messages.length - 1]!
    expect(lastMsg.role).toBe('user')
    const hasMatchingToolResult = lastMsg.content.some(
      block => block.type === 'tool_result' && block.tool_use_id === toolUse.data.id,
    )
    expect(hasMatchingToolResult).toBe(true)

    // 4. Ordering: the tool_result event comes before budget_exceeded.
    const eventTypes = events.map(e => e.type)
    const toolResultIdx = eventTypes.indexOf('tool_result')
    const budgetIdx = eventTypes.indexOf('budget_exceeded')
    expect(toolResultIdx).toBeLessThan(budgetIdx)

    // 5. The LLM was called exactly once, so the run stopped before a second turn.
    expect(chatCallCount).toBe(1)
  })
})
|