627 lines
22 KiB
TypeScript
627 lines
22 KiB
TypeScript
import { describe, it, expect, vi } from 'vitest'
|
|
import { z } from 'zod'
|
|
import { AgentRunner } from '../src/agent/runner.js'
|
|
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
|
|
import { ToolExecutor } from '../src/tool/executor.js'
|
|
import type { LLMAdapter, LLMChatOptions, LLMMessage, LLMResponse, TraceEvent } from '../src/types.js'
|
|
|
|
/**
 * Creates a mock LLM response that carries exactly one text block.
 *
 * Only the id is randomized; model, stop reason (`end_turn`) and usage
 * (10 input / 20 output tokens) are fixed so tests can assert on totals.
 */
function textResponse(text: string): LLMResponse {
  const suffix = Math.random().toString(36).slice(2)
  return {
    id: 'resp-' + suffix,
    content: [{ type: 'text', text }],
    model: 'mock-model',
    stop_reason: 'end_turn',
    usage: { input_tokens: 10, output_tokens: 20 },
  }
}
|
|
|
|
/**
 * Creates a mock LLM response that requests a single tool call.
 *
 * Both the response id and the tool_use id are randomized; model, stop
 * reason (`tool_use`) and usage (15 input / 25 output tokens) are fixed.
 */
function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse {
  const randomSuffix = (): string => Math.random().toString(36).slice(2)
  // Response id is generated before the tool_use id, as in the literal form.
  const responseId = `resp-${randomSuffix()}`
  const toolUseId = `tu-${randomSuffix()}`
  return {
    id: responseId,
    content: [{ type: 'tool_use', id: toolUseId, name: toolName, input }],
    model: 'mock-model',
    stop_reason: 'tool_use',
    usage: { input_tokens: 15, output_tokens: 25 },
  }
}
|
|
|
|
/**
 * Builds a fresh tool registry containing a single `echo` tool, plus an
 * executor bound to that registry. The tool returns its `message` input
 * as the result data.
 */
function buildRegistryAndExecutor(): { registry: ToolRegistry; executor: ToolExecutor } {
  const registry = new ToolRegistry()
  const echoTool = defineTool({
    name: 'echo',
    description: 'Echo input',
    inputSchema: z.object({ message: z.string() }),
    async execute(args) {
      return { data: args.message }
    },
  })
  registry.register(echoTool)
  const executor = new ToolExecutor(registry)
  return { registry, executor }
}
|
|
|
|
describe('AgentRunner contextStrategy', () => {
|
|
it('keeps baseline behavior when contextStrategy is not set', async () => {
  // Snapshot of the message list handed to every chat() call.
  const recordedCalls: LLMMessage[][] = []
  const adapter: LLMAdapter = {
    name: 'mock',
    async chat(messages) {
      const snapshot = messages.map(({ role, content }) => ({ role, content }))
      recordedCalls.push(snapshot)
      // The first request triggers the echo tool; every later one ends the run.
      if (recordedCalls.length === 1) {
        return toolUseResponse('echo', { message: 'hello' })
      }
      return textResponse('done')
    },
    async *stream() {
      /* unused */
    },
  }
  const { registry, executor } = buildRegistryAndExecutor()
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 4,
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  expect(recordedCalls).toHaveLength(2)
  const [firstCall, secondCall] = recordedCalls
  expect(firstCall).toHaveLength(1)
  // With no context strategy the history only grows between calls.
  expect(secondCall!.length).toBeGreaterThan(firstCall!.length)
})
|
|
|
|
it('sliding-window truncates old turns and preserves the first user message', async () => {
  const recordedCalls: LLMMessage[][] = []
  // Three echo tool turns followed by the final text answer.
  const scripted = [
    ...['t1', 't2', 't3'].map(message => toolUseResponse('echo', { message })),
    textResponse('done'),
  ]
  let cursor = 0
  const adapter: LLMAdapter = {
    name: 'mock',
    async chat(messages) {
      recordedCalls.push(messages.map(({ role, content }) => ({ role, content })))
      const reply = scripted[cursor]!
      cursor += 1
      return reply
    },
    async *stream() {
      /* unused */
    },
  }
  const { registry, executor } = buildRegistryAndExecutor()
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: { type: 'sliding-window', maxTurns: 1 },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'original prompt' }] }])

  const finalCall = recordedCalls[recordedCalls.length - 1]!
  // The leading block of the leading message must still be the original prompt.
  const leadingBlock = finalCall[0]!.content[0]
  expect(leadingBlock).toMatchObject({ type: 'text', text: 'original prompt' })
  // Some text block in the final request must mention the truncation.
  const mentionsTruncation = finalCall.some(m =>
    m.content.some(c => c.type === 'text' && c.text.includes('truncated')),
  )
  expect(mentionsTruncation).toBe(true)
})
|
|
|
|
it('summarize strategy replaces old context and emits summary trace call', async () => {
  const recordedCalls: Array<{ messages: LLMMessage[]; options: LLMChatOptions }> = []
  const traceEvents: TraceEvent[] = []
  // Scripted replies, consumed in order by chat(): two tool turns with bulky
  // payloads, then the summary text, then the final answer.
  const scripted = [
    toolUseResponse('echo', { message: 'first turn payload '.repeat(20) }),
    toolUseResponse('echo', { message: 'second turn payload '.repeat(20) }),
    textResponse('This is a concise summary.'),
    textResponse('final answer'),
  ]
  let cursor = 0
  const adapter: LLMAdapter = {
    name: 'mock',
    async chat(messages, options) {
      const snapshot = messages.map(({ role, content }) => ({ role, content }))
      recordedCalls.push({ messages: snapshot, options })
      const reply = scripted[cursor]!
      cursor += 1
      return reply
    },
    async *stream() {
      /* unused */
    },
  }
  const { registry, executor } = buildRegistryAndExecutor()
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: { type: 'summarize', maxTokens: 20 },
  })

  const result = await runner.run(
    [{ role: 'user', content: [{ type: 'text', text: 'start' }] }],
    {
      onTrace: (event) => { traceEvents.push(event) },
      runId: 'run-summary',
      traceAgent: 'context-agent',
    },
  )

  // Look for a request made with exactly one message and no tools.
  const summaryCall = recordedCalls.find(
    ({ messages, options }) => messages.length === 1 && options.tools === undefined,
  )
  expect(summaryCall).toBeDefined()
  const sawSummaryTrace = traceEvents.some(t => t.type === 'llm_call' && t.phase === 'summary')
  expect(sawSummaryTrace).toBe(true)

  // Summary adapter usage must count toward RunResult.tokenUsage (maxTokenBudget).
  expect(result.tokenUsage.input_tokens).toBe(15 + 15 + 10 + 10)
  expect(result.tokenUsage.output_tokens).toBe(25 + 25 + 20 + 20)

  // After compaction, summary text is folded into the next user turn (not a
  // standalone user message), preserving user/assistant alternation.
  const summaryMarker = '[Conversation summary]'
  const turnAfterSummary = recordedCalls.find(({ messages }) =>
    messages.some(m =>
      m.role === 'user' &&
      m.content.some(b => b.type === 'text' && b.text.includes(summaryMarker)),
    ),
  )
  expect(turnAfterSummary).toBeDefined()
  const roleSequence = turnAfterSummary!.messages.map(m => m.role).join(',')
  expect(roleSequence).not.toMatch(/^user,user/)
})
|
|
|
|
it('custom strategy calls compress callback and uses returned messages', async () => {
  // Compressor under test: keeps only the last message of the history it gets.
  const compress = vi.fn((messages: LLMMessage[]) => messages.slice(-1))
  const recordedCalls: LLMMessage[][] = []
  // One echo tool turn, then the final text answer.
  const scripted = [
    toolUseResponse('echo', { message: 'hello' }),
    textResponse('done'),
  ]
  let cursor = 0
  const adapter: LLMAdapter = {
    name: 'mock',
    async chat(messages) {
      recordedCalls.push(messages.map(({ role, content }) => ({ role, content })))
      const reply = scripted[cursor]!
      cursor += 1
      return reply
    },
    async *stream() {
      /* unused */
    },
  }
  const { registry, executor } = buildRegistryAndExecutor()
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 4,
    contextStrategy: { type: 'custom', compress },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'custom prompt' }] }])

  expect(compress).toHaveBeenCalledOnce()
  // The second request must carry exactly what compress() returned: one message.
  const secondCall = recordedCalls[1]
  expect(secondCall).toHaveLength(1)
})
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// compact strategy
|
|
// ---------------------------------------------------------------------------
|
|
|
|
describe('compact strategy', () => {
|
|
// 3000 characters of filler, used as oversized assistant text.
const longText = 'x'.repeat(3000)
// 100 repetitions of 'result-data ' (1200 characters), used as oversized tool output.
const longToolResult = 'result-data '.repeat(100)
|
|
|
|
/**
 * Builds a mock adapter that replays a fixed script: `responseCount - 1`
 * echo tool-use responses (messages `turn-0`, `turn-1`, ...) followed by a
 * final `done` text response.
 *
 * Every chat() call appends a role/content snapshot of its messages to
 * `calls`, so tests can inspect what was sent on each request.
 */
function buildMultiTurnAdapter(
  responseCount: number,
  calls: LLMMessage[][],
): LLMAdapter {
  const scripted: LLMResponse[] = []
  let turn = 0
  while (turn < responseCount - 1) {
    scripted.push(toolUseResponse('echo', { message: `turn-${turn}` }))
    turn += 1
  }
  scripted.push(textResponse('done'))

  let cursor = 0
  const adapter: LLMAdapter = {
    name: 'mock',
    async chat(messages) {
      const snapshot = messages.map(({ role, content }) => ({ role, content }))
      calls.push(snapshot)
      const reply = scripted[cursor]!
      cursor += 1
      return reply
    },
    async *stream() { /* unused */ },
  }
  return adapter
}
|
|
|
|
/**
 * Creates a registry holding one `echo` tool that ignores its input and
 * always returns `result`, together with an executor bound to that registry.
 */
function buildEchoRegistry(result: string): { registry: ToolRegistry; executor: ToolExecutor } {
  const registry = new ToolRegistry()
  const fixedEchoTool = defineTool({
    name: 'echo',
    description: 'Echo input',
    inputSchema: z.object({ message: z.string() }),
    async execute() {
      const payload = { data: result }
      return payload
    },
  })
  registry.register(fixedEchoTool)
  const executor = new ToolExecutor(registry)
  return { registry, executor }
}
|
|
|
|
it('does not activate below maxTokens threshold', async () => {
  const calls: LLMMessage[][] = []
  // Three scripted replies: two echo tool calls, then the final "done" text.
  const adapter = buildMultiTurnAdapter(3, calls)
  const { registry, executor } = buildEchoRegistry('short')
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    // Threshold chosen far above what this short conversation can reach.
    contextStrategy: { type: 'compact', maxTokens: 999999 },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // On the 3rd call (turn 3), all previous messages should still be intact
  // because estimated tokens are way below the threshold.
  const lastCall = calls[calls.length - 1]!
  const allToolResults = lastCall.flatMap(m =>
    m.content.filter(b => b.type === 'tool_result'),
  )
  // Guard against a vacuous pass: if the final request carried no tool
  // results at all, the loop below would assert nothing.
  expect(allToolResults.length).toBeGreaterThan(0)
  for (const tr of allToolResults) {
    if (tr.type === 'tool_result') {
      expect(tr.content).not.toContain('compacted')
    }
  }
})
|
|
|
|
it('compresses old tool_result blocks when tokens exceed threshold', async () => {
  const recordedCalls: LLMMessage[][] = []
  const adapter = buildMultiTurnAdapter(4, recordedCalls)
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20, // very low to always trigger
      preserveRecentTurns: 1, // only protect the most recent turn
      minToolResultChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // In the final request, older tool results should carry a compact marker.
  const finalCall = recordedCalls[recordedCalls.length - 1]!
  const compacted = finalCall
    .flatMap(m => m.content)
    .filter(b => b.type === 'tool_result' && b.content.includes('compacted'))
  expect(compacted.length).toBeGreaterThan(0)
  // Each marker should name the tool that produced the result.
  for (const block of compacted) {
    if (block.type !== 'tool_result') continue
    expect(block.content).toMatch(/\[Tool result: echo/)
  }
})
|
|
|
|
it('preserves the first user message', async () => {
  const recordedCalls: LLMMessage[][] = []
  const adapter = buildMultiTurnAdapter(4, recordedCalls)
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'original prompt' }] }])

  // The earliest user message of the final request must still open with
  // the untouched prompt text.
  const finalCall = recordedCalls[recordedCalls.length - 1]!
  const earliestUser = finalCall.find(({ role }) => role === 'user')!
  const openingBlock = earliestUser.content[0]
  expect(openingBlock).toMatchObject({ type: 'text', text: 'original prompt' })
})
|
|
|
|
it('preserves tool_use blocks in old turns', async () => {
  const calls: LLMMessage[][] = []
  // Four scripted replies: three echo tool calls, then the final "done" text.
  const adapter = buildMultiTurnAdapter(4, calls)
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // Every assistant message should still have its tool_use block.
  const lastCall = calls[calls.length - 1]!
  const assistantMsgs = lastCall.filter(m => m.role === 'assistant')
  // Guard against a vacuous pass: with no assistant messages in the final
  // request, the loop below would assert nothing.
  expect(assistantMsgs.length).toBeGreaterThan(0)
  for (const msg of assistantMsgs) {
    const toolUses = msg.content.filter(b => b.type === 'tool_use')
    // The last assistant message is "done" (text only), others have tool_use.
    if (msg.content.some(b => b.type === 'text' && b.text === 'done')) continue
    expect(toolUses.length).toBeGreaterThan(0)
  }
})
|
|
|
|
it('preserves error tool_result blocks', async () => {
  const calls: LLMMessage[][] = []
  // Two echo tool calls (the first will fail), then the final text answer.
  const responses: LLMResponse[] = [
    toolUseResponse('echo', { message: 'will-fail' }),
    toolUseResponse('echo', { message: 'ok' }),
    textResponse('done'),
  ]
  let idx = 0
  const adapter: LLMAdapter = {
    name: 'mock',
    async chat(messages) {
      calls.push(messages.map(m => ({ role: m.role, content: m.content })))
      return responses[idx++]!
    },
    async *stream() { /* unused */ },
  }
  // Tool that fails on first call, succeeds on second.
  let callCount = 0
  const registry = new ToolRegistry()
  registry.register(
    defineTool({
      name: 'echo',
      description: 'Echo input',
      inputSchema: z.object({ message: z.string() }),
      async execute() {
        callCount++
        if (callCount === 1) {
          // Long message (680 chars) so it exceeds minToolResultChars below.
          throw new Error('deliberate error '.repeat(40))
        }
        return { data: longToolResult }
      },
    }),
  )
  const executor = new ToolExecutor(registry)
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 50,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  const lastCall = calls[calls.length - 1]!
  const errorResults = lastCall.flatMap(m =>
    m.content.filter(b => b.type === 'tool_result' && b.is_error),
  )
  // Guard against a vacuous pass: if the error result were dropped or lost
  // its is_error flag, the loop below would assert nothing.
  expect(errorResults.length).toBeGreaterThan(0)
  // Error results should still have their original content (not compacted).
  for (const er of errorResults) {
    if (er.type === 'tool_result') {
      expect(er.content).not.toContain('compacted')
      expect(er.content).toContain('deliberate error')
    }
  }
})
|
|
|
|
it('does not re-compress markers from compressToolResults', async () => {
  const calls: LLMMessage[][] = []
  // Four scripted replies: three echo tool calls, then the final "done" text.
  const adapter = buildMultiTurnAdapter(4, calls)
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    // Both compression mechanisms are enabled so they can interact.
    compressToolResults: { minChars: 100 },
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 10,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  const lastCall = calls[calls.length - 1]!
  const allToolResults = lastCall.flatMap(m =>
    m.content.filter(b => b.type === 'tool_result'),
  )
  // Guard against a vacuous pass: with no tool results in the final request,
  // the marker check below would assert nothing.
  expect(allToolResults.length).toBeGreaterThan(0)
  // No result should contain nested markers.
  for (const tr of allToolResults) {
    if (tr.type === 'tool_result') {
      // Should not have a compact marker wrapping another marker.
      const markerCount = (tr.content.match(/\[Tool/g) || []).length
      expect(markerCount).toBeLessThanOrEqual(1)
    }
  }
})
|
|
|
|
it('truncates long assistant text blocks in old turns', async () => {
  const recordedCalls: LLMMessage[][] = []
  const scripted: LLMResponse[] = [
    // Turn 1: the assistant emits a very long text block next to its tool call.
    {
      id: 'r1',
      content: [
        { type: 'text', text: longText },
        { type: 'tool_use', id: 'tu-1', name: 'echo', input: { message: 'hi' } },
      ],
      model: 'mock-model',
      stop_reason: 'tool_use',
      usage: { input_tokens: 10, output_tokens: 20 },
    },
    toolUseResponse('echo', { message: 'turn2' }),
    textResponse('done'),
  ]
  let cursor = 0
  const adapter: LLMAdapter = {
    name: 'mock',
    async chat(messages) {
      recordedCalls.push(messages.map(({ role, content }) => ({ role, content })))
      const reply = scripted[cursor]!
      cursor += 1
      return reply
    },
    async *stream() { /* unused */ },
  }
  const { registry, executor } = buildEchoRegistry('short')
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minTextBlockChars: 500,
      textBlockExcerptChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  const finalCall = recordedCalls[recordedCalls.length - 1]!
  // The earliest assistant message lies in the old zone, so its long text
  // should have been replaced by a truncated excerpt.
  const earliestAssistant = finalCall.find(({ role }) => role === 'assistant')!
  const truncated = earliestAssistant.content.find(
    b => b.type === 'text' && b.text.includes('truncated'),
  )
  expect(truncated).toBeDefined()
  if (truncated && truncated.type === 'text') {
    const excerpt = truncated.text
    expect(excerpt.length).toBeLessThan(longText.length)
    expect(excerpt).toContain(`${longText.length} chars total`)
  }
})
|
|
|
|
it('keeps recent turns intact within preserveRecentTurns', async () => {
  const recordedCalls: LLMMessage[][] = []
  const adapter = buildMultiTurnAdapter(4, recordedCalls)
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // The newest user message that carries a tool_result must still hold the
  // original long tool output.
  const finalCall = recordedCalls[recordedCalls.length - 1]!
  const newestUserWithToolResult = finalCall
    .filter(({ role }) => role === 'user') // filter() copies, so reverse() is safe
    .reverse()
    .find(m => m.content.some(b => b.type === 'tool_result'))
  expect(newestUserWithToolResult).toBeDefined()
  const recentResult = newestUserWithToolResult!.content.find(b => b.type === 'tool_result')
  if (recentResult && recentResult.type === 'tool_result') {
    const text = recentResult.content
    expect(text).not.toContain('compacted')
    expect(text).toContain('result-data')
  }
})
|
|
|
|
it('does not compact when all turns fit in preserveRecentTurns', async () => {
  const calls: LLMMessage[][] = []
  // Three scripted replies: two echo tool calls, then the final "done" text.
  const adapter = buildMultiTurnAdapter(3, calls)
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 10, // way more than actual turns
      minToolResultChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // All tool results should still have original content.
  const lastCall = calls[calls.length - 1]!
  const toolResults = lastCall.flatMap(m =>
    m.content.filter(b => b.type === 'tool_result'),
  )
  // Guard against a vacuous pass: with no tool results in the final request,
  // the loop below would assert nothing.
  expect(toolResults.length).toBeGreaterThan(0)
  for (const tr of toolResults) {
    if (tr.type === 'tool_result') {
      expect(tr.content).not.toContain('compacted')
    }
  }
})
|
|
|
|
it('maintains correct role alternation after compaction', async () => {
  const recordedCalls: LLMMessage[][] = []
  const adapter = buildMultiTurnAdapter(5, recordedCalls)
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 10,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // In every request, no two neighbouring messages may share a role.
  for (const requestMessages of recordedCalls) {
    requestMessages.slice(1).forEach((message, offset) => {
      // `offset` indexes the sliced array, so it points at the predecessor
      // of `message` in the full list.
      const previous = requestMessages[offset]!
      expect(message.role).not.toBe(previous.role)
    })
  }
})
|
|
|
|
it('returns ZERO_USAGE (no LLM cost from compaction)', async () => {
  const recordedCalls: LLMMessage[][] = []
  const adapter = buildMultiTurnAdapter(4, recordedCalls)
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 100,
    },
  })

  const { tokenUsage } = await runner.run([
    { role: 'user', content: [{ type: 'text', text: 'start' }] },
  ])

  // Usage must equal the sum over the 4 scripted LLM replies only; compaction
  // itself adds nothing. Tool-use replies report 15 in / 25 out, the final
  // text reply 10 in / 20 out, and the script holds 3 tool-use replies.
  const toolTurns = 3
  expect(tokenUsage.input_tokens).toBe(15 * toolTurns + 10)
  expect(tokenUsage.output_tokens).toBe(25 * toolTurns + 20)
})
|
|
})
|
|
})
|