feat: post-consumption tool result compression (#116)
Replace consumed tool results with compact markers before each LLM call, freeing context budget in multi-turn agent runs. A tool result is "consumed" once the assistant has produced a response after seeing it.

- Add `compressToolResults` option to AgentConfig / RunnerOptions
- Runs before contextStrategy (lightweight, no LLM calls)
- Error results and short results (< minChars, default 500) are skipped
- 9 test cases covering default off, compression, parallel tools, 4+ turn compounding, error exemption, custom threshold, and contextStrategy coexistence
This commit is contained in:
parent
c3ead26677
commit
1529dd1346
|
|
@ -154,6 +154,7 @@ export class Agent {
|
|||
loopDetection: this.config.loopDetection,
|
||||
maxTokenBudget: this.config.maxTokenBudget,
|
||||
contextStrategy: this.config.contextStrategy,
|
||||
compressToolResults: this.config.compressToolResults,
|
||||
}
|
||||
|
||||
this.runner = new AgentRunner(
|
||||
|
|
|
|||
|
|
@ -98,6 +98,11 @@ export interface RunnerOptions {
|
|||
readonly maxTokenBudget?: number
|
||||
/** Optional context compression strategy for long multi-turn runs. */
|
||||
readonly contextStrategy?: ContextStrategy
|
||||
/**
|
||||
* Compress tool results that the agent has already processed.
|
||||
* See {@link AgentConfig.compressToolResults} for details.
|
||||
*/
|
||||
readonly compressToolResults?: boolean | { readonly minChars?: number }
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -176,6 +181,9 @@ function addTokenUsage(a: TokenUsage, b: TokenUsage): TokenUsage {
|
|||
|
||||
const ZERO_USAGE: TokenUsage = { input_tokens: 0, output_tokens: 0 }
|
||||
|
||||
/** Default minimum tool result content length, in characters; shorter results are never compressed. */
|
||||
const DEFAULT_MIN_COMPRESS_CHARS = 500
|
||||
|
||||
/**
|
||||
* Prepends synthetic framing text to the first user message so we never emit
|
||||
* consecutive `user` turns (Bedrock) and summaries do not concatenate onto
|
||||
|
|
@ -569,6 +577,12 @@ export class AgentRunner {
|
|||
|
||||
turns++
|
||||
|
||||
// Compress consumed tool results before context strategy (lightweight,
|
||||
// no LLM calls) so the strategy operates on already-reduced messages.
|
||||
if (this.options.compressToolResults && turns > 1) {
|
||||
conversationMessages = this.compressConsumedToolResults(conversationMessages)
|
||||
}
|
||||
|
||||
// Optionally compact context before each LLM call after the first turn.
|
||||
if (this.options.contextStrategy && turns > 1) {
|
||||
const compacted = await this.applyContextStrategy(
|
||||
|
|
@ -846,6 +860,75 @@ export class AgentRunner {
|
|||
// Private helpers
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
 * Replace consumed tool results with compact markers.
 *
 * A tool_result is treated as "consumed" when it sits in a user message that
 * comes before the most recent user message carrying tool results. That most
 * recent message is always kept intact — the LLM is about to see it.
 *
 * A result is left untouched when any of the following holds:
 * - it is flagged `is_error` (errors carry diagnostic value),
 * - it is shorter than `minChars`,
 * - it is already a compression marker (so the original size recorded in the
 *   marker survives repeated passes over the same conversation),
 * - the marker would not be shorter than the result itself.
 *
 * @param messages Conversation history in chronological order. Never mutated.
 * @returns A new array with fresh message objects for every message that
 *   changed, or the original `messages` reference when nothing was compressed.
 */
private compressConsumedToolResults(messages: LLMMessage[]): LLMMessage[] {
  const config = this.options.compressToolResults
  if (!config) return messages

  const minChars = typeof config === 'object'
    ? (config.minChars ?? DEFAULT_MIN_COMPRESS_CHARS)
    : DEFAULT_MIN_COMPRESS_CHARS

  // The marker is assembled from these two halves so that markers produced
  // by an earlier pass can be recognised and skipped.
  const MARKER_PREFIX = '[Tool output compressed — '
  const MARKER_SUFFIX = ' chars, already processed]'

  const carriesToolResult = (msg: LLMMessage): boolean =>
    msg.role === 'user' && msg.content.some(b => b.type === 'tool_result')

  // Find the last user message that carries tool_result blocks.
  let lastToolResultUserIdx = -1
  for (let i = messages.length - 1; i >= 0; i--) {
    if (carriesToolResult(messages[i]!)) {
      lastToolResultUserIdx = i
      break
    }
  }

  // Index -1: no tool results at all. Index 0: no earlier message exists.
  // Either way there is nothing that could be compressed.
  if (lastToolResultUserIdx <= 0) return messages

  let anyChanged = false
  const result = messages.map((msg, idx) => {
    // Only user messages with tool results that precede the last one qualify.
    if (idx >= lastToolResultUserIdx || !carriesToolResult(msg)) return msg

    let msgChanged = false
    const newContent = msg.content.map((block): ContentBlock => {
      if (block.type !== 'tool_result') return block

      // Never compress error results — they carry diagnostic value.
      if (block.is_error) return block

      // Skip short results — the marker itself has overhead.
      if (block.content.length < minChars) return block

      // Skip markers from a previous pass. Without this, a small `minChars`
      // would re-compress the marker and replace the original size with the
      // marker's own length.
      if (
        block.content.startsWith(MARKER_PREFIX) &&
        block.content.endsWith(MARKER_SUFFIX)
      ) {
        return block
      }

      const marker = `${MARKER_PREFIX}${block.content.length}${MARKER_SUFFIX}`

      // Replacing a result with an equal or longer marker frees no budget.
      if (marker.length >= block.content.length) return block

      msgChanged = true
      return {
        type: 'tool_result',
        tool_use_id: block.tool_use_id,
        content: marker,
      } satisfies ToolResultBlock
    })

    if (msgChanged) {
      anyChanged = true
      return { role: msg.role, content: newContent } as LLMMessage
    }
    return msg
  })

  // Hand back the original array when nothing changed so callers can rely on
  // reference equality to detect a no-op.
  return anyChanged ? result : messages
}
|
||||
|
||||
/**
|
||||
* Build the {@link ToolUseContext} passed to every tool execution.
|
||||
* Identifies this runner as the invoking agent.
|
||||
|
|
|
|||
15
src/types.ts
15
src/types.ts
|
|
@ -270,6 +270,21 @@ export interface AgentConfig {
|
|||
* takes priority over this value.
|
||||
*/
|
||||
readonly maxToolOutputChars?: number
|
||||
/**
|
||||
* Compress tool results that the agent has already processed.
|
||||
*
|
||||
* In multi-turn runs, tool results persist in the conversation even after the
|
||||
* agent has acted on them. When enabled, consumed tool results (those followed
|
||||
* by an assistant response) are replaced with a short marker before the next
|
||||
* LLM call, freeing context budget for new reasoning.
|
||||
*
|
||||
* - `true` — enable with default threshold (500 chars)
|
||||
* - `{ minChars: N }` — only compress results of at least N characters
|
||||
* - `false` / `undefined` — disabled (default)
|
||||
*
|
||||
* Error tool results are never compressed.
|
||||
*/
|
||||
readonly compressToolResults?: boolean | { readonly minChars?: number }
|
||||
/**
|
||||
* Optional Zod schema for structured output. When set, the agent's final
|
||||
* output is parsed as JSON and validated against this schema. A single
|
||||
|
|
|
|||
|
|
@ -0,0 +1,456 @@
|
|||
import { describe, it, expect } from 'vitest'
|
||||
import { z } from 'zod'
|
||||
import { AgentRunner } from '../src/agent/runner.js'
|
||||
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
|
||||
import { ToolExecutor } from '../src/tool/executor.js'
|
||||
import type { LLMAdapter, LLMMessage, LLMResponse } from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Builds a mock `end_turn` LLM response whose only content is one text block. */
function textResponse(text: string): LLMResponse {
  const uniqueSuffix = Math.random().toString(36).slice(2)
  const response: LLMResponse = {
    id: `resp-${uniqueSuffix}`,
    content: [{ type: 'text', text }],
    model: 'mock-model',
    stop_reason: 'end_turn',
    usage: { input_tokens: 10, output_tokens: 20 },
  }
  return response
}
|
||||
|
||||
/**
 * Builds a mock LLM response that requests a single call to `toolName`.
 * Both the response id and the tool_use id get a random suffix.
 */
function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse {
  const randomSuffix = (): string => Math.random().toString(36).slice(2)
  const responseId = `resp-${randomSuffix()}`
  const toolUseId = `tu-${randomSuffix()}`
  const response: LLMResponse = {
    id: responseId,
    content: [{ type: 'tool_use', id: toolUseId, name: toolName, input }],
    model: 'mock-model',
    stop_reason: 'tool_use',
    usage: { input_tokens: 15, output_tokens: 25 },
  }
  return response
}
|
||||
|
||||
/**
 * Creates a registry holding a single `echo` tool, plus an executor bound to it.
 * The tool ignores its input and always returns `toolOutput` as its data.
 */
function buildRegistryAndExecutor(
  toolOutput: string = 'x'.repeat(600),
): { registry: ToolRegistry; executor: ToolExecutor } {
  const echoTool = defineTool({
    name: 'echo',
    description: 'Echo input',
    inputSchema: z.object({ message: z.string() }),
    execute: async () => ({ data: toolOutput }),
  })

  const registry = new ToolRegistry()
  registry.register(echoTool)

  const executor = new ToolExecutor(registry)
  return { registry, executor }
}
|
||||
|
||||
/**
 * Creates a registry holding a single `fail` tool, plus an executor bound to it.
 * The tool always returns 600 `E` characters with `isError: true`.
 */
function buildErrorRegistryAndExecutor(): { registry: ToolRegistry; executor: ToolExecutor } {
  const failingTool = defineTool({
    name: 'fail',
    description: 'Always fails',
    inputSchema: z.object({ message: z.string() }),
    execute: async () => ({ data: 'E'.repeat(600), isError: true }),
  })

  const registry = new ToolRegistry()
  registry.register(failingTool)

  const executor = new ToolExecutor(registry)
  return { registry, executor }
}
|
||||
|
||||
/**
 * Collects the `content` string of every tool_result block, walking the
 * messages and their blocks in order.
 */
function extractToolResultContents(messages: LLMMessage[]): string[] {
  type ToolResultLike = { type: 'tool_result'; tool_use_id: string; content: string; is_error?: boolean }
  const isToolResult = (b: LLMMessage['content'][number]): b is ToolResultLike =>
    b.type === 'tool_result'

  const contents: string[] = []
  for (const message of messages) {
    for (const block of message.content) {
      if (isToolResult(block)) contents.push(block.content)
    }
  }
  return contents
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('AgentRunner compressToolResults', () => {
  /** Shape of a tool_result block as the assertions below need to see it. */
  type ToolResultLike = { type: 'tool_result'; tool_use_id: string; content: string }

  /** The single user prompt that opens every scenario; built fresh for each run. */
  const startMessages = (): LLMMessage[] => [
    { role: 'user', content: [{ type: 'text', text: 'start' }] },
  ]

  /**
   * Script of `toolTurns` consecutive calls to `toolName` (inputs `t1`, `t2`, …)
   * followed by a final text answer `done`.
   */
  const scriptedToolTurns = (toolName: string, toolTurns: number): LLMResponse[] => [
    ...Array.from({ length: toolTurns }, (_, i) =>
      toolUseResponse(toolName, { message: `t${i + 1}` })),
    textResponse('done'),
  ]

  /**
   * Creates a mock adapter that replays `responses` in order and records what
   * each `chat` call received. `calls[n]` holds the messages of the (n+1)-th call.
   */
  function createRecordingAdapter(
    responses: LLMResponse[],
  ): { adapter: LLMAdapter; calls: LLMMessage[][] } {
    const calls: LLMMessage[][] = []
    let next = 0
    const adapter: LLMAdapter = {
      name: 'mock',
      async chat(messages) {
        // Copy each message and its content array so the recording does not
        // alias the arrays passed in by the runner.
        calls.push(messages.map(m => ({ role: m.role, content: [...m.content] })))
        return responses[next++]!
      },
      async *stream() { /* unused */ },
    }
    return { adapter, calls }
  }

  it('does NOT compress when compressToolResults is not set (default)', async () => {
    const longOutput = 'x'.repeat(600)
    const { adapter, calls } = createRecordingAdapter(scriptedToolTurns('echo', 2))
    const { registry, executor } = buildRegistryAndExecutor(longOutput)
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 5,
      // compressToolResults not set
    })

    await runner.run(startMessages())

    // Turn 3 should still see full tool results from turn 1
    const allToolResults = extractToolResultContents(calls[2]!)
    // Guard against `every` passing vacuously on an empty list.
    expect(allToolResults).toHaveLength(2)
    expect(allToolResults.every(c => c === longOutput)).toBe(true)
  })

  it('compresses consumed tool results on turn 3+', async () => {
    const longOutput = 'x'.repeat(600)
    const { adapter, calls } = createRecordingAdapter(scriptedToolTurns('echo', 2))
    const { registry, executor } = buildRegistryAndExecutor(longOutput)
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 5,
      compressToolResults: true,
    })

    await runner.run(startMessages())

    // Turn 3: the LLM should see a compressed marker for turn 1 results
    // but the full output for turn 2 results (most recent, not yet consumed).
    const allToolResults = extractToolResultContents(calls[2]!)
    expect(allToolResults).toHaveLength(2)

    // First result (turn 1) should be compressed
    expect(allToolResults[0]).toContain('compressed')
    expect(allToolResults[0]).toContain('600 chars')

    // Second result (turn 2, most recent) should be preserved in full
    expect(allToolResults[1]).toBe(longOutput)
  })

  it('preserves tool_use_id on compressed results', async () => {
    const longOutput = 'x'.repeat(600)
    const { adapter, calls } = createRecordingAdapter(scriptedToolTurns('echo', 2))
    const { registry, executor } = buildRegistryAndExecutor(longOutput)
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 5,
      compressToolResults: true,
    })

    await runner.run(startMessages())

    // Turn 3: verify compressed result still has tool_use_id
    const toolResultBlocks = calls[2]!.flatMap(m =>
      m.content.filter(b => b.type === 'tool_result'),
    )
    // Guard against the loop below passing vacuously on an empty list.
    expect(toolResultBlocks).toHaveLength(2)
    for (const block of toolResultBlocks) {
      expect(block).toHaveProperty('tool_use_id')
      expect((block as { tool_use_id: string }).tool_use_id).toBeTruthy()
    }
  })

  it('skips short tool results below minChars threshold', async () => {
    const shortOutput = 'short' // 5 chars, well below 500 default
    const { adapter, calls } = createRecordingAdapter(scriptedToolTurns('echo', 2))
    const { registry, executor } = buildRegistryAndExecutor(shortOutput)
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 5,
      compressToolResults: true,
    })

    await runner.run(startMessages())

    // Turn 3: short results should NOT be compressed
    const allToolResults = extractToolResultContents(calls[2]!)
    expect(allToolResults).toHaveLength(2)
    expect(allToolResults.every(c => c === shortOutput)).toBe(true)
  })

  it('respects custom minChars threshold', async () => {
    const output = 'x'.repeat(200)
    const { adapter, calls } = createRecordingAdapter(scriptedToolTurns('echo', 2))
    const { registry, executor } = buildRegistryAndExecutor(output)
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 5,
      compressToolResults: { minChars: 100 },
    })

    await runner.run(startMessages())

    // With minChars=100, the 200-char output should be compressed
    const allToolResults = extractToolResultContents(calls[2]!)
    expect(allToolResults[0]).toContain('compressed')
    expect(allToolResults[0]).toContain('200 chars')
  })

  it('never compresses error tool results', async () => {
    const { adapter, calls } = createRecordingAdapter(scriptedToolTurns('fail', 2))
    const { registry, executor } = buildErrorRegistryAndExecutor()
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['fail'],
      maxTurns: 5,
      compressToolResults: true,
    })

    await runner.run(startMessages())

    // Error results should never be compressed even if long
    const allToolResults = extractToolResultContents(calls[2]!)
    expect(allToolResults).toHaveLength(2)
    expect(allToolResults.every(c => c === 'E'.repeat(600))).toBe(true)
  })

  it('compresses selectively in multi-block tool_result messages (parallel tool calls)', async () => {
    // Two tools: one returns long output, one returns short output
    const registry = new ToolRegistry()
    registry.register(
      defineTool({
        name: 'long_tool',
        description: 'Returns long output',
        inputSchema: z.object({ msg: z.string() }),
        async execute() { return { data: 'L'.repeat(600) } },
      }),
    )
    registry.register(
      defineTool({
        name: 'short_tool',
        description: 'Returns short output',
        inputSchema: z.object({ msg: z.string() }),
        async execute() { return { data: 'S'.repeat(50) } },
      }),
    )
    const executor = new ToolExecutor(registry)

    // Turn 1: model calls both tools in parallel
    const parallelResponse: LLMResponse = {
      id: 'resp-parallel',
      content: [
        { type: 'tool_use', id: 'tu-long', name: 'long_tool', input: { msg: 'a' } },
        { type: 'tool_use', id: 'tu-short', name: 'short_tool', input: { msg: 'b' } },
      ],
      model: 'mock-model',
      stop_reason: 'tool_use',
      usage: { input_tokens: 15, output_tokens: 25 },
    }
    const { adapter, calls } = createRecordingAdapter([
      parallelResponse,
      toolUseResponse('long_tool', { msg: 't2' }),
      textResponse('done'),
    ])

    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['long_tool', 'short_tool'],
      maxTurns: 5,
      compressToolResults: true,
    })

    await runner.run(startMessages())

    // Turn 3: the parallel results from turn 1 should be selectively compressed.
    // The long_tool result (600 chars) → compressed. The short_tool result (50 chars) → kept.
    const turn3Messages = calls[2]!

    // Find the results from turn 1 (first user message with tool_results)
    const firstToolResultMsg = turn3Messages.find(
      m => m.role === 'user' && m.content.some(b => b.type === 'tool_result'),
    )!
    const blocks = firstToolResultMsg.content.filter(
      (b): b is ToolResultLike => b.type === 'tool_result',
    )

    // One should be compressed (long), one should be intact (short)
    const compressedBlocks = blocks.filter(b => b.content.includes('compressed'))
    const intactBlocks = blocks.filter(b => !b.content.includes('compressed'))
    expect(compressedBlocks).toHaveLength(1)
    expect(compressedBlocks[0]!.content).toContain('600 chars')
    expect(intactBlocks).toHaveLength(1)
    expect(intactBlocks[0]!.content).toBe('S'.repeat(50))
  })

  it('compounds compression across 4+ turns', async () => {
    const longOutput = 'x'.repeat(600)
    const { adapter, calls } = createRecordingAdapter(scriptedToolTurns('echo', 3))
    const { registry, executor } = buildRegistryAndExecutor(longOutput)
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 6,
      compressToolResults: true,
    })

    await runner.run(startMessages())

    // Turn 4: turns 1 and 2 should both be compressed, turn 3 should be intact
    const allToolResults = extractToolResultContents(calls[3]!)
    expect(allToolResults).toHaveLength(3)

    // First two are compressed (turns 1 & 2)
    expect(allToolResults[0]).toContain('compressed')
    expect(allToolResults[1]).toContain('compressed')

    // Last one (turn 3, most recent) preserved
    expect(allToolResults[2]).toBe(longOutput)
  })

  it('works together with contextStrategy', async () => {
    const longOutput = 'x'.repeat(600)
    const { adapter, calls } = createRecordingAdapter(scriptedToolTurns('echo', 2))
    const { registry, executor } = buildRegistryAndExecutor(longOutput)
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 5,
      compressToolResults: true,
      contextStrategy: { type: 'sliding-window', maxTurns: 10 },
    })

    const result = await runner.run(startMessages())

    // Should complete without error; both features coexist
    expect(result.output).toBe('done')

    // Turn 3 should have compressed turn 1 results
    const allToolResults = extractToolResultContents(calls[2]!)
    expect(allToolResults[0]).toContain('compressed')
  })
})
|
||||
Loading…
Reference in New Issue