Compare commits
3 Commits
c3ead26677
...
6de7bbd41f
| Author | SHA1 | Date |
|---|---|---|
|
|
6de7bbd41f | |
|
|
696269c924 | |
|
|
a6b5181c74 |
|
|
@ -154,6 +154,7 @@ export class Agent {
|
||||||
loopDetection: this.config.loopDetection,
|
loopDetection: this.config.loopDetection,
|
||||||
maxTokenBudget: this.config.maxTokenBudget,
|
maxTokenBudget: this.config.maxTokenBudget,
|
||||||
contextStrategy: this.config.contextStrategy,
|
contextStrategy: this.config.contextStrategy,
|
||||||
|
compressToolResults: this.config.compressToolResults,
|
||||||
}
|
}
|
||||||
|
|
||||||
this.runner = new AgentRunner(
|
this.runner = new AgentRunner(
|
||||||
|
|
|
||||||
|
|
@ -98,6 +98,11 @@ export interface RunnerOptions {
|
||||||
readonly maxTokenBudget?: number
|
readonly maxTokenBudget?: number
|
||||||
/** Optional context compression strategy for long multi-turn runs. */
|
/** Optional context compression strategy for long multi-turn runs. */
|
||||||
readonly contextStrategy?: ContextStrategy
|
readonly contextStrategy?: ContextStrategy
|
||||||
|
/**
|
||||||
|
* Compress tool results that the agent has already processed.
|
||||||
|
* See {@link AgentConfig.compressToolResults} for details.
|
||||||
|
*/
|
||||||
|
readonly compressToolResults?: boolean | { readonly minChars?: number }
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -176,6 +181,9 @@ function addTokenUsage(a: TokenUsage, b: TokenUsage): TokenUsage {
|
||||||
|
|
||||||
const ZERO_USAGE: TokenUsage = { input_tokens: 0, output_tokens: 0 }
|
const ZERO_USAGE: TokenUsage = { input_tokens: 0, output_tokens: 0 }
|
||||||
|
|
||||||
|
/** Default minimum content length before tool result compression kicks in. */
|
||||||
|
const DEFAULT_MIN_COMPRESS_CHARS = 500
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Prepends synthetic framing text to the first user message so we never emit
|
* Prepends synthetic framing text to the first user message so we never emit
|
||||||
* consecutive `user` turns (Bedrock) and summaries do not concatenate onto
|
* consecutive `user` turns (Bedrock) and summaries do not concatenate onto
|
||||||
|
|
@ -392,6 +400,10 @@ export class AgentRunner {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (strategy.type === 'compact') {
|
||||||
|
return { messages: this.compactMessages(messages, strategy), usage: ZERO_USAGE }
|
||||||
|
}
|
||||||
|
|
||||||
const estimated = estimateTokens(messages)
|
const estimated = estimateTokens(messages)
|
||||||
const compressed = await strategy.compress(messages, estimated)
|
const compressed = await strategy.compress(messages, estimated)
|
||||||
if (!Array.isArray(compressed) || compressed.length === 0) {
|
if (!Array.isArray(compressed) || compressed.length === 0) {
|
||||||
|
|
@ -569,6 +581,12 @@ export class AgentRunner {
|
||||||
|
|
||||||
turns++
|
turns++
|
||||||
|
|
||||||
|
// Compress consumed tool results before context strategy (lightweight,
|
||||||
|
// no LLM calls) so the strategy operates on already-reduced messages.
|
||||||
|
if (this.options.compressToolResults && turns > 1) {
|
||||||
|
conversationMessages = this.compressConsumedToolResults(conversationMessages)
|
||||||
|
}
|
||||||
|
|
||||||
// Optionally compact context before each LLM call after the first turn.
|
// Optionally compact context before each LLM call after the first turn.
|
||||||
if (this.options.contextStrategy && turns > 1) {
|
if (this.options.contextStrategy && turns > 1) {
|
||||||
const compacted = await this.applyContextStrategy(
|
const compacted = await this.applyContextStrategy(
|
||||||
|
|
@ -846,6 +864,205 @@ export class AgentRunner {
|
||||||
// Private helpers
|
// Private helpers
|
||||||
// -------------------------------------------------------------------------
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Rule-based selective context compaction (no LLM calls).
 *
 * Once the estimated token count exceeds `strategy.maxTokens`, older turns are
 * shrunk while the conversation skeleton is preserved:
 * - everything up to and including the first user message is kept as-is
 * - the most recent `preserveRecentTurns` assistant+user pairs are kept as-is
 * - tool_use blocks (decisions) are always kept
 * - error tool_results are never compressed
 * - long tool_result content is replaced with a compact marker
 * - long assistant text blocks are cut down to a head excerpt
 * - image blocks in old assistant messages are replaced with a text marker
 *
 * The pass is idempotent: markers and excerpts produced by an earlier pass
 * (or by `compressToolResults`) are recognised and left alone, and a block is
 * only replaced when the replacement is strictly shorter than the original.
 *
 * @param messages - Conversation so far; never mutated.
 * @param strategy - Settings of the `compact` context strategy.
 * @returns The input array itself when nothing changed, otherwise a new array
 *   that shares every unmodified message with the input.
 */
private compactMessages(
  messages: LLMMessage[],
  strategy: Extract<ContextStrategy, { type: 'compact' }>,
): LLMMessage[] {
  // Below the configured budget there is nothing to reclaim.
  if (estimateTokens(messages) <= strategy.maxTokens) {
    return messages
  }

  const preserveRecent = strategy.preserveRecentTurns ?? 4
  const minToolResultChars = strategy.minToolResultChars ?? 200
  const minTextBlockChars = strategy.minTextBlockChars ?? 2000
  // Clamp to zero: a negative length would make `slice(0, n)` count from the end.
  const textBlockExcerptChars = Math.max(0, strategy.textBlockExcerptChars ?? 200)
  // Matches the suffix appended below, i.e. an excerpt from a previous pass.
  const truncationSuffix = /\.\.\. \[truncated — \d+ chars total\]$/

  // The first user message is always preserved; with nothing after it there
  // is nothing to compact.
  const firstUserIndex = messages.findIndex(m => m.role === 'user')
  if (firstUserIndex < 0 || firstUserIndex === messages.length - 1) {
    return messages
  }

  // Walk backward to find the boundary between old and recent turns.
  // A "turn pair" is an assistant message immediately followed by a user message;
  // `boundary` ends up on the assistant message of the oldest preserved pair.
  let boundary = messages.length
  let pairsFound = 0
  for (let i = messages.length - 1; i > firstUserIndex && pairsFound < preserveRecent; i--) {
    if (messages[i]?.role === 'user' && messages[i - 1]?.role === 'assistant') {
      pairsFound++
      boundary = i - 1
    }
  }

  // If all turns fit within the recent window, nothing to compact.
  if (boundary <= firstUserIndex + 1) {
    return messages
  }

  // Build a tool_use_id → tool name lookup from the old assistant messages so
  // that compacted results can still say which tool produced them.
  const toolNameMap = new Map<string, string>()
  for (const msg of messages.slice(firstUserIndex + 1, boundary)) {
    if (msg.role !== 'assistant') continue
    for (const block of msg.content) {
      if (block.type === 'tool_use') {
        toolNameMap.set(block.id, block.name)
      }
    }
  }

  let anyChanged = false
  const result = messages.map((msg, i): LLMMessage => {
    // First user message (and anything before it) and recent messages: keep intact.
    if (i <= firstUserIndex || i >= boundary) {
      return msg
    }

    let msgChanged = false
    const newContent = msg.content.map((block): ContentBlock => {
      if (msg.role === 'assistant') {
        // tool_use blocks: always preserve (decisions).
        if (block.type === 'tool_use') return block

        // Long text blocks: truncate with excerpt.
        if (block.type === 'text' && block.text.length >= minTextBlockChars) {
          // An excerpt from an earlier pass must not be truncated again, or the
          // recorded original length would be overwritten.
          if (truncationSuffix.test(block.text)) return block
          const excerpt = `${block.text.slice(0, textBlockExcerptChars)}... [truncated — ${block.text.length} chars total]`
          // With small thresholds the excerpt can outgrow the text it replaces.
          if (excerpt.length >= block.text.length) return block
          msgChanged = true
          return { type: 'text', text: excerpt } satisfies TextBlock
        }

        // Image blocks in old turns: replace with marker.
        if (block.type === 'image') {
          msgChanged = true
          return { type: 'text', text: '[Image compacted]' } satisfies TextBlock
        }

        return block
      }

      // User messages in the old zone: only tool_result blocks are candidates.
      if (block.type !== 'tool_result') return block

      // Error results: always preserve.
      if (block.is_error) return block

      // Already compressed by compressToolResults or a prior compact pass.
      if (
        block.content.startsWith('[Tool output compressed') ||
        block.content.startsWith('[Tool result:')
      ) {
        return block
      }

      // Short results: preserve.
      if (block.content.length < minToolResultChars) return block

      const toolName = toolNameMap.get(block.tool_use_id) ?? 'unknown'
      const marker = `[Tool result: ${toolName} — ${block.content.length} chars, compacted]`
      // Never replace content with a marker that is not shorter than it.
      if (marker.length >= block.content.length) return block

      msgChanged = true
      return {
        type: 'tool_result',
        tool_use_id: block.tool_use_id,
        content: marker,
      } satisfies ToolResultBlock
    })

    if (!msgChanged) return msg
    anyChanged = true
    return { role: msg.role, content: newContent } as LLMMessage
  })

  // Hand back the original array when untouched so callers can compare by identity.
  return anyChanged ? result : messages
}
|
||||||
|
|
||||||
|
/**
 * Replace consumed tool results with compact markers.
 *
 * A tool_result is treated as "consumed" when it sits in a user message that
 * precedes the last user message carrying tool results. That last message is
 * always kept intact — the LLM is about to see it.
 *
 * Never compressed:
 * - error results (they carry diagnostic value)
 * - results shorter than `minChars` (default `DEFAULT_MIN_COMPRESS_CHARS`)
 * - results that already are a marker, whether written by this method or by
 *   the `compact` context strategy
 * - results for which the marker would not be shorter than the content
 *
 * @param messages - Conversation so far; never mutated.
 * @returns The input array itself when nothing changed, otherwise a new array
 *   that shares every unmodified message with the input.
 */
private compressConsumedToolResults(messages: LLMMessage[]): LLMMessage[] {
  const config = this.options.compressToolResults
  if (!config) return messages

  const minChars = typeof config === 'object'
    ? (config.minChars ?? DEFAULT_MIN_COMPRESS_CHARS)
    : DEFAULT_MIN_COMPRESS_CHARS

  // Find the last user message that carries tool_result blocks.
  let lastToolResultUserIdx = -1
  for (let i = messages.length - 1; i >= 0; i--) {
    const candidate = messages[i]
    if (candidate?.role === 'user' && candidate.content.some(b => b.type === 'tool_result')) {
      lastToolResultUserIdx = i
      break
    }
  }

  // No tool results at all, or the only candidate is the very first message:
  // nothing precedes it that could be compressed.
  if (lastToolResultUserIdx <= 0) return messages

  let anyChanged = false
  const result = messages.map((msg, idx): LLMMessage => {
    // Only user messages that appear before the last tool-result message qualify.
    if (msg.role !== 'user' || idx >= lastToolResultUserIdx) return msg

    let msgChanged = false
    const newContent = msg.content.map((block): ContentBlock => {
      if (block.type !== 'tool_result') return block

      // Never compress error results — they carry diagnostic value.
      if (block.is_error) return block

      // Skip results that are already markers (ours or the compact strategy's);
      // re-compressing them would report the marker's length, not the original's.
      if (
        block.content.startsWith('[Tool output compressed') ||
        block.content.startsWith('[Tool result:')
      ) {
        return block
      }

      // Skip short results — the marker itself has overhead.
      if (block.content.length < minChars) return block

      const marker = `[Tool output compressed — ${block.content.length} chars, already processed]`
      // With a very small `minChars` the marker can outgrow the content.
      if (marker.length >= block.content.length) return block

      msgChanged = true
      return {
        type: 'tool_result',
        tool_use_id: block.tool_use_id,
        content: marker,
      } satisfies ToolResultBlock
    })

    if (!msgChanged) return msg
    anyChanged = true
    return { role: msg.role, content: newContent } as LLMMessage
  })

  // Hand back the original array when untouched so callers can compare by identity.
  return anyChanged ? result : messages
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build the {@link ToolUseContext} passed to every tool execution.
|
* Build the {@link ToolUseContext} passed to every tool execution.
|
||||||
* Identifies this runner as the invoking agent.
|
* Identifies this runner as the invoking agent.
|
||||||
|
|
|
||||||
28
src/types.ts
28
src/types.ts
|
|
@ -69,6 +69,19 @@ export interface LLMMessage {
|
||||||
export type ContextStrategy =
|
export type ContextStrategy =
|
||||||
| { type: 'sliding-window'; maxTurns: number }
|
| { type: 'sliding-window'; maxTurns: number }
|
||||||
| { type: 'summarize'; maxTokens: number; summaryModel?: string }
|
| { type: 'summarize'; maxTokens: number; summaryModel?: string }
|
||||||
|
| {
|
||||||
|
type: 'compact'
|
||||||
|
/** Estimated token threshold that triggers compaction. Compaction is skipped when below this. */
|
||||||
|
maxTokens: number
|
||||||
|
/** Number of recent turn pairs (assistant+user) to keep intact. Default: 4. */
|
||||||
|
preserveRecentTurns?: number
|
||||||
|
/** Minimum chars in a tool_result content to qualify for compaction. Default: 200. */
|
||||||
|
minToolResultChars?: number
|
||||||
|
/** Minimum chars in an assistant text block to qualify for truncation. Default: 2000. */
|
||||||
|
minTextBlockChars?: number
|
||||||
|
/** Maximum chars to keep from a truncated text block (head excerpt). Default: 200. */
|
||||||
|
textBlockExcerptChars?: number
|
||||||
|
}
|
||||||
| {
|
| {
|
||||||
type: 'custom'
|
type: 'custom'
|
||||||
compress: (
|
compress: (
|
||||||
|
|
@ -270,6 +283,21 @@ export interface AgentConfig {
|
||||||
* takes priority over this value.
|
* takes priority over this value.
|
||||||
*/
|
*/
|
||||||
readonly maxToolOutputChars?: number
|
readonly maxToolOutputChars?: number
|
||||||
|
/**
|
||||||
|
* Compress tool results that the agent has already processed.
|
||||||
|
*
|
||||||
|
* In multi-turn runs, tool results persist in the conversation even after the
|
||||||
|
* agent has acted on them. When enabled, consumed tool results (those followed
|
||||||
|
* by an assistant response) are replaced with a short marker before the next
|
||||||
|
* LLM call, freeing context budget for new reasoning.
|
||||||
|
*
|
||||||
|
* - `true` — enable with default threshold (500 chars)
|
||||||
|
* - `{ minChars: N }` — only compress results of at least N characters
|
||||||
|
* - `false` / `undefined` — disabled (default)
|
||||||
|
*
|
||||||
|
* Error tool results are never compressed.
|
||||||
|
*/
|
||||||
|
readonly compressToolResults?: boolean | { readonly minChars?: number }
|
||||||
/**
|
/**
|
||||||
* Optional Zod schema for structured output. When set, the agent's final
|
* Optional Zod schema for structured output. When set, the agent's final
|
||||||
* output is parsed as JSON and validated against this schema. A single
|
* output is parsed as JSON and validated against this schema. A single
|
||||||
|
|
|
||||||
|
|
@ -199,4 +199,428 @@ describe('AgentRunner contextStrategy', () => {
|
||||||
expect(compress).toHaveBeenCalledOnce()
|
expect(compress).toHaveBeenCalledOnce()
|
||||||
expect(calls[1]).toHaveLength(1)
|
expect(calls[1]).toHaveLength(1)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// compact strategy
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('compact strategy', () => {
|
||||||
|
const longText = 'x'.repeat(3000)
|
||||||
|
const longToolResult = 'result-data '.repeat(100) // ~1200 chars
|
||||||
|
|
||||||
|
/**
 * Build a mock adapter that replays a fixed script: `responseCount - 1`
 * `echo` tool-use responses followed by a final `done` text response.
 * Each `chat` call appends a shallow snapshot of the messages it received
 * to `calls`.
 */
function buildMultiTurnAdapter(
  responseCount: number,
  calls: LLMMessage[][],
): LLMAdapter {
  const scripted: LLMResponse[] = [
    ...Array.from({ length: responseCount - 1 }, (_, turn) =>
      toolUseResponse('echo', { message: `turn-${turn}` }),
    ),
    textResponse('done'),
  ]
  let cursor = 0

  return {
    name: 'mock',
    async chat(messages) {
      const snapshot = messages.map(({ role, content }) => ({ role, content }))
      calls.push(snapshot)
      const next = scripted[cursor]!
      cursor += 1
      return next
    },
    async *stream() { /* unused */ },
  }
}
|
||||||
|
|
||||||
|
/**
 * Create a tool registry containing a single `echo` tool whose execution
 * always yields `result` as its data, together with an executor bound to
 * that registry.
 */
function buildEchoRegistry(result: string): { registry: ToolRegistry; executor: ToolExecutor } {
  const registry = new ToolRegistry()
  const echoTool = defineTool({
    name: 'echo',
    description: 'Echo input',
    inputSchema: z.object({ message: z.string() }),
    execute: async () => ({ data: result }),
  })
  registry.register(echoTool)

  const executor = new ToolExecutor(registry)
  return { registry, executor }
}
|
||||||
|
|
||||||
|
it('does not activate below maxTokens threshold', async () => {
|
||||||
|
const calls: LLMMessage[][] = []
|
||||||
|
const adapter = buildMultiTurnAdapter(3, calls)
|
||||||
|
const { registry, executor } = buildEchoRegistry('short')
|
||||||
|
const runner = new AgentRunner(adapter, registry, executor, {
|
||||||
|
model: 'mock-model',
|
||||||
|
allowedTools: ['echo'],
|
||||||
|
maxTurns: 8,
|
||||||
|
contextStrategy: { type: 'compact', maxTokens: 999999 },
|
||||||
|
})
|
||||||
|
|
||||||
|
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||||
|
|
||||||
|
// On the 3rd call (turn 3), all previous messages should still be intact
|
||||||
|
// because estimated tokens are way below the threshold.
|
||||||
|
const lastCall = calls[calls.length - 1]!
|
||||||
|
const allToolResults = lastCall.flatMap(m =>
|
||||||
|
m.content.filter(b => b.type === 'tool_result'),
|
||||||
|
)
|
||||||
|
for (const tr of allToolResults) {
|
||||||
|
if (tr.type === 'tool_result') {
|
||||||
|
expect(tr.content).not.toContain('compacted')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it('compresses old tool_result blocks when tokens exceed threshold', async () => {
|
||||||
|
const calls: LLMMessage[][] = []
|
||||||
|
const adapter = buildMultiTurnAdapter(4, calls)
|
||||||
|
const { registry, executor } = buildEchoRegistry(longToolResult)
|
||||||
|
const runner = new AgentRunner(adapter, registry, executor, {
|
||||||
|
model: 'mock-model',
|
||||||
|
allowedTools: ['echo'],
|
||||||
|
maxTurns: 8,
|
||||||
|
contextStrategy: {
|
||||||
|
type: 'compact',
|
||||||
|
maxTokens: 20, // very low to always trigger
|
||||||
|
preserveRecentTurns: 1, // only protect the most recent turn
|
||||||
|
minToolResultChars: 100,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||||
|
|
||||||
|
// On the last call, old tool results should have compact markers.
|
||||||
|
const lastCall = calls[calls.length - 1]!
|
||||||
|
const toolResults = lastCall.flatMap(m =>
|
||||||
|
m.content.filter(b => b.type === 'tool_result'),
|
||||||
|
)
|
||||||
|
const compacted = toolResults.filter(
|
||||||
|
b => b.type === 'tool_result' && b.content.includes('compacted'),
|
||||||
|
)
|
||||||
|
expect(compacted.length).toBeGreaterThan(0)
|
||||||
|
// Marker should include tool name.
|
||||||
|
for (const tr of compacted) {
|
||||||
|
if (tr.type === 'tool_result') {
|
||||||
|
expect(tr.content).toMatch(/\[Tool result: echo/)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it('preserves the first user message', async () => {
|
||||||
|
const calls: LLMMessage[][] = []
|
||||||
|
const adapter = buildMultiTurnAdapter(4, calls)
|
||||||
|
const { registry, executor } = buildEchoRegistry(longToolResult)
|
||||||
|
const runner = new AgentRunner(adapter, registry, executor, {
|
||||||
|
model: 'mock-model',
|
||||||
|
allowedTools: ['echo'],
|
||||||
|
maxTurns: 8,
|
||||||
|
contextStrategy: {
|
||||||
|
type: 'compact',
|
||||||
|
maxTokens: 20,
|
||||||
|
preserveRecentTurns: 1,
|
||||||
|
minToolResultChars: 100,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'original prompt' }] }])
|
||||||
|
|
||||||
|
const lastCall = calls[calls.length - 1]!
|
||||||
|
const firstUser = lastCall.find(m => m.role === 'user')!
|
||||||
|
expect(firstUser.content[0]).toMatchObject({ type: 'text', text: 'original prompt' })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('preserves tool_use blocks in old turns', async () => {
|
||||||
|
const calls: LLMMessage[][] = []
|
||||||
|
const adapter = buildMultiTurnAdapter(4, calls)
|
||||||
|
const { registry, executor } = buildEchoRegistry(longToolResult)
|
||||||
|
const runner = new AgentRunner(adapter, registry, executor, {
|
||||||
|
model: 'mock-model',
|
||||||
|
allowedTools: ['echo'],
|
||||||
|
maxTurns: 8,
|
||||||
|
contextStrategy: {
|
||||||
|
type: 'compact',
|
||||||
|
maxTokens: 20,
|
||||||
|
preserveRecentTurns: 1,
|
||||||
|
minToolResultChars: 100,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||||
|
|
||||||
|
// Every assistant message should still have its tool_use block.
|
||||||
|
const lastCall = calls[calls.length - 1]!
|
||||||
|
const assistantMsgs = lastCall.filter(m => m.role === 'assistant')
|
||||||
|
for (const msg of assistantMsgs) {
|
||||||
|
const toolUses = msg.content.filter(b => b.type === 'tool_use')
|
||||||
|
// The last assistant message is "done" (text only), others have tool_use.
|
||||||
|
if (msg.content.some(b => b.type === 'text' && b.text === 'done')) continue
|
||||||
|
expect(toolUses.length).toBeGreaterThan(0)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it('preserves error tool_result blocks', async () => {
|
||||||
|
const calls: LLMMessage[][] = []
|
||||||
|
const responses: LLMResponse[] = [
|
||||||
|
toolUseResponse('echo', { message: 'will-fail' }),
|
||||||
|
toolUseResponse('echo', { message: 'ok' }),
|
||||||
|
textResponse('done'),
|
||||||
|
]
|
||||||
|
let idx = 0
|
||||||
|
const adapter: LLMAdapter = {
|
||||||
|
name: 'mock',
|
||||||
|
async chat(messages) {
|
||||||
|
calls.push(messages.map(m => ({ role: m.role, content: m.content })))
|
||||||
|
return responses[idx++]!
|
||||||
|
},
|
||||||
|
async *stream() { /* unused */ },
|
||||||
|
}
|
||||||
|
// Tool that fails on first call, succeeds on second.
|
||||||
|
let callCount = 0
|
||||||
|
const registry = new ToolRegistry()
|
||||||
|
registry.register(
|
||||||
|
defineTool({
|
||||||
|
name: 'echo',
|
||||||
|
description: 'Echo input',
|
||||||
|
inputSchema: z.object({ message: z.string() }),
|
||||||
|
async execute() {
|
||||||
|
callCount++
|
||||||
|
if (callCount === 1) {
|
||||||
|
throw new Error('deliberate error '.repeat(40))
|
||||||
|
}
|
||||||
|
return { data: longToolResult }
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
const executor = new ToolExecutor(registry)
|
||||||
|
const runner = new AgentRunner(adapter, registry, executor, {
|
||||||
|
model: 'mock-model',
|
||||||
|
allowedTools: ['echo'],
|
||||||
|
maxTurns: 8,
|
||||||
|
contextStrategy: {
|
||||||
|
type: 'compact',
|
||||||
|
maxTokens: 20,
|
||||||
|
preserveRecentTurns: 1,
|
||||||
|
minToolResultChars: 50,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||||
|
|
||||||
|
const lastCall = calls[calls.length - 1]!
|
||||||
|
const errorResults = lastCall.flatMap(m =>
|
||||||
|
m.content.filter(b => b.type === 'tool_result' && b.is_error),
|
||||||
|
)
|
||||||
|
// Error results should still have their original content (not compacted).
|
||||||
|
for (const er of errorResults) {
|
||||||
|
if (er.type === 'tool_result') {
|
||||||
|
expect(er.content).not.toContain('compacted')
|
||||||
|
expect(er.content).toContain('deliberate error')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it('does not re-compress markers from compressToolResults', async () => {
|
||||||
|
const calls: LLMMessage[][] = []
|
||||||
|
const adapter = buildMultiTurnAdapter(4, calls)
|
||||||
|
const { registry, executor } = buildEchoRegistry(longToolResult)
|
||||||
|
const runner = new AgentRunner(adapter, registry, executor, {
|
||||||
|
model: 'mock-model',
|
||||||
|
allowedTools: ['echo'],
|
||||||
|
maxTurns: 8,
|
||||||
|
compressToolResults: { minChars: 100 },
|
||||||
|
contextStrategy: {
|
||||||
|
type: 'compact',
|
||||||
|
maxTokens: 20,
|
||||||
|
preserveRecentTurns: 1,
|
||||||
|
minToolResultChars: 10,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||||
|
|
||||||
|
const lastCall = calls[calls.length - 1]!
|
||||||
|
const allToolResults = lastCall.flatMap(m =>
|
||||||
|
m.content.filter(b => b.type === 'tool_result'),
|
||||||
|
)
|
||||||
|
// No result should contain nested markers.
|
||||||
|
for (const tr of allToolResults) {
|
||||||
|
if (tr.type === 'tool_result') {
|
||||||
|
// Should not have a compact marker wrapping another marker.
|
||||||
|
const markerCount = (tr.content.match(/\[Tool/g) || []).length
|
||||||
|
expect(markerCount).toBeLessThanOrEqual(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it('truncates long assistant text blocks in old turns', async () => {
|
||||||
|
const calls: LLMMessage[][] = []
|
||||||
|
const responses: LLMResponse[] = [
|
||||||
|
// First turn: assistant with long text + tool_use
|
||||||
|
{
|
||||||
|
id: 'r1',
|
||||||
|
content: [
|
||||||
|
{ type: 'text', text: longText },
|
||||||
|
{ type: 'tool_use', id: 'tu-1', name: 'echo', input: { message: 'hi' } },
|
||||||
|
],
|
||||||
|
model: 'mock-model',
|
||||||
|
stop_reason: 'tool_use',
|
||||||
|
usage: { input_tokens: 10, output_tokens: 20 },
|
||||||
|
},
|
||||||
|
toolUseResponse('echo', { message: 'turn2' }),
|
||||||
|
textResponse('done'),
|
||||||
|
]
|
||||||
|
let idx = 0
|
||||||
|
const adapter: LLMAdapter = {
|
||||||
|
name: 'mock',
|
||||||
|
async chat(messages) {
|
||||||
|
calls.push(messages.map(m => ({ role: m.role, content: m.content })))
|
||||||
|
return responses[idx++]!
|
||||||
|
},
|
||||||
|
async *stream() { /* unused */ },
|
||||||
|
}
|
||||||
|
const { registry, executor } = buildEchoRegistry('short')
|
||||||
|
const runner = new AgentRunner(adapter, registry, executor, {
|
||||||
|
model: 'mock-model',
|
||||||
|
allowedTools: ['echo'],
|
||||||
|
maxTurns: 8,
|
||||||
|
contextStrategy: {
|
||||||
|
type: 'compact',
|
||||||
|
maxTokens: 20,
|
||||||
|
preserveRecentTurns: 1,
|
||||||
|
minTextBlockChars: 500,
|
||||||
|
textBlockExcerptChars: 100,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||||
|
|
||||||
|
const lastCall = calls[calls.length - 1]!
|
||||||
|
// The first assistant message (old zone) should have its text truncated.
|
||||||
|
const firstAssistant = lastCall.find(m => m.role === 'assistant')!
|
||||||
|
const textBlocks = firstAssistant.content.filter(b => b.type === 'text')
|
||||||
|
const truncated = textBlocks.find(
|
||||||
|
b => b.type === 'text' && b.text.includes('truncated'),
|
||||||
|
)
|
||||||
|
expect(truncated).toBeDefined()
|
||||||
|
if (truncated && truncated.type === 'text') {
|
||||||
|
expect(truncated.text.length).toBeLessThan(longText.length)
|
||||||
|
expect(truncated.text).toContain(`${longText.length} chars total`)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it('keeps recent turns intact within preserveRecentTurns', async () => {
|
||||||
|
const calls: LLMMessage[][] = []
|
||||||
|
const adapter = buildMultiTurnAdapter(4, calls)
|
||||||
|
const { registry, executor } = buildEchoRegistry(longToolResult)
|
||||||
|
const runner = new AgentRunner(adapter, registry, executor, {
|
||||||
|
model: 'mock-model',
|
||||||
|
allowedTools: ['echo'],
|
||||||
|
maxTurns: 8,
|
||||||
|
contextStrategy: {
|
||||||
|
type: 'compact',
|
||||||
|
maxTokens: 20,
|
||||||
|
preserveRecentTurns: 1,
|
||||||
|
minToolResultChars: 100,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||||
|
|
||||||
|
// The most recent tool_result (last user message with tool_result) should
|
||||||
|
// still contain the original long content.
|
||||||
|
const lastCall = calls[calls.length - 1]!
|
||||||
|
const userMsgs = lastCall.filter(m => m.role === 'user')
|
||||||
|
const lastUserWithToolResult = [...userMsgs]
|
||||||
|
.reverse()
|
||||||
|
.find(m => m.content.some(b => b.type === 'tool_result'))
|
||||||
|
expect(lastUserWithToolResult).toBeDefined()
|
||||||
|
const recentTr = lastUserWithToolResult!.content.find(b => b.type === 'tool_result')
|
||||||
|
if (recentTr && recentTr.type === 'tool_result') {
|
||||||
|
expect(recentTr.content).not.toContain('compacted')
|
||||||
|
expect(recentTr.content).toContain('result-data')
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it('does not compact when all turns fit in preserveRecentTurns', async () => {
|
||||||
|
const calls: LLMMessage[][] = []
|
||||||
|
const adapter = buildMultiTurnAdapter(3, calls)
|
||||||
|
const { registry, executor } = buildEchoRegistry(longToolResult)
|
||||||
|
const runner = new AgentRunner(adapter, registry, executor, {
|
||||||
|
model: 'mock-model',
|
||||||
|
allowedTools: ['echo'],
|
||||||
|
maxTurns: 8,
|
||||||
|
contextStrategy: {
|
||||||
|
type: 'compact',
|
||||||
|
maxTokens: 20,
|
||||||
|
preserveRecentTurns: 10, // way more than actual turns
|
||||||
|
minToolResultChars: 100,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||||
|
|
||||||
|
// All tool results should still have original content.
|
||||||
|
const lastCall = calls[calls.length - 1]!
|
||||||
|
const toolResults = lastCall.flatMap(m =>
|
||||||
|
m.content.filter(b => b.type === 'tool_result'),
|
||||||
|
)
|
||||||
|
for (const tr of toolResults) {
|
||||||
|
if (tr.type === 'tool_result') {
|
||||||
|
expect(tr.content).not.toContain('compacted')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it('maintains correct role alternation after compaction', async () => {
|
||||||
|
const calls: LLMMessage[][] = []
|
||||||
|
const adapter = buildMultiTurnAdapter(5, calls)
|
||||||
|
const { registry, executor } = buildEchoRegistry(longToolResult)
|
||||||
|
const runner = new AgentRunner(adapter, registry, executor, {
|
||||||
|
model: 'mock-model',
|
||||||
|
allowedTools: ['echo'],
|
||||||
|
maxTurns: 10,
|
||||||
|
contextStrategy: {
|
||||||
|
type: 'compact',
|
||||||
|
maxTokens: 20,
|
||||||
|
preserveRecentTurns: 1,
|
||||||
|
minToolResultChars: 100,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||||
|
|
||||||
|
// Check all LLM calls for role alternation.
|
||||||
|
for (const callMsgs of calls) {
|
||||||
|
for (let i = 1; i < callMsgs.length; i++) {
|
||||||
|
expect(callMsgs[i]!.role).not.toBe(callMsgs[i - 1]!.role)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// Compaction is expected to be free: the reported usage must equal the sum
// of the mocked LLM responses and nothing more.
it('returns ZERO_USAGE (no LLM cost from compaction)', async () => {
  const recordedCalls: LLMMessage[][] = []
  const adapter = buildMultiTurnAdapter(4, recordedCalls)
  const { registry, executor } = buildEchoRegistry(longToolResult)

  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 100,
    },
  })

  const startMessage: LLMMessage = {
    role: 'user',
    content: [{ type: 'text', text: 'start' }],
  }
  const result = await runner.run([startMessage])

  // Token usage should only reflect the 4 actual LLM calls (no extra from compaction).
  // Each toolUseResponse: input=15, output=25. textResponse: input=10, output=20.
  // 3 tool calls + 1 final = (15*3 + 10) input, (25*3 + 20) output.
  const expectedInputTokens = 15 * 3 + 10
  const expectedOutputTokens = 25 * 3 + 20
  expect(result.tokenUsage.input_tokens).toBe(expectedInputTokens)
  expect(result.tokenUsage.output_tokens).toBe(expectedOutputTokens)
})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,498 @@
|
||||||
|
import { describe, it, expect } from 'vitest'
|
||||||
|
import { z } from 'zod'
|
||||||
|
import { AgentRunner } from '../src/agent/runner.js'
|
||||||
|
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
|
||||||
|
import { ToolExecutor } from '../src/tool/executor.js'
|
||||||
|
import type { LLMAdapter, LLMMessage, LLMResponse } from '../src/types.js'
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Creates a mock LLM response that ends the turn with a single text block.
 *
 * @param text - Text placed in the response's only content block.
 * @returns A response with a random `resp-…` id, `stop_reason: 'end_turn'`
 *   and a fixed usage of 10 input / 20 output tokens.
 */
function textResponse(text: string): LLMResponse {
  const randomSuffix = Math.random().toString(36).slice(2)
  const response: LLMResponse = {
    id: `resp-${randomSuffix}`,
    content: [{ type: 'text', text }],
    model: 'mock-model',
    stop_reason: 'end_turn',
    usage: { input_tokens: 10, output_tokens: 20 },
  }
  return response
}
|
||||||
|
|
||||||
|
/**
 * Creates a mock LLM response that requests exactly one tool call.
 *
 * @param toolName - Name of the tool the mock model asks to invoke.
 * @param input - Arguments passed through unchanged to the tool_use block.
 * @returns A response with a random `resp-…` id, one `tool_use` block with a
 *   random `tu-…` id, `stop_reason: 'tool_use'` and a fixed usage of
 *   15 input / 25 output tokens.
 */
function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse {
  const randomId = (prefix: string): string =>
    `${prefix}-${Math.random().toString(36).slice(2)}`

  const responseId = randomId('resp')
  const toolUseId = randomId('tu')

  return {
    id: responseId,
    content: [{ type: 'tool_use', id: toolUseId, name: toolName, input }],
    model: 'mock-model',
    stop_reason: 'tool_use',
    usage: { input_tokens: 15, output_tokens: 25 },
  }
}
|
||||||
|
|
||||||
|
/**
 * Creates a registry holding a single `echo` tool, plus an executor bound to
 * that registry.
 *
 * @param toolOutput - Data the `echo` tool returns on every call, regardless
 *   of its input. Defaults to 600 `x` characters.
 * @returns The registry and the executor constructed from it.
 */
function buildRegistryAndExecutor(
  toolOutput: string = 'x'.repeat(600),
): { registry: ToolRegistry; executor: ToolExecutor } {
  const registry = new ToolRegistry()

  const echoTool = defineTool({
    name: 'echo',
    description: 'Echo input',
    inputSchema: z.object({ message: z.string() }),
    execute: async () => ({ data: toolOutput }),
  })
  registry.register(echoTool)

  const executor = new ToolExecutor(registry)
  return { registry, executor }
}
|
||||||
|
|
||||||
|
/**
 * Creates a registry holding a single `fail` tool, plus an executor bound to
 * that registry.
 *
 * The `fail` tool always returns 600 `E` characters flagged with
 * `isError: true`.
 *
 * @returns The registry and the executor constructed from it.
 */
function buildErrorRegistryAndExecutor(): { registry: ToolRegistry; executor: ToolExecutor } {
  const registry = new ToolRegistry()

  const failingTool = defineTool({
    name: 'fail',
    description: 'Always fails',
    inputSchema: z.object({ message: z.string() }),
    execute: async () => ({ data: 'E'.repeat(600), isError: true }),
  })
  registry.register(failingTool)

  const executor = new ToolExecutor(registry)
  return { registry, executor }
}
|
||||||
|
|
||||||
|
/**
 * Collects the `content` string of every `tool_result` block found in the
 * given messages, in message order and then block order.
 *
 * @param messages - Messages as captured from a call to the LLM.
 * @returns One string per `tool_result` block; empty if there are none.
 */
function extractToolResultContents(messages: LLMMessage[]): string[] {
  type ToolResultBlock = {
    type: 'tool_result'
    tool_use_id: string
    content: string
    is_error?: boolean
  }
  const isToolResult = (
    block: LLMMessage['content'][number],
  ): block is ToolResultBlock => block.type === 'tool_result'

  const contents: string[] = []
  for (const message of messages) {
    for (const block of message.content) {
      if (isToolResult(block)) {
        contents.push(block.content)
      }
    }
  }
  return contents
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Tests
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Builds a mock adapter that replays `responses` in order and records the
 * messages it receives on each `chat` call.
 *
 * @param responses - Scripted responses, returned one per `chat` call.
 * @param calls - Output array; each `chat` call appends a copy of its
 *   messages (each message's content array is shallow-copied).
 * @returns An adapter whose `stream` method is an unused stub.
 */
function buildRecordingAdapter(
  responses: readonly LLMResponse[],
  calls: LLMMessage[][],
): LLMAdapter {
  let nextResponse = 0
  return {
    name: 'mock',
    async chat(messages) {
      calls.push(messages.map(m => ({ role: m.role, content: [...m.content] })))
      return responses[nextResponse++]!
    },
    async *stream() { /* unused */ },
  }
}

// Every test scripts a short tool-calling conversation and then inspects the
// messages captured on a later LLM call to see which tool results were
// replaced by a compression marker.
describe('AgentRunner compressToolResults', () => {
  it('does NOT compress when compressToolResults is not set (default)', async () => {
    const calls: LLMMessage[][] = []
    const longOutput = 'x'.repeat(600)
    const adapter = buildRecordingAdapter(
      [
        toolUseResponse('echo', { message: 't1' }),
        toolUseResponse('echo', { message: 't2' }),
        textResponse('done'),
      ],
      calls,
    )
    const { registry, executor } = buildRegistryAndExecutor(longOutput)
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 5,
      // compressToolResults not set
    })

    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

    // Turn 3 should still see full tool results from turn 1
    const turn3Messages = calls[2]!
    const allToolResults = extractToolResultContents(turn3Messages)
    // Guard against `every` passing vacuously on an empty list.
    expect(allToolResults).toHaveLength(2)
    expect(allToolResults.every(c => c === longOutput)).toBe(true)
  })

  it('compresses consumed tool results on turn 3+', async () => {
    const calls: LLMMessage[][] = []
    const longOutput = 'x'.repeat(600)
    const adapter = buildRecordingAdapter(
      [
        toolUseResponse('echo', { message: 't1' }),
        toolUseResponse('echo', { message: 't2' }),
        textResponse('done'),
      ],
      calls,
    )
    const { registry, executor } = buildRegistryAndExecutor(longOutput)
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 5,
      compressToolResults: true,
    })

    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

    // Turn 3: the LLM should see a compressed marker for turn 1 results
    // but the full output for turn 2 results (most recent, not yet consumed).
    const turn3Messages = calls[2]!
    const allToolResults = extractToolResultContents(turn3Messages)
    expect(allToolResults).toHaveLength(2)

    // First result (turn 1) should be compressed
    expect(allToolResults[0]).toContain('compressed')
    expect(allToolResults[0]).toContain('600 chars')

    // Second result (turn 2, most recent) should be preserved in full
    expect(allToolResults[1]).toBe(longOutput)
  })

  it('preserves tool_use_id on compressed results', async () => {
    const calls: LLMMessage[][] = []
    const longOutput = 'x'.repeat(600)
    const adapter = buildRecordingAdapter(
      [
        toolUseResponse('echo', { message: 't1' }),
        toolUseResponse('echo', { message: 't2' }),
        textResponse('done'),
      ],
      calls,
    )
    const { registry, executor } = buildRegistryAndExecutor(longOutput)
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 5,
      compressToolResults: true,
    })

    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

    // Turn 3: verify compressed result still has tool_use_id
    const turn3Messages = calls[2]!
    const toolResultBlocks = turn3Messages.flatMap(m =>
      m.content.filter(b => b.type === 'tool_result'),
    )
    // Guard against the loop below passing vacuously with no blocks.
    expect(toolResultBlocks).toHaveLength(2)
    for (const block of toolResultBlocks) {
      expect(block).toHaveProperty('tool_use_id')
      expect((block as { tool_use_id: string }).tool_use_id).toBeTruthy()
    }
  })

  it('skips short tool results below minChars threshold', async () => {
    const calls: LLMMessage[][] = []
    const shortOutput = 'short' // 5 chars, well below 500 default
    const adapter = buildRecordingAdapter(
      [
        toolUseResponse('echo', { message: 't1' }),
        toolUseResponse('echo', { message: 't2' }),
        textResponse('done'),
      ],
      calls,
    )
    const { registry, executor } = buildRegistryAndExecutor(shortOutput)
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 5,
      compressToolResults: true,
    })

    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

    // Turn 3: short results should NOT be compressed
    const turn3Messages = calls[2]!
    const allToolResults = extractToolResultContents(turn3Messages)
    // Guard against `every` passing vacuously on an empty list.
    expect(allToolResults).toHaveLength(2)
    expect(allToolResults.every(c => c === shortOutput)).toBe(true)
  })

  it('respects custom minChars threshold', async () => {
    const calls: LLMMessage[][] = []
    const output = 'x'.repeat(200)
    const adapter = buildRecordingAdapter(
      [
        toolUseResponse('echo', { message: 't1' }),
        toolUseResponse('echo', { message: 't2' }),
        textResponse('done'),
      ],
      calls,
    )
    const { registry, executor } = buildRegistryAndExecutor(output)
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 5,
      compressToolResults: { minChars: 100 },
    })

    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

    // With minChars=100, the 200-char output should be compressed
    const turn3Messages = calls[2]!
    const allToolResults = extractToolResultContents(turn3Messages)
    expect(allToolResults).toHaveLength(2)
    expect(allToolResults[0]).toContain('compressed')
    expect(allToolResults[0]).toContain('200 chars')

    // The most recent result (turn 2) has not been consumed yet and stays intact
    expect(allToolResults[1]).toBe(output)
  })

  it('never compresses error tool results', async () => {
    const calls: LLMMessage[][] = []
    const adapter = buildRecordingAdapter(
      [
        toolUseResponse('fail', { message: 't1' }),
        toolUseResponse('fail', { message: 't2' }),
        textResponse('done'),
      ],
      calls,
    )
    const { registry, executor } = buildErrorRegistryAndExecutor()
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['fail'],
      maxTurns: 5,
      compressToolResults: true,
    })

    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

    // Error results should never be compressed even if long
    const turn3Messages = calls[2]!
    const allToolResults = extractToolResultContents(turn3Messages)
    // Guard against `every` passing vacuously on an empty list.
    expect(allToolResults).toHaveLength(2)
    expect(allToolResults.every(c => c === 'E'.repeat(600))).toBe(true)
  })

  it('compresses selectively in multi-block tool_result messages (parallel tool calls)', async () => {
    const calls: LLMMessage[][] = []

    // Two tools: one returns long output, one returns short output
    const registry = new ToolRegistry()
    registry.register(
      defineTool({
        name: 'long_tool',
        description: 'Returns long output',
        inputSchema: z.object({ msg: z.string() }),
        async execute() { return { data: 'L'.repeat(600) } },
      }),
    )
    registry.register(
      defineTool({
        name: 'short_tool',
        description: 'Returns short output',
        inputSchema: z.object({ msg: z.string() }),
        async execute() { return { data: 'S'.repeat(50) } },
      }),
    )
    const executor = new ToolExecutor(registry)

    // Turn 1: model calls both tools in parallel
    const parallelResponse: LLMResponse = {
      id: 'resp-parallel',
      content: [
        { type: 'tool_use', id: 'tu-long', name: 'long_tool', input: { msg: 'a' } },
        { type: 'tool_use', id: 'tu-short', name: 'short_tool', input: { msg: 'b' } },
      ],
      model: 'mock-model',
      stop_reason: 'tool_use',
      usage: { input_tokens: 15, output_tokens: 25 },
    }
    const adapter = buildRecordingAdapter(
      [
        parallelResponse,
        toolUseResponse('long_tool', { msg: 't2' }),
        textResponse('done'),
      ],
      calls,
    )

    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['long_tool', 'short_tool'],
      maxTurns: 5,
      compressToolResults: true,
    })

    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

    // Turn 3: the parallel results from turn 1 should be selectively compressed.
    // The long_tool result (600 chars) → compressed. The short_tool result (50 chars) → kept.
    const turn3Messages = calls[2]!
    // Find the results from turn 1 (first user message with tool_results)
    const firstToolResultMsg = turn3Messages.find(
      m => m.role === 'user' && m.content.some(b => b.type === 'tool_result'),
    )!
    const blocks = firstToolResultMsg.content.filter(
      (b): b is { type: 'tool_result'; tool_use_id: string; content: string } =>
        b.type === 'tool_result',
    )

    // One should be compressed (long), one should be intact (short)
    const compressedBlocks = blocks.filter(b => b.content.includes('compressed'))
    const intactBlocks = blocks.filter(b => !b.content.includes('compressed'))
    expect(compressedBlocks).toHaveLength(1)
    expect(compressedBlocks[0]!.content).toContain('600 chars')
    expect(intactBlocks).toHaveLength(1)
    expect(intactBlocks[0]!.content).toBe('S'.repeat(50))
  })

  it('compounds compression across 4+ turns', async () => {
    const calls: LLMMessage[][] = []
    const longOutput = 'x'.repeat(600)
    const adapter = buildRecordingAdapter(
      [
        toolUseResponse('echo', { message: 't1' }),
        toolUseResponse('echo', { message: 't2' }),
        toolUseResponse('echo', { message: 't3' }),
        textResponse('done'),
      ],
      calls,
    )
    const { registry, executor } = buildRegistryAndExecutor(longOutput)
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 6,
      compressToolResults: true,
    })

    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

    // Turn 4: turns 1 and 2 should both be compressed, turn 3 should be intact
    const turn4Messages = calls[3]!
    const allToolResults = extractToolResultContents(turn4Messages)
    expect(allToolResults).toHaveLength(3)

    // First two are compressed (turns 1 & 2)
    expect(allToolResults[0]).toContain('compressed')
    expect(allToolResults[1]).toContain('compressed')

    // Last one (turn 3, most recent) preserved
    expect(allToolResults[2]).toBe(longOutput)
  })

  it('does not re-compress already compressed markers with low minChars', async () => {
    const calls: LLMMessage[][] = []
    const longOutput = 'x'.repeat(600)
    const adapter = buildRecordingAdapter(
      [
        toolUseResponse('echo', { message: 't1' }),
        toolUseResponse('echo', { message: 't2' }),
        toolUseResponse('echo', { message: 't3' }),
        textResponse('done'),
      ],
      calls,
    )
    const { registry, executor } = buildRegistryAndExecutor(longOutput)
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 6,
      compressToolResults: { minChars: 10 }, // very low threshold
    })

    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

    // Turn 4: turn 1 was compressed in turn 3. With minChars=10 the marker
    // itself (55 chars) exceeds the threshold. Without the guard it would be
    // re-compressed with a wrong char count (55 instead of 600).
    const turn4Messages = calls[3]!
    const allToolResults = extractToolResultContents(turn4Messages)
    expect(allToolResults).toHaveLength(3)

    // Turn 1 result: should still show original 600 chars, not re-compressed
    expect(allToolResults[0]).toContain('600 chars')
    // Turn 2 result: compressed for the first time this turn
    expect(allToolResults[1]).toContain('600 chars')
    // Turn 3 result: most recent, preserved in full
    expect(allToolResults[2]).toBe(longOutput)
  })

  it('works together with contextStrategy', async () => {
    const calls: LLMMessage[][] = []
    const longOutput = 'x'.repeat(600)
    const adapter = buildRecordingAdapter(
      [
        toolUseResponse('echo', { message: 't1' }),
        toolUseResponse('echo', { message: 't2' }),
        textResponse('done'),
      ],
      calls,
    )
    const { registry, executor } = buildRegistryAndExecutor(longOutput)
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 5,
      compressToolResults: true,
      contextStrategy: { type: 'sliding-window', maxTurns: 10 },
    })

    const result = await runner.run([
      { role: 'user', content: [{ type: 'text', text: 'start' }] },
    ])

    // Should complete without error; both features coexist
    expect(result.output).toBe('done')

    // Turn 3 should have compressed turn 1 results
    const turn3Messages = calls[2]!
    const allToolResults = extractToolResultContents(turn3Messages)
    expect(allToolResults[0]).toContain('compressed')
  })
})
|
||||||
Loading…
Reference in New Issue