diff --git a/src/agent/runner.ts b/src/agent/runner.ts
index 1a5a594..fd8305d 100644
--- a/src/agent/runner.ts
+++ b/src/agent/runner.ts
@@ -400,6 +400,10 @@ export class AgentRunner {
       )
     }
 
+    if (strategy.type === 'compact') {
+      return { messages: this.compactMessages(messages, strategy), usage: ZERO_USAGE }
+    }
+
     const estimated = estimateTokens(messages)
     const compressed = await strategy.compress(messages, estimated)
     if (!Array.isArray(compressed) || compressed.length === 0) {
@@ -860,6 +864,145 @@ export class AgentRunner {
   // Private helpers
   // -------------------------------------------------------------------------
 
+  /**
+   * Rule-based selective context compaction (no LLM calls).
+   *
+   * Compresses old turns while preserving the conversation skeleton:
+   * - tool_use blocks (decisions) are always kept
+   * - Long tool_result content is replaced with a compact marker
+   * - Long assistant text blocks are truncated with an excerpt
+   * - Error tool_results are never compressed
+   * - Recent turns (within `preserveRecentTurns`) are kept intact
+   * - A block is only rewritten when the replacement is strictly shorter
+   *
+   * @param messages - Full conversation history; never mutated.
+   * @param strategy - The `compact` strategy options (thresholds and defaults).
+   * @returns The original array when nothing was compacted, otherwise a new
+   *   array that shares every untouched message with the input.
+   */
+  private compactMessages(
+    messages: LLMMessage[],
+    strategy: Extract<ContextStrategy, { type: 'compact' }>,
+  ): LLMMessage[] {
+    const estimated = estimateTokens(messages)
+    if (estimated <= strategy.maxTokens || messages.length < 4) {
+      return messages
+    }
+
+    const preserveRecent = strategy.preserveRecentTurns ?? 4
+    const minToolResultChars = strategy.minToolResultChars ?? 200
+    const minTextBlockChars = strategy.minTextBlockChars ?? 2000
+    // A negative excerpt length would make slice() count from the end; clamp to 0.
+    const textBlockExcerptChars = Math.max(0, strategy.textBlockExcerptChars ?? 200)
+
+    // Find the first user message — it is always preserved as-is.
+    const firstUserIndex = messages.findIndex(m => m.role === 'user')
+    if (firstUserIndex < 0 || firstUserIndex === messages.length - 1) {
+      return messages
+    }
+
+    // Walk backward to find the boundary between old and recent turns.
+    // A "turn pair" is an assistant message followed by a user message.
+    // `i > firstUserIndex` keeps `i - 1` in range, so no extra bounds check is needed.
+    let boundary = messages.length
+    let pairsFound = 0
+    for (let i = messages.length - 1; i > firstUserIndex && pairsFound < preserveRecent; i--) {
+      if (messages[i]!.role === 'user' && messages[i - 1]!.role === 'assistant') {
+        pairsFound++
+        boundary = i - 1
+      }
+    }
+
+    // If all turns fit within the recent window, nothing to compact.
+    if (boundary <= firstUserIndex + 1) {
+      return messages
+    }
+
+    // Build a tool_use_id → tool name lookup from old assistant messages.
+    const toolNameMap = new Map<string, string>()
+    for (let i = firstUserIndex + 1; i < boundary; i++) {
+      const msg = messages[i]!
+      if (msg.role !== 'assistant') continue
+      for (const block of msg.content) {
+        if (block.type === 'tool_use') {
+          toolNameMap.set(block.id, block.name)
+        }
+      }
+    }
+
+    // Process old messages (between first user and boundary).
+    let anyChanged = false
+    const result: LLMMessage[] = []
+
+    for (let i = 0; i < messages.length; i++) {
+      // First user message and recent messages: keep intact.
+      if (i <= firstUserIndex || i >= boundary) {
+        result.push(messages[i]!)
+        continue
+      }
+
+      const msg = messages[i]!
+      let msgChanged = false
+      const newContent = msg.content.map((block): ContentBlock => {
+        if (msg.role === 'assistant') {
+          // tool_use blocks: always preserve (decisions).
+          if (block.type === 'tool_use') return block
+          // Long text blocks: truncate with excerpt.
+          if (block.type === 'text' && block.text.length >= minTextBlockChars) {
+            const truncated = `${block.text.slice(0, textBlockExcerptChars)}... [truncated — ${block.text.length} chars total]`
+            // Never grow a block: skip when the excerpt plus marker is not shorter.
+            if (truncated.length >= block.text.length) return block
+            msgChanged = true
+            return { type: 'text', text: truncated } satisfies TextBlock
+          }
+          // Image blocks in old turns: replace with marker.
+          if (block.type === 'image') {
+            msgChanged = true
+            return { type: 'text', text: '[Image compacted]' } satisfies TextBlock
+          }
+          return block
+        }
+
+        // User messages in old zone.
+        if (block.type === 'tool_result') {
+          // Error results: always preserve.
+          if (block.is_error) return block
+          // Already compressed by compressToolResults or a prior compact pass.
+          if (
+            block.content.startsWith('[Tool output compressed') ||
+            block.content.startsWith('[Tool result:')
+          ) {
+            return block
+          }
+          // Short results: preserve.
+          if (block.content.length < minToolResultChars) return block
+          // Compress.
+          const toolName = toolNameMap.get(block.tool_use_id) ?? 'unknown'
+          const marker = `[Tool result: ${toolName} — ${block.content.length} chars, compacted]`
+          // Never grow a block: keep the original when the marker is not shorter.
+          if (marker.length >= block.content.length) return block
+          msgChanged = true
+          return {
+            type: 'tool_result',
+            tool_use_id: block.tool_use_id,
+            content: marker,
+          } satisfies ToolResultBlock
+        }
+        return block
+      })
+
+      if (msgChanged) {
+        anyChanged = true
+        // Spread keeps any other message fields and avoids a type assertion.
+        result.push({ ...msg, content: newContent })
+      } else {
+        result.push(msg)
+      }
+    }
+
+    return anyChanged ? result : messages
+  }
+
   /**
    * Replace consumed tool results with compact markers.
    *
diff --git a/src/types.ts b/src/types.ts
index f61decb..765d9fe 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -69,6 +69,19 @@ export interface LLMMessage {
 export type ContextStrategy =
   | { type: 'sliding-window'; maxTurns: number }
   | { type: 'summarize'; maxTokens: number; summaryModel?: string }
+  | {
+      type: 'compact'
+      /** Estimated token threshold that triggers compaction. Compaction is skipped when below this. */
+      maxTokens: number
+      /** Number of recent turn pairs (assistant+user) to keep intact. Default: 4. */
+      preserveRecentTurns?: number
+      /** Minimum chars in a tool_result content to qualify for compaction. Default: 200. */
+      minToolResultChars?: number
+      /** Minimum chars in an assistant text block to qualify for truncation. Default: 2000. */
+      minTextBlockChars?: number
+      /** Maximum chars to keep from a truncated text block (head excerpt). Default: 200. Negative values are treated as 0. */
+      textBlockExcerptChars?: number
+    }
   | {
       type: 'custom'
       compress: (
diff --git a/tests/context-strategy.test.ts b/tests/context-strategy.test.ts
index 7c847b0..711d134 100644
--- a/tests/context-strategy.test.ts
+++ b/tests/context-strategy.test.ts
@@ -199,4 +199,458 @@ describe('AgentRunner contextStrategy', () => {
     expect(compress).toHaveBeenCalledOnce()
     expect(calls[1]).toHaveLength(1)
   })
+
+  // ---------------------------------------------------------------------------
+  // compact strategy
+  // ---------------------------------------------------------------------------
+
+  describe('compact strategy', () => {
+    const longText = 'x'.repeat(3000)
+    const longToolResult = 'result-data '.repeat(100) // ~1200 chars
+
+    function buildMultiTurnAdapter(
+      responseCount: number,
+      calls: LLMMessage[][],
+    ): LLMAdapter {
+      const responses: LLMResponse[] = []
+      for (let i = 0; i < responseCount - 1; i++) {
+        responses.push(toolUseResponse('echo', { message: `turn-${i}` }))
+      }
+      responses.push(textResponse('done'))
+      let idx = 0
+      return {
+        name: 'mock',
+        async chat(messages) {
+          calls.push(messages.map(m => ({ role: m.role, content: m.content })))
+          return responses[idx++]!
+        },
+        async *stream() { /* unused */ },
+      }
+    }
+
+    /** Build a registry with an echo tool that returns a fixed result string. */
+    function buildEchoRegistry(result: string): { registry: ToolRegistry; executor: ToolExecutor } {
+      const registry = new ToolRegistry()
+      registry.register(
+        defineTool({
+          name: 'echo',
+          description: 'Echo input',
+          inputSchema: z.object({ message: z.string() }),
+          async execute() {
+            return { data: result }
+          },
+        }),
+      )
+      return { registry, executor: new ToolExecutor(registry) }
+    }
+
+    it('does not activate below maxTokens threshold', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(3, calls)
+      const { registry, executor } = buildEchoRegistry('short')
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: { type: 'compact', maxTokens: 999999 },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      // On the 3rd call (turn 3), all previous messages should still be intact
+      // because estimated tokens are way below the threshold.
+      const lastCall = calls[calls.length - 1]!
+      const allToolResults = lastCall.flatMap(m =>
+        m.content.filter(b => b.type === 'tool_result'),
+      )
+      for (const tr of allToolResults) {
+        if (tr.type === 'tool_result') {
+          expect(tr.content).not.toContain('compacted')
+        }
+      }
+    })
+
+    it('compresses old tool_result blocks when tokens exceed threshold', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(4, calls)
+      const { registry, executor } = buildEchoRegistry(longToolResult)
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20, // very low to always trigger
+          preserveRecentTurns: 1, // only protect the most recent turn
+          minToolResultChars: 100,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      // On the last call, old tool results should have compact markers.
+      const lastCall = calls[calls.length - 1]!
+      const toolResults = lastCall.flatMap(m =>
+        m.content.filter(b => b.type === 'tool_result'),
+      )
+      const compacted = toolResults.filter(
+        b => b.type === 'tool_result' && b.content.includes('compacted'),
+      )
+      expect(compacted.length).toBeGreaterThan(0)
+      // Marker should include tool name.
+      for (const tr of compacted) {
+        if (tr.type === 'tool_result') {
+          expect(tr.content).toMatch(/\[Tool result: echo/)
+        }
+      }
+    })
+
+    it('preserves the first user message', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(4, calls)
+      const { registry, executor } = buildEchoRegistry(longToolResult)
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 1,
+          minToolResultChars: 100,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'original prompt' }] }])
+
+      const lastCall = calls[calls.length - 1]!
+      const firstUser = lastCall.find(m => m.role === 'user')!
+      expect(firstUser.content[0]).toMatchObject({ type: 'text', text: 'original prompt' })
+    })
+
+    it('preserves tool_use blocks in old turns', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(4, calls)
+      const { registry, executor } = buildEchoRegistry(longToolResult)
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 1,
+          minToolResultChars: 100,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      // Every assistant message should still have its tool_use block.
+      const lastCall = calls[calls.length - 1]!
+      const assistantMsgs = lastCall.filter(m => m.role === 'assistant')
+      for (const msg of assistantMsgs) {
+        const toolUses = msg.content.filter(b => b.type === 'tool_use')
+        // The last assistant message is "done" (text only), others have tool_use.
+        if (msg.content.some(b => b.type === 'text' && b.text === 'done')) continue
+        expect(toolUses.length).toBeGreaterThan(0)
+      }
+    })
+
+    it('preserves error tool_result blocks', async () => {
+      const calls: LLMMessage[][] = []
+      const responses: LLMResponse[] = [
+        toolUseResponse('echo', { message: 'will-fail' }),
+        toolUseResponse('echo', { message: 'ok' }),
+        textResponse('done'),
+      ]
+      let idx = 0
+      const adapter: LLMAdapter = {
+        name: 'mock',
+        async chat(messages) {
+          calls.push(messages.map(m => ({ role: m.role, content: m.content })))
+          return responses[idx++]!
+        },
+        async *stream() { /* unused */ },
+      }
+      // Tool that fails on first call, succeeds on second.
+      let callCount = 0
+      const registry = new ToolRegistry()
+      registry.register(
+        defineTool({
+          name: 'echo',
+          description: 'Echo input',
+          inputSchema: z.object({ message: z.string() }),
+          async execute() {
+            callCount++
+            if (callCount === 1) {
+              throw new Error('deliberate error '.repeat(40))
+            }
+            return { data: longToolResult }
+          },
+        }),
+      )
+      const executor = new ToolExecutor(registry)
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 1,
+          minToolResultChars: 50,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      const lastCall = calls[calls.length - 1]!
+      const errorResults = lastCall.flatMap(m =>
+        m.content.filter(b => b.type === 'tool_result' && b.is_error),
+      )
+      // Error results should still have their original content (not compacted).
+      for (const er of errorResults) {
+        if (er.type === 'tool_result') {
+          expect(er.content).not.toContain('compacted')
+          expect(er.content).toContain('deliberate error')
+        }
+      }
+    })
+
+    it('does not re-compress markers from compressToolResults', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(4, calls)
+      const { registry, executor } = buildEchoRegistry(longToolResult)
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        compressToolResults: { minChars: 100 },
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 1,
+          minToolResultChars: 10,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      const lastCall = calls[calls.length - 1]!
+      const allToolResults = lastCall.flatMap(m =>
+        m.content.filter(b => b.type === 'tool_result'),
+      )
+      // No result should contain nested markers.
+      for (const tr of allToolResults) {
+        if (tr.type === 'tool_result') {
+          // Should not have a compact marker wrapping another marker.
+          const markerCount = (tr.content.match(/\[Tool/g) || []).length
+          expect(markerCount).toBeLessThanOrEqual(1)
+        }
+      }
+    })
+
+    it('truncates long assistant text blocks in old turns', async () => {
+      const calls: LLMMessage[][] = []
+      const responses: LLMResponse[] = [
+        // First turn: assistant with long text + tool_use
+        {
+          id: 'r1',
+          content: [
+            { type: 'text', text: longText },
+            { type: 'tool_use', id: 'tu-1', name: 'echo', input: { message: 'hi' } },
+          ],
+          model: 'mock-model',
+          stop_reason: 'tool_use',
+          usage: { input_tokens: 10, output_tokens: 20 },
+        },
+        toolUseResponse('echo', { message: 'turn2' }),
+        textResponse('done'),
+      ]
+      let idx = 0
+      const adapter: LLMAdapter = {
+        name: 'mock',
+        async chat(messages) {
+          calls.push(messages.map(m => ({ role: m.role, content: m.content })))
+          return responses[idx++]!
+        },
+        async *stream() { /* unused */ },
+      }
+      const { registry, executor } = buildEchoRegistry('short')
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 1,
+          minTextBlockChars: 500,
+          textBlockExcerptChars: 100,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      const lastCall = calls[calls.length - 1]!
+      // The first assistant message (old zone) should have its text truncated.
+      const firstAssistant = lastCall.find(m => m.role === 'assistant')!
+      const textBlocks = firstAssistant.content.filter(b => b.type === 'text')
+      const truncated = textBlocks.find(
+        b => b.type === 'text' && b.text.includes('truncated'),
+      )
+      expect(truncated).toBeDefined()
+      if (truncated && truncated.type === 'text') {
+        expect(truncated.text.length).toBeLessThan(longText.length)
+        expect(truncated.text).toContain(`${longText.length} chars total`)
+      }
+    })
+
+    it('keeps recent turns intact within preserveRecentTurns', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(4, calls)
+      const { registry, executor } = buildEchoRegistry(longToolResult)
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 1,
+          minToolResultChars: 100,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      // The most recent tool_result (last user message with tool_result) should
+      // still contain the original long content.
+      const lastCall = calls[calls.length - 1]!
+      const userMsgs = lastCall.filter(m => m.role === 'user')
+      const lastUserWithToolResult = [...userMsgs]
+        .reverse()
+        .find(m => m.content.some(b => b.type === 'tool_result'))
+      expect(lastUserWithToolResult).toBeDefined()
+      const recentTr = lastUserWithToolResult!.content.find(b => b.type === 'tool_result')
+      if (recentTr && recentTr.type === 'tool_result') {
+        expect(recentTr.content).not.toContain('compacted')
+        expect(recentTr.content).toContain('result-data')
+      }
+    })
+
+    it('does not compact when all turns fit in preserveRecentTurns', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(3, calls)
+      const { registry, executor } = buildEchoRegistry(longToolResult)
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 10, // way more than actual turns
+          minToolResultChars: 100,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      // All tool results should still have original content.
+      const lastCall = calls[calls.length - 1]!
+      const toolResults = lastCall.flatMap(m =>
+        m.content.filter(b => b.type === 'tool_result'),
+      )
+      for (const tr of toolResults) {
+        if (tr.type === 'tool_result') {
+          expect(tr.content).not.toContain('compacted')
+        }
+      }
+    })
+
+    it('maintains correct role alternation after compaction', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(5, calls)
+      const { registry, executor } = buildEchoRegistry(longToolResult)
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 10,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 1,
+          minToolResultChars: 100,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      // Check all LLM calls for role alternation.
+      for (const callMsgs of calls) {
+        for (let i = 1; i < callMsgs.length; i++) {
+          expect(callMsgs[i]!.role).not.toBe(callMsgs[i - 1]!.role)
+        }
+      }
+    })
+
+    it('returns ZERO_USAGE (no LLM cost from compaction)', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(4, calls)
+      const { registry, executor } = buildEchoRegistry(longToolResult)
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 1,
+          minToolResultChars: 100,
+        },
+      })
+
+      const result = await runner.run([
+        { role: 'user', content: [{ type: 'text', text: 'start' }] },
+      ])
+
+      // Token usage should only reflect the 4 actual LLM calls (no extra from compaction).
+      // Each toolUseResponse: input=15, output=25. textResponse: input=10, output=20.
+      // 3 tool calls + 1 final = (15*3 + 10) input, (25*3 + 20) output.
+      expect(result.tokenUsage.input_tokens).toBe(15 * 3 + 10)
+      expect(result.tokenUsage.output_tokens).toBe(25 * 3 + 20)
+    })
+
+    it('never replaces a tool_result with a longer marker', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(4, calls)
+      // Long enough to pass minToolResultChars, shorter than any compact marker.
+      const { registry, executor } = buildEchoRegistry('tiny result!')
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 1,
+          minToolResultChars: 10,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      const lastCall = calls[calls.length - 1]!
+      const toolResults = lastCall.flatMap(m =>
+        m.content.filter(b => b.type === 'tool_result'),
+      )
+      for (const tr of toolResults) {
+        if (tr.type === 'tool_result') {
+          expect(tr.content).not.toContain('compacted')
+        }
+      }
+    })
+  })
 })