feat: add rule-based compact context strategy (#111)

Add `contextStrategy: 'compact'` as a zero-LLM-cost alternative to `summarize`. Instead of making an LLM call to compress everything into prose, it selectively compresses old turns using structural rules:

- Preserve tool_use blocks (agent decisions) and error tool_results
- Replace long tool_result content with compact markers including tool name
- Truncate long assistant text blocks with head excerpts
- Keep recent turns (configurable via preserveRecentTurns) fully intact
- Detect already-compressed markers from compressToolResults to avoid double-processing

Closes #111
2026-04-16 23:16:30 +08:00 · 2026-04-16 23:16:30 +08:00 · d21d4e035a
parent 696269c924
commit d21d4e035a
3 changed files with 568 additions and 0 deletions
--- a/src/agent/runner.ts
+++ b/src/agent/runner.ts
@ -400,6 +400,10 @@ export class AgentRunner {
      )
    }
    if (strategy.type === 'compact') {
      return { messages: this.compactMessages(messages, strategy), usage: ZERO_USAGE }
    }
    const estimated = estimateTokens(messages)
    const compressed = await strategy.compress(messages, estimated)
    if (!Array.isArray(compressed) || compressed.length === 0) {
@ -860,6 +864,133 @@ export class AgentRunner {
  // Private helpers
  // -------------------------------------------------------------------------
  /**
   * Rule-based selective context compaction (no LLM calls).
   *
   * The conversation is split into three zones and only the middle one is
   * rewritten:
   * 1. Everything up to and including the first user message — kept as-is.
   * 2. "Old" messages between the first user message and the recent window —
   *    compacted block by block.
   * 3. The most recent `preserveRecentTurns` assistant+user pairs, plus anything
   *    after them — kept as-is.
   *
   * Rules applied to old messages:
   * - Assistant `tool_use` blocks (decisions) are always kept.
   * - Assistant text blocks of at least `minTextBlockChars` are cut down to a
   *   head excerpt followed by a `[truncated — N chars total]` marker.
   * - Assistant image blocks are replaced with an `[Image compacted]` marker.
   * - User `tool_result` blocks of at least `minToolResultChars` are replaced
   *   with a `[Tool result: <name> — N chars, compacted]` marker.
   * - Error tool_results are never compressed.
   * - Every other block (including user text and user images) passes through.
   *
   * A rewrite is skipped whenever it would not make the block shorter, and
   * blocks that already carry a marker from an earlier pass are left alone, so
   * feeding the output back into this method does not change it further.
   *
   * @param messages - Full conversation; never mutated.
   * @param strategy - Options of the `compact` strategy (thresholds and sizes).
   * @returns The same `messages` array when nothing was compacted, otherwise a
   *   new array that shares every unchanged message object with the input.
   */
  private compactMessages(
    messages: LLMMessage[],
    strategy: Extract<ContextStrategy, { type: 'compact' }>,
  ): LLMMessage[] {
    // Cheap exits: within the token budget, or fewer than four messages.
    const estimated = estimateTokens(messages)
    if (estimated <= strategy.maxTokens || messages.length < 4) {
      return messages
    }
    const preserveRecent = strategy.preserveRecentTurns ?? 4
    const minToolResultChars = strategy.minToolResultChars ?? 200
    const minTextBlockChars = strategy.minTextBlockChars ?? 2000
    // Clamp to zero: a negative length would make slice() drop characters from
    // the end of the text instead of keeping a head excerpt.
    const textBlockExcerptChars = Math.max(0, strategy.textBlockExcerptChars ?? 200)
    // Matches the suffix this method appends to truncated text blocks.
    const truncatedSuffix = /\.\.\. \[truncated — \d+ chars total\]$/
    // Find the first user message — it is always preserved as-is.
    const firstUserIndex = messages.findIndex(m => m.role === 'user')
    if (firstUserIndex < 0 || firstUserIndex === messages.length - 1) {
      return messages
    }
    // Walk backward to find the boundary between old and recent turns.
    // A "turn pair" is an assistant message followed by a user message; the
    // boundary ends up on the assistant message of the oldest preserved pair.
    // (`i > firstUserIndex` already guarantees that `i - 1` is a valid index.)
    let boundary = messages.length
    let pairsFound = 0
    for (let i = messages.length - 1; i > firstUserIndex && pairsFound < preserveRecent; i--) {
      if (messages[i]!.role === 'user' && messages[i - 1]!.role === 'assistant') {
        pairsFound++
        boundary = i - 1
      }
    }
    // If all turns fit within the recent window, nothing to compact.
    if (boundary <= firstUserIndex + 1) {
      return messages
    }
    // Build a tool_use_id → tool name lookup from old assistant messages so
    // that compacted tool results can still say which tool produced them.
    const toolNameMap = new Map<string, string>()
    for (let i = firstUserIndex + 1; i < boundary; i++) {
      const msg = messages[i]!
      if (msg.role !== 'assistant') continue
      for (const block of msg.content) {
        if (block.type === 'tool_use') {
          toolNameMap.set(block.id, block.name)
        }
      }
    }
    // Process old messages (between first user and boundary).
    let anyChanged = false
    const result: LLMMessage[] = []
    for (let i = 0; i < messages.length; i++) {
      // First user message and recent messages: keep intact.
      if (i <= firstUserIndex || i >= boundary) {
        result.push(messages[i]!)
        continue
      }
      const msg = messages[i]!
      let msgChanged = false
      const newContent = msg.content.map((block): ContentBlock => {
        if (msg.role === 'assistant') {
          // tool_use blocks: always preserve (decisions).
          if (block.type === 'tool_use') return block
          // Long text blocks: truncate with excerpt.
          if (block.type === 'text' && block.text.length >= minTextBlockChars) {
            // Already truncated by a prior compact pass: truncating again would
            // overwrite the recorded original length with the excerpt's length.
            if (truncatedSuffix.test(block.text)) return block
            const truncatedText = `${block.text.slice(0, textBlockExcerptChars)}... [truncated — ${block.text.length} chars total]`
            // Excerpt plus marker is not shorter than the original: keep it.
            if (truncatedText.length >= block.text.length) return block
            msgChanged = true
            return { type: 'text', text: truncatedText } satisfies TextBlock
          }
          // Image blocks in old assistant turns: replace with marker.
          if (block.type === 'image') {
            msgChanged = true
            return { type: 'text', text: '[Image compacted]' } satisfies TextBlock
          }
          return block
        }
        // User messages in old zone: only tool_result blocks are candidates.
        if (block.type === 'tool_result') {
          // Error results: always preserve.
          if (block.is_error) return block
          // Already compressed by compressToolResults or a prior compact pass.
          if (
            block.content.startsWith('[Tool output compressed') ||
            block.content.startsWith('[Tool result:')
          ) {
            return block
          }
          // Short results: preserve.
          if (block.content.length < minToolResultChars) return block
          // Compress, unless the marker would be no shorter than the content
          // (possible when minToolResultChars is configured very low).
          const toolName = toolNameMap.get(block.tool_use_id) ?? 'unknown'
          const marker = `[Tool result: ${toolName} — ${block.content.length} chars, compacted]`
          if (marker.length >= block.content.length) return block
          msgChanged = true
          return {
            type: 'tool_result',
            tool_use_id: block.tool_use_id,
            content: marker,
          } satisfies ToolResultBlock
        }
        return block
      })
      if (msgChanged) {
        anyChanged = true
        result.push({ role: msg.role, content: newContent } as LLMMessage)
      } else {
        // Reuse the original object so unchanged messages keep their identity.
        result.push(msg)
      }
    }
    return anyChanged ? result : messages
  }
  /**
   * Replace consumed tool results with compact markers.
   *
--- a/src/types.ts
+++ b/src/types.ts
@ -69,6 +69,19 @@ export interface LLMMessage {
 export type ContextStrategy =
  | { type: 'sliding-window'; maxTurns: number }
  | { type: 'summarize'; maxTokens: number; summaryModel?: string }
  | {
    type: 'compact'
    /** Estimated token threshold that triggers compaction. Compaction is skipped when below this. */
    maxTokens: number
    /** Number of recent turn pairs (assistant+user) to keep intact. Default: 4. */
    preserveRecentTurns?: number
    /** Minimum chars in a tool_result content to qualify for compaction. Default: 200. */
    minToolResultChars?: number
    /** Minimum chars in an assistant text block to qualify for truncation. Default: 2000. */
    minTextBlockChars?: number
    /** Maximum chars to keep from a truncated text block (head excerpt). Default: 200. */
    textBlockExcerptChars?: number
  }
  | {
    type: 'custom'
    compress: (
--- a/tests/context-strategy.test.ts
+++ b/tests/context-strategy.test.ts
@ -199,4 +199,428 @@ describe('AgentRunner contextStrategy', () => {
    expect(compress).toHaveBeenCalledOnce()
    expect(calls[1]).toHaveLength(1)
  })
  // ---------------------------------------------------------------------------
  // compact strategy
  // ---------------------------------------------------------------------------
  describe('compact strategy', () => {
    // Shared fixtures for the compact-strategy tests.
    // Assistant text (3000 chars) that exceeds the minTextBlockChars value used in the truncation test.
    const longText = 'x'.repeat(3000)
    // Tool output that exceeds every minToolResultChars value configured in the tests below.
    const longToolResult = 'result-data '.repeat(100) // ~1200 chars
    /**
     * Build a mock adapter that replays a fixed script: `responseCount - 1`
     * `echo` tool-use responses followed by one final `done` text response.
     * Each `chat` invocation appends a shallow `{ role, content }` copy of the
     * messages it received to `calls`.
     */
    function buildMultiTurnAdapter(
      responseCount: number,
      calls: LLMMessage[][],
    ): LLMAdapter {
      const scripted: LLMResponse[] = []
      let turn = 0
      while (turn < responseCount - 1) {
        scripted.push(toolUseResponse('echo', { message: `turn-${turn}` }))
        turn += 1
      }
      scripted.push(textResponse('done'))
      let cursor = 0
      const adapter: LLMAdapter = {
        name: 'mock',
        async chat(messages) {
          // Record what the runner sent before handing out the next response.
          const snapshot = messages.map(({ role, content }) => ({ role, content }))
          calls.push(snapshot)
          const next = scripted[cursor]!
          cursor += 1
          return next
        },
        async *stream() { /* unused */ },
      }
      return adapter
    }
    /**
     * Create a ToolRegistry holding a single `echo` tool, plus a ToolExecutor
     * constructed from that registry. The tool's `execute` takes no arguments
     * and always resolves with `{ data: result }`.
     */
    function buildEchoRegistry(result: string): { registry: ToolRegistry; executor: ToolExecutor } {
      const registry = new ToolRegistry()
      const echoTool = defineTool({
        name: 'echo',
        description: 'Echo input',
        inputSchema: z.object({ message: z.string() }),
        async execute() {
          return { data: result }
        },
      })
      registry.register(echoTool)
      const executor = new ToolExecutor(registry)
      return { registry, executor }
    }
    it('does not activate below maxTokens threshold', async () => {
      const recorded: LLMMessage[][] = []
      const adapter = buildMultiTurnAdapter(3, recorded)
      const { registry, executor } = buildEchoRegistry('short')
      const runner = new AgentRunner(adapter, registry, executor, {
        model: 'mock-model',
        allowedTools: ['echo'],
        maxTurns: 8,
        contextStrategy: { type: 'compact', maxTokens: 999999 },
      })
      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
      // The threshold is far above what this short conversation is estimated at,
      // so the final LLM call must see every earlier tool_result untouched.
      const finalCall = recorded[recorded.length - 1]!
      for (const message of finalCall) {
        for (const block of message.content) {
          if (block.type !== 'tool_result') continue
          expect(block.content).not.toContain('compacted')
        }
      }
    })
    it('compresses old tool_result blocks when tokens exceed threshold', async () => {
      const recorded: LLMMessage[][] = []
      const adapter = buildMultiTurnAdapter(4, recorded)
      const { registry, executor } = buildEchoRegistry(longToolResult)
      const runner = new AgentRunner(adapter, registry, executor, {
        model: 'mock-model',
        allowedTools: ['echo'],
        maxTurns: 8,
        contextStrategy: {
          type: 'compact',
          maxTokens: 20,           // very low to always trigger
          preserveRecentTurns: 1,  // only protect the most recent turn
          minToolResultChars: 100,
        },
      })
      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
      // Collect the content of every tool_result in the final call that carries
      // a compaction marker.
      const finalCall = recorded[recorded.length - 1]!
      const compactedContents: string[] = []
      for (const message of finalCall) {
        for (const block of message.content) {
          if (block.type === 'tool_result' && block.content.includes('compacted')) {
            compactedContents.push(block.content)
          }
        }
      }
      expect(compactedContents.length).toBeGreaterThan(0)
      // Each marker must name the tool that produced the original output.
      for (const content of compactedContents) {
        expect(content).toMatch(/\[Tool result: echo/)
      }
    })
    it('preserves the first user message', async () => {
      const recorded: LLMMessage[][] = []
      const adapter = buildMultiTurnAdapter(4, recorded)
      const { registry, executor } = buildEchoRegistry(longToolResult)
      const runner = new AgentRunner(adapter, registry, executor, {
        model: 'mock-model',
        allowedTools: ['echo'],
        maxTurns: 8,
        contextStrategy: {
          type: 'compact',
          maxTokens: 20,
          preserveRecentTurns: 1,
          minToolResultChars: 100,
        },
      })
      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'original prompt' }] }])
      // The opening user prompt must reach the final LLM call unchanged.
      const finalCall = recorded[recorded.length - 1]!
      const openingUser = finalCall.find(m => m.role === 'user')!
      const [openingBlock] = openingUser.content
      expect(openingBlock).toMatchObject({ type: 'text', text: 'original prompt' })
    })
    it('preserves tool_use blocks in old turns', async () => {
      const recorded: LLMMessage[][] = []
      const adapter = buildMultiTurnAdapter(4, recorded)
      const { registry, executor } = buildEchoRegistry(longToolResult)
      const runner = new AgentRunner(adapter, registry, executor, {
        model: 'mock-model',
        allowedTools: ['echo'],
        maxTurns: 8,
        contextStrategy: {
          type: 'compact',
          maxTokens: 20,
          preserveRecentTurns: 1,
          minToolResultChars: 100,
        },
      })
      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
      // Every assistant message except the closing "done" text turn must still
      // contain at least one tool_use block.
      const finalCall = recorded[recorded.length - 1]!
      for (const message of finalCall) {
        if (message.role !== 'assistant') continue
        const isClosingTurn = message.content.some(b => b.type === 'text' && b.text === 'done')
        if (isClosingTurn) continue
        const toolUseCount = message.content.filter(b => b.type === 'tool_use').length
        expect(toolUseCount).toBeGreaterThan(0)
      }
    })
    it('preserves error tool_result blocks', async () => {
      const calls: LLMMessage[][] = []
      // Script: a tool call that will fail, a tool call that succeeds, then the
      // final text answer.
      const responses: LLMResponse[] = [
        toolUseResponse('echo', { message: 'will-fail' }),
        toolUseResponse('echo', { message: 'ok' }),
        textResponse('done'),
      ]
      let idx = 0
      const adapter: LLMAdapter = {
        name: 'mock',
        async chat(messages) {
          calls.push(messages.map(m => ({ role: m.role, content: m.content })))
          return responses[idx++]!
        },
        async *stream() { /* unused */ },
      }
      // Tool that fails on first call, succeeds on second.
      let callCount = 0
      const registry = new ToolRegistry()
      registry.register(
        defineTool({
          name: 'echo',
          description: 'Echo input',
          inputSchema: z.object({ message: z.string() }),
          async execute() {
            callCount++
            if (callCount === 1) {
              // Long message so the result is well above minToolResultChars and
              // would be compacted if it were not flagged as an error.
              throw new Error('deliberate error '.repeat(40))
            }
            return { data: longToolResult }
          },
        }),
      )
      const executor = new ToolExecutor(registry)
      const runner = new AgentRunner(adapter, registry, executor, {
        model: 'mock-model',
        allowedTools: ['echo'],
        maxTurns: 8,
        contextStrategy: {
          type: 'compact',
          maxTokens: 20,
          preserveRecentTurns: 1,
          minToolResultChars: 50,
        },
      })
      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
      const lastCall = calls[calls.length - 1]!
      const errorResults = lastCall.flatMap(m =>
        m.content.filter(b => b.type === 'tool_result' && b.is_error),
      )
      // Guard against a vacuous pass: without at least one error result in the
      // final call, the loop below would assert nothing.
      expect(errorResults.length).toBeGreaterThan(0)
      // Error results should still have their original content (not compacted).
      for (const er of errorResults) {
        if (er.type === 'tool_result') {
          expect(er.content).not.toContain('compacted')
          expect(er.content).toContain('deliberate error')
        }
      }
    })
    it('does not re-compress markers from compressToolResults', async () => {
      const recorded: LLMMessage[][] = []
      const adapter = buildMultiTurnAdapter(4, recorded)
      const { registry, executor } = buildEchoRegistry(longToolResult)
      const runner = new AgentRunner(adapter, registry, executor, {
        model: 'mock-model',
        allowedTools: ['echo'],
        maxTurns: 8,
        compressToolResults: { minChars: 100 },
        contextStrategy: {
          type: 'compact',
          maxTokens: 20,
          preserveRecentTurns: 1,
          minToolResultChars: 10,
        },
      })
      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
      // A tool_result may carry at most one "[Tool…" marker; two or more would
      // mean one compaction marker was wrapped around another.
      const finalCall = recorded[recorded.length - 1]!
      for (const message of finalCall) {
        for (const block of message.content) {
          if (block.type !== 'tool_result') continue
          const markers = block.content.match(/\[Tool/g) ?? []
          expect(markers.length).toBeLessThanOrEqual(1)
        }
      }
    })
    it('truncates long assistant text blocks in old turns', async () => {
      const recorded: LLMMessage[][] = []
      // Opening assistant turn combines a long text block with a tool_use; it is
      // followed by one more tool turn and the closing text turn.
      const openingTurn: LLMResponse = {
        id: 'r1',
        content: [
          { type: 'text', text: longText },
          { type: 'tool_use', id: 'tu-1', name: 'echo', input: { message: 'hi' } },
        ],
        model: 'mock-model',
        stop_reason: 'tool_use',
        usage: { input_tokens: 10, output_tokens: 20 },
      }
      const scripted: LLMResponse[] = [
        openingTurn,
        toolUseResponse('echo', { message: 'turn2' }),
        textResponse('done'),
      ]
      let cursor = 0
      const adapter: LLMAdapter = {
        name: 'mock',
        async chat(messages) {
          recorded.push(messages.map(({ role, content }) => ({ role, content })))
          const next = scripted[cursor]!
          cursor += 1
          return next
        },
        async *stream() { /* unused */ },
      }
      const { registry, executor } = buildEchoRegistry('short')
      const runner = new AgentRunner(adapter, registry, executor, {
        model: 'mock-model',
        allowedTools: ['echo'],
        maxTurns: 8,
        contextStrategy: {
          type: 'compact',
          maxTokens: 20,
          preserveRecentTurns: 1,
          minTextBlockChars: 500,
          textBlockExcerptChars: 100,
        },
      })
      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
      // By the final call the opening assistant message sits in the old zone,
      // so its long text block should have been replaced by an excerpt.
      const finalCall = recorded[recorded.length - 1]!
      const oldestAssistant = finalCall.find(m => m.role === 'assistant')!
      const truncatedBlock = oldestAssistant.content.find(
        b => b.type === 'text' && b.text.includes('truncated'),
      )
      expect(truncatedBlock).toBeDefined()
      if (truncatedBlock?.type === 'text') {
        expect(truncatedBlock.text.length).toBeLessThan(longText.length)
        expect(truncatedBlock.text).toContain(`${longText.length} chars total`)
      }
    })
    it('keeps recent turns intact within preserveRecentTurns', async () => {
      const recorded: LLMMessage[][] = []
      const adapter = buildMultiTurnAdapter(4, recorded)
      const { registry, executor } = buildEchoRegistry(longToolResult)
      const runner = new AgentRunner(adapter, registry, executor, {
        model: 'mock-model',
        allowedTools: ['echo'],
        maxTurns: 8,
        contextStrategy: {
          type: 'compact',
          maxTokens: 20,
          preserveRecentTurns: 1,
          minToolResultChars: 100,
        },
      })
      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
      // Scan the final call from the end for the newest user message that
      // carries a tool_result; it lies inside the preserved window.
      const finalCall = recorded[recorded.length - 1]!
      let newestToolResultMsg: LLMMessage | undefined
      for (let i = finalCall.length - 1; i >= 0; i--) {
        const candidate = finalCall[i]!
        if (candidate.role === 'user' && candidate.content.some(b => b.type === 'tool_result')) {
          newestToolResultMsg = candidate
          break
        }
      }
      expect(newestToolResultMsg).toBeDefined()
      // Its first tool_result must still hold the original long output.
      const newestResult = newestToolResultMsg!.content.find(b => b.type === 'tool_result')
      if (newestResult?.type === 'tool_result') {
        expect(newestResult.content).not.toContain('compacted')
        expect(newestResult.content).toContain('result-data')
      }
    })
    it('does not compact when all turns fit in preserveRecentTurns', async () => {
      const recorded: LLMMessage[][] = []
      const adapter = buildMultiTurnAdapter(3, recorded)
      const { registry, executor } = buildEchoRegistry(longToolResult)
      const runner = new AgentRunner(adapter, registry, executor, {
        model: 'mock-model',
        allowedTools: ['echo'],
        maxTurns: 8,
        contextStrategy: {
          type: 'compact',
          maxTokens: 20,
          preserveRecentTurns: 10, // way more than actual turns
          minToolResultChars: 100,
        },
      })
      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
      // The preserved window covers the whole conversation, so no tool_result
      // in the final call may carry a compaction marker.
      const finalCall = recorded[recorded.length - 1]!
      for (const message of finalCall) {
        for (const block of message.content) {
          if (block.type !== 'tool_result') continue
          expect(block.content).not.toContain('compacted')
        }
      }
    })
    it('maintains correct role alternation after compaction', async () => {
      const recorded: LLMMessage[][] = []
      const adapter = buildMultiTurnAdapter(5, recorded)
      const { registry, executor } = buildEchoRegistry(longToolResult)
      const runner = new AgentRunner(adapter, registry, executor, {
        model: 'mock-model',
        allowedTools: ['echo'],
        maxTurns: 10,
        contextStrategy: {
          type: 'compact',
          maxTokens: 20,
          preserveRecentTurns: 1,
          minToolResultChars: 100,
        },
      })
      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
      // In every recorded LLM call, no two neighbouring messages may share a role.
      for (const callMsgs of recorded) {
        callMsgs.forEach((message, position) => {
          if (position === 0) return
          expect(message.role).not.toBe(callMsgs[position - 1]!.role)
        })
      }
    })
    it('returns ZERO_USAGE (no LLM cost from compaction)', async () => {
      const recorded: LLMMessage[][] = []
      const adapter = buildMultiTurnAdapter(4, recorded)
      const { registry, executor } = buildEchoRegistry(longToolResult)
      const runner = new AgentRunner(adapter, registry, executor, {
        model: 'mock-model',
        allowedTools: ['echo'],
        maxTurns: 8,
        contextStrategy: {
          type: 'compact',
          maxTokens: 20,
          preserveRecentTurns: 1,
          minToolResultChars: 100,
        },
      })
      const { tokenUsage } = await runner.run([
        { role: 'user', content: [{ type: 'text', text: 'start' }] },
      ])
      // Usage must equal the sum over the 4 scripted LLM calls and nothing more.
      // Per the fixtures: toolUseResponse reports input=15/output=25 and
      // textResponse reports input=10/output=20; there are 3 tool turns + 1 final.
      const toolTurns = 3
      expect(tokenUsage.input_tokens).toBe(15 * toolTurns + 10)
      expect(tokenUsage.output_tokens).toBe(25 * toolTurns + 20)
    })
  })
 })