feat: add rule-based compact context strategy (#111) (#119)

* feat: add rule-based compact context strategy (#111)

  Add `contextStrategy: 'compact'` as a zero-LLM-cost alternative to `summarize`. Instead of making an LLM call to compress everything into prose, it selectively compresses old turns using structural rules:

  - Preserve tool_use blocks (agent decisions) and error tool_results
  - Replace long tool_result content with compact markers that include the tool name
  - Truncate long assistant text blocks to a head excerpt
  - Keep recent turns (configurable via preserveRecentTurns) fully intact
  - Detect already-compressed markers from compressToolResults to avoid double-processing

  Closes #111

* fix: remove redundant length guard and fix compact type indentation
fix: guard against re-compression of already compressed tool result markers (#118)
2026-04-16 23:34:50 +08:00 · 2026-04-16 18:25:48 +08:00 · 2026-04-16 18:22:42 +08:00
5 changed files with 1168 additions and 0 deletions
--- a/src/agent/agent.ts
+++ b/src/agent/agent.ts
@ -154,6 +154,7 @@ export class Agent {
      loopDetection: this.config.loopDetection,
      maxTokenBudget: this.config.maxTokenBudget,
      contextStrategy: this.config.contextStrategy,
+      compressToolResults: this.config.compressToolResults,
    }

    this.runner = new AgentRunner(
--- a/src/agent/runner.ts
+++ b/src/agent/runner.ts
@ -98,6 +98,11 @@ export interface RunnerOptions {
  readonly maxTokenBudget?: number
  /** Optional context compression strategy for long multi-turn runs. */
  readonly contextStrategy?: ContextStrategy
+  /**
+   * Compress tool results that the agent has already processed.
+   * See {@link AgentConfig.compressToolResults} for details.
+   */
+  readonly compressToolResults?: boolean | { readonly minChars?: number }
 }

 /**
@ -176,6 +181,9 @@ function addTokenUsage(a: TokenUsage, b: TokenUsage): TokenUsage {

 const ZERO_USAGE: TokenUsage = { input_tokens: 0, output_tokens: 0 }

+/** Default minimum content length before tool result compression kicks in. */
+const DEFAULT_MIN_COMPRESS_CHARS = 500
+
 /**
 * Prepends synthetic framing text to the first user message so we never emit
 * consecutive `user` turns (Bedrock) and summaries do not concatenate onto
@ -392,6 +400,10 @@ export class AgentRunner {
      )
    }

+    if (strategy.type === 'compact') {
+      return { messages: this.compactMessages(messages, strategy), usage: ZERO_USAGE }
+    }
+
    const estimated = estimateTokens(messages)
    const compressed = await strategy.compress(messages, estimated)
    if (!Array.isArray(compressed) || compressed.length === 0) {
@ -569,6 +581,12 @@ export class AgentRunner {

        turns++

+        // Compress consumed tool results before context strategy (lightweight,
+        // no LLM calls) so the strategy operates on already-reduced messages.
+        if (this.options.compressToolResults && turns > 1) {
+          conversationMessages = this.compressConsumedToolResults(conversationMessages)
+        }
+
        // Optionally compact context before each LLM call after the first turn.
        if (this.options.contextStrategy && turns > 1) {
          const compacted = await this.applyContextStrategy(
@ -846,6 +864,205 @@ export class AgentRunner {
  // Private helpers
  // -------------------------------------------------------------------------

+  /**
+   * Rule-based selective context compaction (no LLM calls).
+   *
+   * Compresses old turns while preserving the conversation skeleton:
+   * - tool_use blocks (decisions) are always kept
+   * - Long tool_result content is replaced with a compact marker
+   * - Long assistant text blocks are truncated with an excerpt
+   * - Image blocks in old assistant messages are replaced with a text marker
+   * - Error tool_results are never compressed
+   * - Blocks that already carry a compression/truncation marker are left alone
+   * - Recent turns (within `preserveRecentTurns`) are kept intact
+   *
+   * Everything up to and including the first user message is also kept as-is.
+   *
+   * @param messages - Conversation to compact; the array and its messages are never mutated.
+   * @param strategy - The `compact` strategy options (token threshold, window size, char limits).
+   * @returns The original `messages` array when nothing changed (below the token
+   *   threshold, no old zone, or no block qualified); otherwise a new array in
+   *   which only the modified messages are replaced by copies.
+   */
+  private compactMessages(
+    messages: LLMMessage[],
+    strategy: Extract<ContextStrategy, { type: 'compact' }>,
+  ): LLMMessage[] {
+    const estimated = estimateTokens(messages)
+    if (estimated <= strategy.maxTokens) {
+      return messages
+    }
+
+    const preserveRecent = strategy.preserveRecentTurns ?? 4
+    const minToolResultChars = strategy.minToolResultChars ?? 200
+    const minTextBlockChars = strategy.minTextBlockChars ?? 2000
+    const textBlockExcerptChars = strategy.textBlockExcerptChars ?? 200
+
+    // Matches the suffix appended to truncated text blocks below. Used to
+    // recognise blocks truncated by a prior pass (no `g` flag, so `.test` is stateless).
+    const truncationMarker = /\.\.\. \[truncated — \d+ chars total\]$/
+
+    // Find the first user message — it is always preserved as-is.
+    const firstUserIndex = messages.findIndex(m => m.role === 'user')
+    if (firstUserIndex < 0 || firstUserIndex === messages.length - 1) {
+      return messages
+    }
+
+    // Walk backward to find the boundary between old and recent turns.
+    // A "turn pair" is an assistant message followed by a user message.
+    let boundary = messages.length
+    let pairsFound = 0
+    for (let i = messages.length - 1; i > firstUserIndex && pairsFound < preserveRecent; i--) {
+      if (messages[i]!.role === 'user' && i > 0 && messages[i - 1]!.role === 'assistant') {
+        pairsFound++
+        boundary = i - 1
+      }
+    }
+
+    // If all turns fit within the recent window, nothing to compact.
+    if (boundary <= firstUserIndex + 1) {
+      return messages
+    }
+
+    // Build a tool_use_id → tool name lookup from old assistant messages.
+    const toolNameMap = new Map<string, string>()
+    for (let i = firstUserIndex + 1; i < boundary; i++) {
+      const msg = messages[i]!
+      if (msg.role !== 'assistant') continue
+      for (const block of msg.content) {
+        if (block.type === 'tool_use') {
+          toolNameMap.set(block.id, block.name)
+        }
+      }
+    }
+
+    // Process old messages (between first user and boundary).
+    let anyChanged = false
+    const result: LLMMessage[] = []
+
+    for (let i = 0; i < messages.length; i++) {
+      // First user message and recent messages: keep intact.
+      if (i <= firstUserIndex || i >= boundary) {
+        result.push(messages[i]!)
+        continue
+      }
+
+      const msg = messages[i]!
+      let msgChanged = false
+      const newContent = msg.content.map((block): ContentBlock => {
+        if (msg.role === 'assistant') {
+          // tool_use blocks: always preserve (decisions).
+          if (block.type === 'tool_use') return block
+          // Long text blocks: truncate with excerpt.
+          if (block.type === 'text' && block.text.length >= minTextBlockChars) {
+            // Already truncated by a prior compact pass: truncating again would
+            // report the excerpt's length instead of the original length.
+            if (truncationMarker.test(block.text)) return block
+            const truncatedText = `${block.text.slice(0, textBlockExcerptChars)}... [truncated — ${block.text.length} chars total]`
+            // Excerpt + marker is not shorter than the original (possible when
+            // minTextBlockChars is configured close to or below the excerpt
+            // size): replacing would only grow the context.
+            if (truncatedText.length >= block.text.length) return block
+            msgChanged = true
+            return {
+              type: 'text',
+              text: truncatedText,
+            } satisfies TextBlock
+          }
+          // Image blocks in old turns: replace with marker.
+          if (block.type === 'image') {
+            msgChanged = true
+            return { type: 'text', text: '[Image compacted]' } satisfies TextBlock
+          }
+          return block
+        }
+
+        // User messages in old zone.
+        if (block.type === 'tool_result') {
+          // Error results: always preserve.
+          if (block.is_error) return block
+          // Already compressed by compressToolResults or a prior compact pass.
+          if (
+            block.content.startsWith('[Tool output compressed') ||
+            block.content.startsWith('[Tool result:')
+          ) {
+            return block
+          }
+          // Short results: preserve.
+          if (block.content.length < minToolResultChars) return block
+          // Compress. Falls back to 'unknown' when the matching tool_use is not
+          // among the old assistant messages scanned above.
+          const toolName = toolNameMap.get(block.tool_use_id) ?? 'unknown'
+          msgChanged = true
+          return {
+            type: 'tool_result',
+            tool_use_id: block.tool_use_id,
+            content: `[Tool result: ${toolName} — ${block.content.length} chars, compacted]`,
+          } satisfies ToolResultBlock
+        }
+        return block
+      })
+
+      if (msgChanged) {
+        anyChanged = true
+        result.push({ role: msg.role, content: newContent } as LLMMessage)
+      } else {
+        // Untouched messages keep their original object identity.
+        result.push(msg)
+      }
+    }
+
+    return anyChanged ? result : messages
+  }
+
+  /**
+   * Replace consumed tool results with compact markers.
+   *
+   * A tool_result is "consumed" when the assistant has produced a response
+   * after seeing it (i.e. there is an assistant message following the user
+   * message that contains the tool_result).  The most recent user message
+   * with tool results is always kept intact — the LLM is about to see it.
+   *
+   * Error results, results shorter than `minChars`, and results that already
+   * hold a marker (from this method or from the `compact` context strategy)
+   * are never compressed.
+   *
+   * @param messages - Conversation to process; never mutated.
+   * @returns The original `messages` array when nothing was compressed,
+   *   otherwise a new array in which only the modified messages are copies.
+   */
+  private compressConsumedToolResults(messages: LLMMessage[]): LLMMessage[] {
+    const config = this.options.compressToolResults
+    if (!config) return messages
+
+    // `true` selects the default threshold; an options object may override it.
+    const minChars = typeof config === 'object'
+      ? (config.minChars ?? DEFAULT_MIN_COMPRESS_CHARS)
+      : DEFAULT_MIN_COMPRESS_CHARS
+
+    // Find the last user message that carries tool_result blocks.
+    let lastToolResultUserIdx = -1
+    for (let i = messages.length - 1; i >= 0; i--) {
+      if (
+        messages[i]!.role === 'user' &&
+        messages[i]!.content.some(b => b.type === 'tool_result')
+      ) {
+        lastToolResultUserIdx = i
+        break
+      }
+    }
+
+    // No tool-result user message at all (-1), or it is the very first
+    // message (0): there is nothing before it that could be compressed.
+    if (lastToolResultUserIdx <= 0) return messages
+
+    let anyChanged = false
+    const result = messages.map((msg, idx) => {
+      // Only compress user messages that appear before the last one.
+      if (msg.role !== 'user' || idx >= lastToolResultUserIdx) return msg
+
+      const hasToolResult = msg.content.some(b => b.type === 'tool_result')
+      if (!hasToolResult) return msg
+
+      let msgChanged = false
+      const newContent = msg.content.map((block): ContentBlock => {
+        if (block.type !== 'tool_result') return block
+
+        // Never compress error results — they carry diagnostic value.
+        if (block.is_error) return block
+
+        // Skip already-compressed results — avoid re-compression with wrong char
+        // count. Both marker formats are recognised: the one written below and
+        // the `[Tool result: …]` marker written by the compact context strategy,
+        // which would otherwise be re-wrapped when `minChars` is small.
+        if (
+          block.content.startsWith('[Tool output compressed') ||
+          block.content.startsWith('[Tool result:')
+        ) {
+          return block
+        }
+
+        // Skip short results — the marker itself has overhead.
+        if (block.content.length < minChars) return block
+
+        msgChanged = true
+        return {
+          type: 'tool_result',
+          tool_use_id: block.tool_use_id,
+          content: `[Tool output compressed — ${block.content.length} chars, already processed]`,
+        } satisfies ToolResultBlock
+      })
+
+      if (msgChanged) {
+        anyChanged = true
+        return { role: msg.role, content: newContent } as LLMMessage
+      }
+      return msg
+    })
+
+    return anyChanged ? result : messages
+  }
+
  /**
   * Build the {@link ToolUseContext} passed to every tool execution.
   * Identifies this runner as the invoking agent.
--- a/src/types.ts
+++ b/src/types.ts
@ -69,6 +69,19 @@ export interface LLMMessage {
 export type ContextStrategy =
  | { type: 'sliding-window'; maxTurns: number }
  | { type: 'summarize'; maxTokens: number; summaryModel?: string }
+  | {
+      type: 'compact'
+      /** Estimated token threshold that triggers compaction. Compaction is skipped when below this. */
+      maxTokens: number
+      /** Number of recent turn pairs (assistant+user) to keep intact. Default: 4. */
+      preserveRecentTurns?: number
+      /** Minimum chars in a tool_result content to qualify for compaction. Default: 200. */
+      minToolResultChars?: number
+      /** Minimum chars in an assistant text block to qualify for truncation. Default: 2000. */
+      minTextBlockChars?: number
+      /** Maximum chars to keep from a truncated text block (head excerpt). Default: 200. */
+      textBlockExcerptChars?: number
+    }
  | {
    type: 'custom'
    compress: (
@ -270,6 +283,21 @@ export interface AgentConfig {
   * takes priority over this value.
   */
  readonly maxToolOutputChars?: number
+  /**
+   * Compress tool results that the agent has already processed.
+   *
+   * In multi-turn runs, tool results persist in the conversation even after the
+   * agent has acted on them. When enabled, consumed tool results (those followed
+   * by an assistant response) are replaced with a short marker before the next
+   * LLM call, freeing context budget for new reasoning.
+   *
+   * - `true` — enable with default threshold (500 chars)
+   * - `{ minChars: N }` — only compress results longer than N characters
+   * - `false` / `undefined` — disabled (default)
+   *
+   * Error tool results are never compressed.
+   */
+  readonly compressToolResults?: boolean | { readonly minChars?: number }
  /**
   * Optional Zod schema for structured output.  When set, the agent's final
   * output is parsed as JSON and validated against this schema.  A single
--- a/tests/context-strategy.test.ts
+++ b/tests/context-strategy.test.ts
@ -199,4 +199,428 @@ describe('AgentRunner contextStrategy', () => {
    expect(compress).toHaveBeenCalledOnce()
    expect(calls[1]).toHaveLength(1)
  })
+
+  // ---------------------------------------------------------------------------
+  // compact strategy
+  // ---------------------------------------------------------------------------
+
+  describe('compact strategy', () => {
+    const longText = 'x'.repeat(3000)
+    const longToolResult = 'result-data '.repeat(100) // ~1200 chars
+
+    /**
+     * Mock adapter scripted with `responseCount` replies: every reply except
+     * the last is an `echo` tool call (`turn-0`, `turn-1`, …) and the last is
+     * the text `done`. Each `chat` invocation records a shallow snapshot of
+     * the messages it received into `calls`.
+     */
+    function buildMultiTurnAdapter(
+      responseCount: number,
+      calls: LLMMessage[][],
+    ): LLMAdapter {
+      const scripted: LLMResponse[] = Array.from(
+        { length: responseCount - 1 },
+        (_, turn) => toolUseResponse('echo', { message: `turn-${turn}` }),
+      )
+      scripted.push(textResponse('done'))
+
+      let cursor = 0
+      const adapter: LLMAdapter = {
+        name: 'mock',
+        async chat(messages) {
+          const snapshot = messages.map(({ role, content }) => ({ role, content }))
+          calls.push(snapshot)
+          const next = scripted[cursor]!
+          cursor += 1
+          return next
+        },
+        async *stream() { /* unused */ },
+      }
+      return adapter
+    }
+
+    /**
+     * Creates a tool registry holding a single `echo` tool, plus an executor
+     * bound to it. The tool ignores its input and always resolves with
+     * `result` as its data.
+     */
+    function buildEchoRegistry(result: string): { registry: ToolRegistry; executor: ToolExecutor } {
+      const registry = new ToolRegistry()
+      const echoTool = defineTool({
+        name: 'echo',
+        description: 'Echo input',
+        inputSchema: z.object({ message: z.string() }),
+        async execute() {
+          return { data: result }
+        },
+      })
+      registry.register(echoTool)
+
+      const executor = new ToolExecutor(registry)
+      return { registry, executor }
+    }
+
+    it('does not activate below maxTokens threshold', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(3, calls)
+      const { registry, executor } = buildEchoRegistry('short')
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: { type: 'compact', maxTokens: 999999 },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      // On the 3rd call (turn 3), all previous messages should still be intact
+      // because estimated tokens are way below the threshold.
+      const lastCall = calls[calls.length - 1]!
+      const allToolResults = lastCall.flatMap(m =>
+        m.content.filter(b => b.type === 'tool_result'),
+      )
+      for (const tr of allToolResults) {
+        if (tr.type === 'tool_result') {
+          expect(tr.content).not.toContain('compacted')
+        }
+      }
+    })
+
+    it('compresses old tool_result blocks when tokens exceed threshold', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(4, calls)
+      const { registry, executor } = buildEchoRegistry(longToolResult)
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,           // very low to always trigger
+          preserveRecentTurns: 1,  // only protect the most recent turn
+          minToolResultChars: 100,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      // On the last call, old tool results should have compact markers.
+      const lastCall = calls[calls.length - 1]!
+      const toolResults = lastCall.flatMap(m =>
+        m.content.filter(b => b.type === 'tool_result'),
+      )
+      const compacted = toolResults.filter(
+        b => b.type === 'tool_result' && b.content.includes('compacted'),
+      )
+      expect(compacted.length).toBeGreaterThan(0)
+      // Marker should include tool name.
+      for (const tr of compacted) {
+        if (tr.type === 'tool_result') {
+          expect(tr.content).toMatch(/\[Tool result: echo/)
+        }
+      }
+    })
+
+    it('preserves the first user message', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(4, calls)
+      const { registry, executor } = buildEchoRegistry(longToolResult)
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 1,
+          minToolResultChars: 100,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'original prompt' }] }])
+
+      const lastCall = calls[calls.length - 1]!
+      const firstUser = lastCall.find(m => m.role === 'user')!
+      expect(firstUser.content[0]).toMatchObject({ type: 'text', text: 'original prompt' })
+    })
+
+    it('preserves tool_use blocks in old turns', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(4, calls)
+      const { registry, executor } = buildEchoRegistry(longToolResult)
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 1,
+          minToolResultChars: 100,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      // Every assistant message should still have its tool_use block.
+      const lastCall = calls[calls.length - 1]!
+      const assistantMsgs = lastCall.filter(m => m.role === 'assistant')
+      for (const msg of assistantMsgs) {
+        const toolUses = msg.content.filter(b => b.type === 'tool_use')
+        // The last assistant message is "done" (text only), others have tool_use.
+        if (msg.content.some(b => b.type === 'text' && b.text === 'done')) continue
+        expect(toolUses.length).toBeGreaterThan(0)
+      }
+    })
+
+    it('preserves error tool_result blocks', async () => {
+      const calls: LLMMessage[][] = []
+      const responses: LLMResponse[] = [
+        toolUseResponse('echo', { message: 'will-fail' }),
+        toolUseResponse('echo', { message: 'ok' }),
+        textResponse('done'),
+      ]
+      let idx = 0
+      const adapter: LLMAdapter = {
+        name: 'mock',
+        async chat(messages) {
+          calls.push(messages.map(m => ({ role: m.role, content: m.content })))
+          return responses[idx++]!
+        },
+        async *stream() { /* unused */ },
+      }
+      // Tool that fails on first call, succeeds on second.
+      let callCount = 0
+      const registry = new ToolRegistry()
+      registry.register(
+        defineTool({
+          name: 'echo',
+          description: 'Echo input',
+          inputSchema: z.object({ message: z.string() }),
+          async execute() {
+            callCount++
+            if (callCount === 1) {
+              throw new Error('deliberate error '.repeat(40))
+            }
+            return { data: longToolResult }
+          },
+        }),
+      )
+      const executor = new ToolExecutor(registry)
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 1,
+          minToolResultChars: 50,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      const lastCall = calls[calls.length - 1]!
+      const errorResults = lastCall.flatMap(m =>
+        m.content.filter(b => b.type === 'tool_result' && b.is_error),
+      )
+      // Error results should still have their original content (not compacted).
+      for (const er of errorResults) {
+        if (er.type === 'tool_result') {
+          expect(er.content).not.toContain('compacted')
+          expect(er.content).toContain('deliberate error')
+        }
+      }
+    })
+
+    it('does not re-compress markers from compressToolResults', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(4, calls)
+      const { registry, executor } = buildEchoRegistry(longToolResult)
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        compressToolResults: { minChars: 100 },
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 1,
+          minToolResultChars: 10,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      const lastCall = calls[calls.length - 1]!
+      const allToolResults = lastCall.flatMap(m =>
+        m.content.filter(b => b.type === 'tool_result'),
+      )
+      // No result should contain nested markers.
+      for (const tr of allToolResults) {
+        if (tr.type === 'tool_result') {
+          // Should not have a compact marker wrapping another marker.
+          const markerCount = (tr.content.match(/\[Tool/g) || []).length
+          expect(markerCount).toBeLessThanOrEqual(1)
+        }
+      }
+    })
+
+    it('truncates long assistant text blocks in old turns', async () => {
+      const calls: LLMMessage[][] = []
+      const responses: LLMResponse[] = [
+        // First turn: assistant with long text + tool_use
+        {
+          id: 'r1',
+          content: [
+            { type: 'text', text: longText },
+            { type: 'tool_use', id: 'tu-1', name: 'echo', input: { message: 'hi' } },
+          ],
+          model: 'mock-model',
+          stop_reason: 'tool_use',
+          usage: { input_tokens: 10, output_tokens: 20 },
+        },
+        toolUseResponse('echo', { message: 'turn2' }),
+        textResponse('done'),
+      ]
+      let idx = 0
+      const adapter: LLMAdapter = {
+        name: 'mock',
+        async chat(messages) {
+          calls.push(messages.map(m => ({ role: m.role, content: m.content })))
+          return responses[idx++]!
+        },
+        async *stream() { /* unused */ },
+      }
+      const { registry, executor } = buildEchoRegistry('short')
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 1,
+          minTextBlockChars: 500,
+          textBlockExcerptChars: 100,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      const lastCall = calls[calls.length - 1]!
+      // The first assistant message (old zone) should have its text truncated.
+      const firstAssistant = lastCall.find(m => m.role === 'assistant')!
+      const textBlocks = firstAssistant.content.filter(b => b.type === 'text')
+      const truncated = textBlocks.find(
+        b => b.type === 'text' && b.text.includes('truncated'),
+      )
+      expect(truncated).toBeDefined()
+      if (truncated && truncated.type === 'text') {
+        expect(truncated.text.length).toBeLessThan(longText.length)
+        expect(truncated.text).toContain(`${longText.length} chars total`)
+      }
+    })
+
+    it('keeps recent turns intact within preserveRecentTurns', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(4, calls)
+      const { registry, executor } = buildEchoRegistry(longToolResult)
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 1,
+          minToolResultChars: 100,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      // The most recent tool_result (last user message with tool_result) should
+      // still contain the original long content.
+      const lastCall = calls[calls.length - 1]!
+      const userMsgs = lastCall.filter(m => m.role === 'user')
+      const lastUserWithToolResult = [...userMsgs]
+        .reverse()
+        .find(m => m.content.some(b => b.type === 'tool_result'))
+      expect(lastUserWithToolResult).toBeDefined()
+      const recentTr = lastUserWithToolResult!.content.find(b => b.type === 'tool_result')
+      if (recentTr && recentTr.type === 'tool_result') {
+        expect(recentTr.content).not.toContain('compacted')
+        expect(recentTr.content).toContain('result-data')
+      }
+    })
+
+    it('does not compact when all turns fit in preserveRecentTurns', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(3, calls)
+      const { registry, executor } = buildEchoRegistry(longToolResult)
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 10, // way more than actual turns
+          minToolResultChars: 100,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      // All tool results should still have original content.
+      const lastCall = calls[calls.length - 1]!
+      const toolResults = lastCall.flatMap(m =>
+        m.content.filter(b => b.type === 'tool_result'),
+      )
+      for (const tr of toolResults) {
+        if (tr.type === 'tool_result') {
+          expect(tr.content).not.toContain('compacted')
+        }
+      }
+    })
+
+    it('maintains correct role alternation after compaction', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(5, calls)
+      const { registry, executor } = buildEchoRegistry(longToolResult)
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 10,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 1,
+          minToolResultChars: 100,
+        },
+      })
+
+      await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+      // Check all LLM calls for role alternation.
+      for (const callMsgs of calls) {
+        for (let i = 1; i < callMsgs.length; i++) {
+          expect(callMsgs[i]!.role).not.toBe(callMsgs[i - 1]!.role)
+        }
+      }
+    })
+
+    it('returns ZERO_USAGE (no LLM cost from compaction)', async () => {
+      const calls: LLMMessage[][] = []
+      const adapter = buildMultiTurnAdapter(4, calls)
+      const { registry, executor } = buildEchoRegistry(longToolResult)
+      const runner = new AgentRunner(adapter, registry, executor, {
+        model: 'mock-model',
+        allowedTools: ['echo'],
+        maxTurns: 8,
+        contextStrategy: {
+          type: 'compact',
+          maxTokens: 20,
+          preserveRecentTurns: 1,
+          minToolResultChars: 100,
+        },
+      })
+
+      const result = await runner.run([
+        { role: 'user', content: [{ type: 'text', text: 'start' }] },
+      ])
+
+      // Token usage should only reflect the 4 actual LLM calls (no extra from compaction).
+      // Each toolUseResponse: input=15, output=25. textResponse: input=10, output=20.
+      // 3 tool calls + 1 final = (15*3 + 10) input, (25*3 + 20) output.
+      expect(result.tokenUsage.input_tokens).toBe(15 * 3 + 10)
+      expect(result.tokenUsage.output_tokens).toBe(25 * 3 + 20)
+    })
+  })
 })
--- a/tests/tool-result-compression.test.ts
+++ b/tests/tool-result-compression.test.ts
@ -0,0 +1,498 @@
+import { describe, it, expect } from 'vitest'
+import { z } from 'zod'
+import { AgentRunner } from '../src/agent/runner.js'
+import { ToolRegistry, defineTool } from '../src/tool/framework.js'
+import { ToolExecutor } from '../src/tool/executor.js'
+import type { LLMAdapter, LLMMessage, LLMResponse } from '../src/types.js'
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Build a mock LLM response that ends the turn with a single text block.
+ *
+ * The response id is random; usage is fixed at 10 input / 20 output tokens.
+ */
+function textResponse(text: string): LLMResponse {
+  const responseId = `resp-${Math.random().toString(36).slice(2)}`
+  const response: LLMResponse = {
+    id: responseId,
+    content: [{ type: 'text', text }],
+    model: 'mock-model',
+    stop_reason: 'end_turn',
+    usage: { input_tokens: 10, output_tokens: 20 },
+  }
+  return response
+}
+
+/**
+ * Build a mock LLM response that requests a single tool call.
+ *
+ * Both the response id and the tool_use id are random; usage is fixed at
+ * 15 input / 25 output tokens.
+ */
+function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse {
+  const responseId = `resp-${Math.random().toString(36).slice(2)}`
+  const toolUseId = `tu-${Math.random().toString(36).slice(2)}`
+  const response: LLMResponse = {
+    id: responseId,
+    content: [{ type: 'tool_use', id: toolUseId, name: toolName, input }],
+    model: 'mock-model',
+    stop_reason: 'tool_use',
+    usage: { input_tokens: 15, output_tokens: 25 },
+  }
+  return response
+}
+
+/**
+ * Create a registry holding a single `echo` tool, plus an executor bound to it.
+ *
+ * The tool ignores its input and always returns `toolOutput` as its data
+ * (600 `x` characters by default).
+ */
+function buildRegistryAndExecutor(
+  toolOutput: string = 'x'.repeat(600),
+): { registry: ToolRegistry; executor: ToolExecutor } {
+  const registry = new ToolRegistry()
+  const echoTool = defineTool({
+    name: 'echo',
+    description: 'Echo input',
+    inputSchema: z.object({ message: z.string() }),
+    async execute() {
+      return { data: toolOutput }
+    },
+  })
+  registry.register(echoTool)
+
+  const executor = new ToolExecutor(registry)
+  return { registry, executor }
+}
+
+/**
+ * Create a registry holding a single `fail` tool, plus an executor bound to it.
+ *
+ * The tool ignores its input and always returns 600 `E` characters flagged
+ * with `isError: true`.
+ */
+function buildErrorRegistryAndExecutor(): { registry: ToolRegistry; executor: ToolExecutor } {
+  const registry = new ToolRegistry()
+  const failingTool = defineTool({
+    name: 'fail',
+    description: 'Always fails',
+    inputSchema: z.object({ message: z.string() }),
+    async execute() {
+      return { data: 'E'.repeat(600), isError: true }
+    },
+  })
+  registry.register(failingTool)
+
+  const executor = new ToolExecutor(registry)
+  return { registry, executor }
+}
+
+/**
+ * Collect the `content` string of every `tool_result` block found in
+ * `messages`, preserving message order and block order within each message.
+ */
+function extractToolResultContents(messages: LLMMessage[]): string[] {
+  type ToolResult = { type: 'tool_result'; tool_use_id: string; content: string; is_error?: boolean }
+
+  const contents: string[] = []
+  for (const message of messages) {
+    const toolResults = message.content.filter(
+      (block): block is ToolResult => block.type === 'tool_result',
+    )
+    for (const toolResult of toolResults) {
+      contents.push(toolResult.content)
+    }
+  }
+  return contents
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+/**
+ * Behavioural tests for the `compressToolResults` runner option.
+ *
+ * Each test drives an `AgentRunner` with a scripted mock adapter, records the
+ * messages handed to every `chat` call, and then inspects what a later turn
+ * saw for the tool results produced by earlier turns.
+ */
+describe('AgentRunner compressToolResults', () => {
+  /**
+   * Build the scripted response sequence shared by most tests: `toolTurns`
+   * consecutive calls to `toolName` with inputs `{ message: 't1' }`,
+   * `{ message: 't2' }`, … followed by a final text response `'done'`.
+   */
+  function scriptedToolCalls(toolName: string, toolTurns: number): LLMResponse[] {
+    const toolCalls = Array.from({ length: toolTurns }, (_, i) =>
+      toolUseResponse(toolName, { message: `t${i + 1}` }),
+    )
+    return [...toolCalls, textResponse('done')]
+  }
+
+  /**
+   * Build a mock adapter that replays `responses` in order and appends a
+   * snapshot of the messages received by each `chat` call to `calls`.
+   *
+   * `calls[n]` therefore holds what the LLM saw on turn `n + 1`.
+   */
+  function buildRecordingAdapter(responses: LLMResponse[], calls: LLMMessage[][]): LLMAdapter {
+    let idx = 0
+    return {
+      name: 'mock',
+      async chat(messages) {
+        // Copy each message and its content array so the recorded snapshot
+        // does not share array objects with the ones passed in.
+        calls.push(messages.map(m => ({ role: m.role, content: [...m.content] })))
+        return responses[idx++]!
+      },
+      async *stream() { /* unused */ },
+    }
+  }
+
+  it('does NOT compress when compressToolResults is not set (default)', async () => {
+    const calls: LLMMessage[][] = []
+    const longOutput = 'x'.repeat(600)
+    const adapter = buildRecordingAdapter(scriptedToolCalls('echo', 2), calls)
+    const { registry, executor } = buildRegistryAndExecutor(longOutput)
+    const runner = new AgentRunner(adapter, registry, executor, {
+      model: 'mock-model',
+      allowedTools: ['echo'],
+      maxTurns: 5,
+      // compressToolResults not set
+    })
+
+    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+    // Turn 3 should still see the full tool results from turns 1 and 2.
+    // Asserting the exact list (rather than `every`) avoids a vacuous pass
+    // when no tool results are present at all.
+    const allToolResults = extractToolResultContents(calls[2]!)
+    expect(allToolResults).toEqual([longOutput, longOutput])
+  })
+
+  it('compresses consumed tool results on turn 3+', async () => {
+    const calls: LLMMessage[][] = []
+    const longOutput = 'x'.repeat(600)
+    const adapter = buildRecordingAdapter(scriptedToolCalls('echo', 2), calls)
+    const { registry, executor } = buildRegistryAndExecutor(longOutput)
+    const runner = new AgentRunner(adapter, registry, executor, {
+      model: 'mock-model',
+      allowedTools: ['echo'],
+      maxTurns: 5,
+      compressToolResults: true,
+    })
+
+    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+    // Turn 3: the LLM should see a compressed marker for turn 1 results
+    // but the full output for turn 2 results (most recent, not yet consumed).
+    const allToolResults = extractToolResultContents(calls[2]!)
+    expect(allToolResults).toHaveLength(2)
+
+    // First result (turn 1) should be compressed
+    expect(allToolResults[0]).toContain('compressed')
+    expect(allToolResults[0]).toContain('600 chars')
+
+    // Second result (turn 2, most recent) should be preserved in full
+    expect(allToolResults[1]).toBe(longOutput)
+  })
+
+  it('preserves tool_use_id on compressed results', async () => {
+    const calls: LLMMessage[][] = []
+    const longOutput = 'x'.repeat(600)
+    const adapter = buildRecordingAdapter(scriptedToolCalls('echo', 2), calls)
+    const { registry, executor } = buildRegistryAndExecutor(longOutput)
+    const runner = new AgentRunner(adapter, registry, executor, {
+      model: 'mock-model',
+      allowedTools: ['echo'],
+      maxTurns: 5,
+      compressToolResults: true,
+    })
+
+    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+    // Turn 3: every tool_result block (compressed or not) must keep its id.
+    const toolResultBlocks = calls[2]!.flatMap(m =>
+      m.content.filter(b => b.type === 'tool_result'),
+    )
+    // Without this the loop below would pass trivially on an empty list.
+    expect(toolResultBlocks).toHaveLength(2)
+    for (const block of toolResultBlocks) {
+      expect(block).toHaveProperty('tool_use_id')
+      expect((block as { tool_use_id: string }).tool_use_id).toBeTruthy()
+    }
+  })
+
+  it('skips short tool results below minChars threshold', async () => {
+    const calls: LLMMessage[][] = []
+    const shortOutput = 'short' // 5 chars, well below 500 default
+    const adapter = buildRecordingAdapter(scriptedToolCalls('echo', 2), calls)
+    const { registry, executor } = buildRegistryAndExecutor(shortOutput)
+    const runner = new AgentRunner(adapter, registry, executor, {
+      model: 'mock-model',
+      allowedTools: ['echo'],
+      maxTurns: 5,
+      compressToolResults: true,
+    })
+
+    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+    // Turn 3: short results should NOT be compressed. The exact-list
+    // assertion also fails if no tool results were recorded.
+    const allToolResults = extractToolResultContents(calls[2]!)
+    expect(allToolResults).toEqual([shortOutput, shortOutput])
+  })
+
+  it('respects custom minChars threshold', async () => {
+    const calls: LLMMessage[][] = []
+    const output = 'x'.repeat(200)
+    const adapter = buildRecordingAdapter(scriptedToolCalls('echo', 2), calls)
+    const { registry, executor } = buildRegistryAndExecutor(output)
+    const runner = new AgentRunner(adapter, registry, executor, {
+      model: 'mock-model',
+      allowedTools: ['echo'],
+      maxTurns: 5,
+      compressToolResults: { minChars: 100 },
+    })
+
+    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+    // With minChars=100, the 200-char output should be compressed
+    const allToolResults = extractToolResultContents(calls[2]!)
+    expect(allToolResults[0]).toContain('compressed')
+    expect(allToolResults[0]).toContain('200 chars')
+  })
+
+  it('never compresses error tool results', async () => {
+    const calls: LLMMessage[][] = []
+    const errorOutput = 'E'.repeat(600)
+    const adapter = buildRecordingAdapter(scriptedToolCalls('fail', 2), calls)
+    const { registry, executor } = buildErrorRegistryAndExecutor()
+    const runner = new AgentRunner(adapter, registry, executor, {
+      model: 'mock-model',
+      allowedTools: ['fail'],
+      maxTurns: 5,
+      compressToolResults: true,
+    })
+
+    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+    // Error results should never be compressed even if long. The exact-list
+    // assertion also fails if no tool results were recorded.
+    const allToolResults = extractToolResultContents(calls[2]!)
+    expect(allToolResults).toEqual([errorOutput, errorOutput])
+  })
+
+  it('compresses selectively in multi-block tool_result messages (parallel tool calls)', async () => {
+    const calls: LLMMessage[][] = []
+    // Two tools: one returns long output, one returns short output
+    const registry = new ToolRegistry()
+    registry.register(
+      defineTool({
+        name: 'long_tool',
+        description: 'Returns long output',
+        inputSchema: z.object({ msg: z.string() }),
+        async execute() { return { data: 'L'.repeat(600) } },
+      }),
+    )
+    registry.register(
+      defineTool({
+        name: 'short_tool',
+        description: 'Returns short output',
+        inputSchema: z.object({ msg: z.string() }),
+        async execute() { return { data: 'S'.repeat(50) } },
+      }),
+    )
+    const executor = new ToolExecutor(registry)
+
+    // Turn 1: model calls both tools in parallel
+    const parallelResponse: LLMResponse = {
+      id: 'resp-parallel',
+      content: [
+        { type: 'tool_use', id: 'tu-long', name: 'long_tool', input: { msg: 'a' } },
+        { type: 'tool_use', id: 'tu-short', name: 'short_tool', input: { msg: 'b' } },
+      ],
+      model: 'mock-model',
+      stop_reason: 'tool_use',
+      usage: { input_tokens: 15, output_tokens: 25 },
+    }
+    const adapter = buildRecordingAdapter(
+      [
+        parallelResponse,
+        toolUseResponse('long_tool', { msg: 't2' }),
+        textResponse('done'),
+      ],
+      calls,
+    )
+
+    const runner = new AgentRunner(adapter, registry, executor, {
+      model: 'mock-model',
+      allowedTools: ['long_tool', 'short_tool'],
+      maxTurns: 5,
+      compressToolResults: true,
+    })
+
+    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+    // Turn 3: the parallel results from turn 1 should be selectively compressed.
+    // The long_tool result (600 chars) → compressed. The short_tool result (50 chars) → kept.
+    const turn3Messages = calls[2]!
+    // Find the results from turn 1 (first user message with tool_results)
+    const firstToolResultMsg = turn3Messages.find(
+      m => m.role === 'user' && m.content.some(b => b.type === 'tool_result'),
+    )!
+    const blocks = firstToolResultMsg.content.filter(
+      (b): b is { type: 'tool_result'; tool_use_id: string; content: string } =>
+        b.type === 'tool_result',
+    )
+
+    // One should be compressed (long), one should be intact (short)
+    const compressedBlocks = blocks.filter(b => b.content.includes('compressed'))
+    const intactBlocks = blocks.filter(b => !b.content.includes('compressed'))
+    expect(compressedBlocks).toHaveLength(1)
+    expect(compressedBlocks[0]!.content).toContain('600 chars')
+    expect(intactBlocks).toHaveLength(1)
+    expect(intactBlocks[0]!.content).toBe('S'.repeat(50))
+  })
+
+  it('compounds compression across 4+ turns', async () => {
+    const calls: LLMMessage[][] = []
+    const longOutput = 'x'.repeat(600)
+    const adapter = buildRecordingAdapter(scriptedToolCalls('echo', 3), calls)
+    const { registry, executor } = buildRegistryAndExecutor(longOutput)
+    const runner = new AgentRunner(adapter, registry, executor, {
+      model: 'mock-model',
+      allowedTools: ['echo'],
+      maxTurns: 6,
+      compressToolResults: true,
+    })
+
+    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+    // Turn 4: turns 1 and 2 should both be compressed, turn 3 should be intact
+    const allToolResults = extractToolResultContents(calls[3]!)
+    expect(allToolResults).toHaveLength(3)
+
+    // First two are compressed (turns 1 & 2)
+    expect(allToolResults[0]).toContain('compressed')
+    expect(allToolResults[1]).toContain('compressed')
+
+    // Last one (turn 3, most recent) preserved
+    expect(allToolResults[2]).toBe(longOutput)
+  })
+
+  it('does not re-compress already compressed markers with low minChars', async () => {
+    const calls: LLMMessage[][] = []
+    const longOutput = 'x'.repeat(600)
+    const adapter = buildRecordingAdapter(scriptedToolCalls('echo', 3), calls)
+    const { registry, executor } = buildRegistryAndExecutor(longOutput)
+    const runner = new AgentRunner(adapter, registry, executor, {
+      model: 'mock-model',
+      allowedTools: ['echo'],
+      maxTurns: 6,
+      compressToolResults: { minChars: 10 }, // very low threshold
+    })
+
+    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+
+    // Turn 4: turn 1 was compressed in turn 3. With minChars=10 the marker
+    // itself is longer than the threshold. Without the guard it would be
+    // re-compressed with a wrong char count (the marker's length instead
+    // of the original 600).
+    const allToolResults = extractToolResultContents(calls[3]!)
+
+    // Turn 1 result: should still show original 600 chars, not re-compressed
+    expect(allToolResults[0]).toContain('600 chars')
+    // Turn 2 result: compressed for the first time this turn
+    expect(allToolResults[1]).toContain('600 chars')
+    // Turn 3 result: most recent, preserved in full
+    expect(allToolResults[2]).toBe(longOutput)
+  })
+
+  it('works together with contextStrategy', async () => {
+    const calls: LLMMessage[][] = []
+    const longOutput = 'x'.repeat(600)
+    const adapter = buildRecordingAdapter(scriptedToolCalls('echo', 2), calls)
+    const { registry, executor } = buildRegistryAndExecutor(longOutput)
+    const runner = new AgentRunner(adapter, registry, executor, {
+      model: 'mock-model',
+      allowedTools: ['echo'],
+      maxTurns: 5,
+      compressToolResults: true,
+      contextStrategy: { type: 'sliding-window', maxTurns: 10 },
+    })
+
+    const result = await runner.run([
+      { role: 'user', content: [{ type: 'text', text: 'start' }] },
+    ])
+
+    // Should complete without error; both features coexist
+    expect(result.output).toBe('done')
+
+    // Turn 3 should have compressed turn 1 results
+    const allToolResults = extractToolResultContents(calls[2]!)
+    expect(allToolResults[0]).toContain('compressed')
+  })
+})