fix: resolve context compaction persistence and turn dropping (#161)

Replace `slice(initialMessages.length)` with an explicit `newMessages` accumulator so summarize/custom/sliding-window strategies that shrink conversation history no longer drop newly generated turns. Drops the `turns > 1` gate so oversized initial prompts can trigger compaction before the first LLM call.

Fixes #152.
This commit is contained in:
Mark Galyan 2026-04-23 14:19:42 -04:00 committed by GitHub
parent 6cec006359
commit 11a1fb0ced
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 32 additions and 9 deletions

View File

@ -206,7 +206,7 @@ export class Agent {
const result = await this.executeRun([...this.messageHistory])
// Persist the new messages into history so the next `prompt` sees them.
// Persist the new messages into history so the next prompt sees them.
for (const msg of result.messages) {
this.messageHistory.push(msg)
}

View File

@ -374,10 +374,12 @@ export class AgentRunner {
: '[Conversation summary unavailable]'
this.summarizeCache = { oldSignature, summaryPrefix }
const mergedRecent = prependSyntheticPrefixToFirstUser(
recentPortion,
`${summaryPrefix}\n\n`,
)
return {
messages: [firstUser, ...mergedRecent],
usage: summaryResponse.usage,
@ -538,6 +540,7 @@ export class AgentRunner {
): AsyncGenerator<StreamEvent> {
// Working copy of the conversation — mutated as turns progress.
let conversationMessages: LLMMessage[] = [...initialMessages]
const newMessages: LLMMessage[] = []
// Accumulated state across all turns.
let totalUsage: TokenUsage = ZERO_USAGE
@ -593,8 +596,8 @@ export class AgentRunner {
conversationMessages = this.compressConsumedToolResults(conversationMessages)
}
// Optionally compact context before each LLM call after the first turn.
if (this.options.contextStrategy && turns > 1) {
// Optionally compact context before each LLM call.
if (this.options.contextStrategy) {
const compacted = await this.applyContextStrategy(
conversationMessages,
this.options.contextStrategy,
@ -639,6 +642,7 @@ export class AgentRunner {
}
conversationMessages.push(assistantMessage)
newMessages.push(assistantMessage)
options.onMessage?.(assistantMessage)
// Yield text deltas so streaming callers can display them promptly.
@ -851,6 +855,7 @@ export class AgentRunner {
}
conversationMessages.push(toolResultMessage)
newMessages.push(toolResultMessage)
options.onMessage?.(toolResultMessage)
// Budget check is deferred until tool_result events have been yielded
@ -894,7 +899,7 @@ export class AgentRunner {
const runResult: RunResult = {
// Return only the messages added during this run (not the initial seed).
messages: conversationMessages.slice(initialMessages.length),
messages: newMessages,
output: finalOutput,
toolCalls: allToolCalls,
tokenUsage: totalUsage,

View File

@ -165,8 +165,10 @@ describe('AgentRunner contextStrategy', () => {
expect(rolesAfterFirstUser).not.toMatch(/^user,user/)
})
it('custom strategy calls compress callback and uses returned messages', async () => {
const compress = vi.fn((messages: LLMMessage[]) => messages.slice(-1))
it('does not drop turns when context strategy shrinks array size', async () => {
// The core bug from #152: if the strategy replaces the array with fewer messages than it started with,
// the old `slice()` logic would incorrectly drop newly generated turns.
const compress = vi.fn((messages: LLMMessage[]) => messages.slice(-1)) // Shrink to 1 message
const calls: LLMMessage[][] = []
const responses = [
toolUseResponse('echo', { message: 'hello' }),
@ -194,10 +196,26 @@ describe('AgentRunner contextStrategy', () => {
},
})
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'custom prompt' }] }])
// Seed with 3 messages
const initialMessages: LLMMessage[] = [
{ role: 'user', content: [{ type: 'text', text: 'm1' }] },
{ role: 'assistant', content: [{ type: 'text', text: 'm2' }] },
{ role: 'user', content: [{ type: 'text', text: 'm3' }] },
]
expect(compress).toHaveBeenCalledOnce()
expect(calls[1]).toHaveLength(1)
const result = await runner.run(initialMessages)
// Three new messages are generated during this run: the assistant tool-use
// message, the user tool-result message, and the final assistant text response.
// With the old `slice(initialMessages.length)` logic, shrinking the array to 1
// message would have dropped these newly generated turns.
expect(result.messages).toHaveLength(3)
expect(result.messages[0]!.role).toBe('assistant')
expect(result.messages[1]!.role).toBe('user') // The tool_result
expect(result.messages[2]!.role).toBe('assistant')
})
// ---------------------------------------------------------------------------