From 11a1fb0cedce41246244059b1ed2a6e9ef4ed066 Mon Sep 17 00:00:00 2001 From: Mark Galyan <55340148+apollo-mg@users.noreply.github.com> Date: Thu, 23 Apr 2026 14:19:42 -0400 Subject: [PATCH] fix: resolve context compaction persistence and turn dropping (#161) Replace `slice(initialMessages.length)` with an explicit `newMessages` accumulator so summarize/custom/sliding-window strategies that shrink conversation history no longer drop newly generated turns. Drops the `turns > 1` gate so oversized initial prompts can trigger compaction before the first LLM call. Fixes #152. --- src/agent/agent.ts | 2 +- src/agent/runner.ts | 11 ++++++++--- tests/context-strategy.test.ts | 28 +++++++++++++++++++++++----- 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/src/agent/agent.ts b/src/agent/agent.ts index d7c808e..f55e9c1 100644 --- a/src/agent/agent.ts +++ b/src/agent/agent.ts @@ -206,7 +206,7 @@ export class Agent { const result = await this.executeRun([...this.messageHistory]) - // Persist the new messages into history so the next `prompt` sees them. + // Persist the new messages into history so the next prompt sees them. for (const msg of result.messages) { this.messageHistory.push(msg) } diff --git a/src/agent/runner.ts b/src/agent/runner.ts index 6264c13..714af3c 100644 --- a/src/agent/runner.ts +++ b/src/agent/runner.ts @@ -374,10 +374,12 @@ export class AgentRunner { : '[Conversation summary unavailable]' this.summarizeCache = { oldSignature, summaryPrefix } + const mergedRecent = prependSyntheticPrefixToFirstUser( recentPortion, `${summaryPrefix}\n\n`, ) + return { messages: [firstUser, ...mergedRecent], usage: summaryResponse.usage, @@ -538,6 +540,7 @@ export class AgentRunner { ): AsyncGenerator { // Working copy of the conversation — mutated as turns progress. let conversationMessages: LLMMessage[] = [...initialMessages] + const newMessages: LLMMessage[] = [] // Accumulated state across all turns. 
let totalUsage: TokenUsage = ZERO_USAGE @@ -593,8 +596,8 @@ export class AgentRunner { conversationMessages = this.compressConsumedToolResults(conversationMessages) } - // Optionally compact context before each LLM call after the first turn. - if (this.options.contextStrategy && turns > 1) { + // Optionally compact context before each LLM call. + if (this.options.contextStrategy) { const compacted = await this.applyContextStrategy( conversationMessages, this.options.contextStrategy, @@ -639,6 +642,7 @@ export class AgentRunner { } conversationMessages.push(assistantMessage) + newMessages.push(assistantMessage) options.onMessage?.(assistantMessage) // Yield text deltas so streaming callers can display them promptly. @@ -851,6 +855,7 @@ export class AgentRunner { } conversationMessages.push(toolResultMessage) + newMessages.push(toolResultMessage) options.onMessage?.(toolResultMessage) // Budget check is deferred until tool_result events have been yielded @@ -894,7 +899,7 @@ export class AgentRunner { const runResult: RunResult = { // Return only the messages added during this run (not the initial seed). 
- messages: conversationMessages.slice(initialMessages.length), + messages: newMessages, output: finalOutput, toolCalls: allToolCalls, tokenUsage: totalUsage, diff --git a/tests/context-strategy.test.ts b/tests/context-strategy.test.ts index 711d134..b49de3a 100644 --- a/tests/context-strategy.test.ts +++ b/tests/context-strategy.test.ts @@ -165,8 +165,10 @@ describe('AgentRunner contextStrategy', () => { expect(rolesAfterFirstUser).not.toMatch(/^user,user/) }) - it('custom strategy calls compress callback and uses returned messages', async () => { - const compress = vi.fn((messages: LLMMessage[]) => messages.slice(-1)) + it('does not drop turns when context strategy shrinks array size', async () => { + // The core bug from #152: if the strategy replaces the array with fewer messages than it started with, + // the old `slice()` logic would incorrectly drop newly generated turns. + const compress = vi.fn((messages: LLMMessage[]) => messages.slice(-1)) // Shrink to 1 message const calls: LLMMessage[][] = [] const responses = [ toolUseResponse('echo', { message: 'hello' }), @@ -194,10 +196,26 @@ describe('AgentRunner contextStrategy', () => { }, }) - await runner.run([{ role: 'user', content: [{ type: 'text', text: 'custom prompt' }] }]) + // Seed with 3 messages + const initialMessages: LLMMessage[] = [ + { role: 'user', content: [{ type: 'text', text: 'm1' }] }, + { role: 'assistant', content: [{ type: 'text', text: 'm2' }] }, + { role: 'user', content: [{ type: 'text', text: 'm3' }] }, + ] - expect(compress).toHaveBeenCalledOnce() - expect(calls[1]).toHaveLength(1) + const result = await runner.run(initialMessages) + + // With the old slice(initialMessages.length) logic, the custom strategy + // shrinking the working conversation to a single message meant the slice + // offset no longer marked the seed/new-message boundary, so newly generated + // turns were silently dropped from result.messages. Verify each new turn. 
+ // The stream loop pushes the assistant message (tool use), then the user message (tool result), + // then loops back and pushes the final assistant message (text). + // So 3 new messages are added during this run. + expect(result.messages).toHaveLength(3) + expect(result.messages[0]!.role).toBe('assistant') + expect(result.messages[1]!.role).toBe('user') // The tool_result + expect(result.messages[2]!.role).toBe('assistant') }) // ---------------------------------------------------------------------------