From 11a1fb0cedce41246244059b1ed2a6e9ef4ed066 Mon Sep 17 00:00:00 2001 From: Mark Galyan <55340148+apollo-mg@users.noreply.github.com> Date: Thu, 23 Apr 2026 14:19:42 -0400 Subject: [PATCH] fix: resolve context compaction persistence and turn dropping (#161) Replace `slice(initialMessages.length)` with an explicit `newMessages` accumulator so summarize/custom/sliding-window strategies that shrink conversation history no longer drop newly generated turns. Drops the `turns > 1` gate so oversized initial prompts can trigger compaction before the first LLM call. Fixes #152. --- src/agent/agent.ts | 2 +- src/agent/runner.ts | 11 ++++++++--- tests/context-strategy.test.ts | 28 +++++++++++++++++++++++----- 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/src/agent/agent.ts b/src/agent/agent.ts index d7c808e..f55e9c1 100644 --- a/src/agent/agent.ts +++ b/src/agent/agent.ts @@ -206,7 +206,7 @@ export class Agent { const result = await this.executeRun([...this.messageHistory]) - // Persist the new messages into history so the next `prompt` sees them. + // Persist the new messages into history so the next prompt sees them. for (const msg of result.messages) { this.messageHistory.push(msg) } diff --git a/src/agent/runner.ts b/src/agent/runner.ts index 6264c13..714af3c 100644 --- a/src/agent/runner.ts +++ b/src/agent/runner.ts @@ -374,10 +374,12 @@ export class AgentRunner { : '[Conversation summary unavailable]' this.summarizeCache = { oldSignature, summaryPrefix } + const mergedRecent = prependSyntheticPrefixToFirstUser( recentPortion, `${summaryPrefix}\n\n`, ) + return { messages: [firstUser, ...mergedRecent], usage: summaryResponse.usage, @@ -538,6 +540,7 @@ export class AgentRunner { ): AsyncGenerator { // Working copy of the conversation — mutated as turns progress. let conversationMessages: LLMMessage[] = [...initialMessages] + const newMessages: LLMMessage[] = [] // Accumulated state across all turns. 
let totalUsage: TokenUsage = ZERO_USAGE @@ -593,8 +596,8 @@ export class AgentRunner { conversationMessages = this.compressConsumedToolResults(conversationMessages) } - // Optionally compact context before each LLM call after the first turn. - if (this.options.contextStrategy && turns > 1) { + // Optionally compact context before each LLM call. + if (this.options.contextStrategy) { const compacted = await this.applyContextStrategy( conversationMessages, this.options.contextStrategy, @@ -639,6 +642,7 @@ export class AgentRunner { } conversationMessages.push(assistantMessage) + newMessages.push(assistantMessage) options.onMessage?.(assistantMessage) // Yield text deltas so streaming callers can display them promptly. @@ -851,6 +855,7 @@ export class AgentRunner { } conversationMessages.push(toolResultMessage) + newMessages.push(toolResultMessage) options.onMessage?.(toolResultMessage) // Budget check is deferred until tool_result events have been yielded @@ -894,7 +899,7 @@ export class AgentRunner { const runResult: RunResult = { // Return only the messages added during this run (not the initial seed). 
- messages: conversationMessages.slice(initialMessages.length), + messages: newMessages, output: finalOutput, toolCalls: allToolCalls, tokenUsage: totalUsage, diff --git a/tests/context-strategy.test.ts b/tests/context-strategy.test.ts index 711d134..b49de3a 100644 --- a/tests/context-strategy.test.ts +++ b/tests/context-strategy.test.ts @@ -165,8 +165,10 @@ describe('AgentRunner contextStrategy', () => { expect(rolesAfterFirstUser).not.toMatch(/^user,user/) }) - it('custom strategy calls compress callback and uses returned messages', async () => { - const compress = vi.fn((messages: LLMMessage[]) => messages.slice(-1)) + it('does not drop turns when context strategy shrinks array size', async () => { + // The core bug from #152: if the strategy replaces the array with fewer messages than it started with, + // the old `slice()` logic would incorrectly drop newly generated turns. + const compress = vi.fn((messages: LLMMessage[]) => messages.slice(-1)) // Shrink to 1 message const calls: LLMMessage[][] = [] const responses = [ toolUseResponse('echo', { message: 'hello' }), @@ -194,10 +196,26 @@ describe('AgentRunner contextStrategy', () => { }, }) - await runner.run([{ role: 'user', content: [{ type: 'text', text: 'custom prompt' }] }]) + // Seed with 3 messages + const initialMessages: LLMMessage[] = [ + { role: 'user', content: [{ type: 'text', text: 'm1' }] }, + { role: 'assistant', content: [{ type: 'text', text: 'm2' }] }, + { role: 'user', content: [{ type: 'text', text: 'm3' }] }, + ] - expect(compress).toHaveBeenCalledOnce() - expect(calls[1]).toHaveLength(1) + const result = await runner.run(initialMessages) + + // With the old slice(initialMessages.length) logic, the custom strategy + // shrinking the working conversation to a single message meant the slice + // offset no longer marked the seed/new-message boundary, so newly generated + // turns were silently dropped from result.messages. Verify each new turn. 
+ // The stream loop pushes the assistant message (tool use), then the user message (tool result), + // then loops back and pushes the final assistant message (text). + // So 3 new messages are added during this run. + expect(result.messages).toHaveLength(3) + expect(result.messages[0]!.role).toBe('assistant') + expect(result.messages[1]!.role).toBe('user') // The tool_result + expect(result.messages[2]!.role).toBe('assistant') }) // ---------------------------------------------------------------------------