fix: resolve context compaction persistence and turn dropping (#161)

Replace `slice(initialMessages.length)` with an explicit `newMessages` accumulator so summarize/custom/sliding-window strategies that shrink conversation history no longer drop newly generated turns. Drops the `turns > 1` gate so oversized initial prompts can trigger compaction before the first LLM call.

Fixes #152.
This commit is contained in:
Mark Galyan 2026-04-23 14:19:42 -04:00 committed by GitHub
parent 6cec006359
commit 11a1fb0ced
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 32 additions and 9 deletions

View File

@ -206,7 +206,7 @@ export class Agent {
const result = await this.executeRun([...this.messageHistory])
// Persist the new messages into history so the next `prompt` sees them.
// Persist the new messages into history so the next prompt sees them.
for (const msg of result.messages) {
this.messageHistory.push(msg)
}

View File

@ -374,10 +374,12 @@ export class AgentRunner {
: '[Conversation summary unavailable]'
this.summarizeCache = { oldSignature, summaryPrefix }
const mergedRecent = prependSyntheticPrefixToFirstUser(
recentPortion,
`${summaryPrefix}\n\n`,
)
return {
messages: [firstUser, ...mergedRecent],
usage: summaryResponse.usage,
@ -538,6 +540,7 @@ export class AgentRunner {
): AsyncGenerator<StreamEvent> {
// Working copy of the conversation — mutated as turns progress.
let conversationMessages: LLMMessage[] = [...initialMessages]
const newMessages: LLMMessage[] = []
// Accumulated state across all turns.
let totalUsage: TokenUsage = ZERO_USAGE
@ -593,8 +596,8 @@ export class AgentRunner {
conversationMessages = this.compressConsumedToolResults(conversationMessages)
}
// Optionally compact context before each LLM call after the first turn.
if (this.options.contextStrategy && turns > 1) {
// Optionally compact context before each LLM call.
if (this.options.contextStrategy) {
const compacted = await this.applyContextStrategy(
conversationMessages,
this.options.contextStrategy,
@ -639,6 +642,7 @@ export class AgentRunner {
}
conversationMessages.push(assistantMessage)
newMessages.push(assistantMessage)
options.onMessage?.(assistantMessage)
// Yield text deltas so streaming callers can display them promptly.
@ -851,6 +855,7 @@ export class AgentRunner {
}
conversationMessages.push(toolResultMessage)
newMessages.push(toolResultMessage)
options.onMessage?.(toolResultMessage)
// Budget check is deferred until tool_result events have been yielded
@ -894,7 +899,7 @@ export class AgentRunner {
const runResult: RunResult = {
// Return only the messages added during this run (not the initial seed).
messages: conversationMessages.slice(initialMessages.length),
messages: newMessages,
output: finalOutput,
toolCalls: allToolCalls,
tokenUsage: totalUsage,

View File

@ -165,8 +165,10 @@ describe('AgentRunner contextStrategy', () => {
expect(rolesAfterFirstUser).not.toMatch(/^user,user/)
})
it('custom strategy calls compress callback and uses returned messages', async () => {
const compress = vi.fn((messages: LLMMessage[]) => messages.slice(-1))
it('does not drop turns when context strategy shrinks array size', async () => {
// The core bug from #152: if the strategy replaces the array with fewer messages than it started with,
// the old `slice()` logic would incorrectly drop newly generated turns.
const compress = vi.fn((messages: LLMMessage[]) => messages.slice(-1)) // Shrink to 1 message
const calls: LLMMessage[][] = []
const responses = [
toolUseResponse('echo', { message: 'hello' }),
@ -194,10 +196,26 @@ describe('AgentRunner contextStrategy', () => {
},
})
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'custom prompt' }] }])
// Seed with 3 messages
const initialMessages: LLMMessage[] = [
{ role: 'user', content: [{ type: 'text', text: 'm1' }] },
{ role: 'assistant', content: [{ type: 'text', text: 'm2' }] },
{ role: 'user', content: [{ type: 'text', text: 'm3' }] },
]
expect(compress).toHaveBeenCalledOnce()
expect(calls[1]).toHaveLength(1)
const result = await runner.run(initialMessages)
// Three new messages are generated during this run: the assistant tool-use
// message, the user tool-result message, and the final assistant text response.
// With the old `slice(initialMessages.length)` logic, shrinking the array to 1
// message would have dropped these newly generated turns.
expect(result.messages).toHaveLength(3)
expect(result.messages[0]!.role).toBe('assistant')
expect(result.messages[1]!.role).toBe('user') // The tool_result
expect(result.messages[2]!.role).toBe('assistant')
})
// ---------------------------------------------------------------------------