fix: resolve context compaction persistence and turn dropping (#161)
Replace `slice(initialMessages.length)` with an explicit `newMessages` accumulator so summarize/custom/sliding-window strategies that shrink conversation history no longer drop newly generated turns. Drops the `turns > 1` gate so oversized initial prompts can trigger compaction before the first LLM call. Fixes #152.
This commit is contained in:
parent
6cec006359
commit
11a1fb0ced
|
|
@ -206,7 +206,7 @@ export class Agent {
|
||||||
|
|
||||||
const result = await this.executeRun([...this.messageHistory])
|
const result = await this.executeRun([...this.messageHistory])
|
||||||
|
|
||||||
// Persist the new messages into history so the next `prompt` sees them.
|
// Persist the new messages into history so the next prompt sees them.
|
||||||
for (const msg of result.messages) {
|
for (const msg of result.messages) {
|
||||||
this.messageHistory.push(msg)
|
this.messageHistory.push(msg)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -374,10 +374,12 @@ export class AgentRunner {
|
||||||
: '[Conversation summary unavailable]'
|
: '[Conversation summary unavailable]'
|
||||||
|
|
||||||
this.summarizeCache = { oldSignature, summaryPrefix }
|
this.summarizeCache = { oldSignature, summaryPrefix }
|
||||||
|
|
||||||
const mergedRecent = prependSyntheticPrefixToFirstUser(
|
const mergedRecent = prependSyntheticPrefixToFirstUser(
|
||||||
recentPortion,
|
recentPortion,
|
||||||
`${summaryPrefix}\n\n`,
|
`${summaryPrefix}\n\n`,
|
||||||
)
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
messages: [firstUser, ...mergedRecent],
|
messages: [firstUser, ...mergedRecent],
|
||||||
usage: summaryResponse.usage,
|
usage: summaryResponse.usage,
|
||||||
|
|
@ -538,6 +540,7 @@ export class AgentRunner {
|
||||||
): AsyncGenerator<StreamEvent> {
|
): AsyncGenerator<StreamEvent> {
|
||||||
// Working copy of the conversation — mutated as turns progress.
|
// Working copy of the conversation — mutated as turns progress.
|
||||||
let conversationMessages: LLMMessage[] = [...initialMessages]
|
let conversationMessages: LLMMessage[] = [...initialMessages]
|
||||||
|
const newMessages: LLMMessage[] = []
|
||||||
|
|
||||||
// Accumulated state across all turns.
|
// Accumulated state across all turns.
|
||||||
let totalUsage: TokenUsage = ZERO_USAGE
|
let totalUsage: TokenUsage = ZERO_USAGE
|
||||||
|
|
@ -593,8 +596,8 @@ export class AgentRunner {
|
||||||
conversationMessages = this.compressConsumedToolResults(conversationMessages)
|
conversationMessages = this.compressConsumedToolResults(conversationMessages)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Optionally compact context before each LLM call after the first turn.
|
// Optionally compact context before each LLM call.
|
||||||
if (this.options.contextStrategy && turns > 1) {
|
if (this.options.contextStrategy) {
|
||||||
const compacted = await this.applyContextStrategy(
|
const compacted = await this.applyContextStrategy(
|
||||||
conversationMessages,
|
conversationMessages,
|
||||||
this.options.contextStrategy,
|
this.options.contextStrategy,
|
||||||
|
|
@ -639,6 +642,7 @@ export class AgentRunner {
|
||||||
}
|
}
|
||||||
|
|
||||||
conversationMessages.push(assistantMessage)
|
conversationMessages.push(assistantMessage)
|
||||||
|
newMessages.push(assistantMessage)
|
||||||
options.onMessage?.(assistantMessage)
|
options.onMessage?.(assistantMessage)
|
||||||
|
|
||||||
// Yield text deltas so streaming callers can display them promptly.
|
// Yield text deltas so streaming callers can display them promptly.
|
||||||
|
|
@ -851,6 +855,7 @@ export class AgentRunner {
|
||||||
}
|
}
|
||||||
|
|
||||||
conversationMessages.push(toolResultMessage)
|
conversationMessages.push(toolResultMessage)
|
||||||
|
newMessages.push(toolResultMessage)
|
||||||
options.onMessage?.(toolResultMessage)
|
options.onMessage?.(toolResultMessage)
|
||||||
|
|
||||||
// Budget check is deferred until tool_result events have been yielded
|
// Budget check is deferred until tool_result events have been yielded
|
||||||
|
|
@ -894,7 +899,7 @@ export class AgentRunner {
|
||||||
|
|
||||||
const runResult: RunResult = {
|
const runResult: RunResult = {
|
||||||
// Return only the messages added during this run (not the initial seed).
|
// Return only the messages added during this run (not the initial seed).
|
||||||
messages: conversationMessages.slice(initialMessages.length),
|
messages: newMessages,
|
||||||
output: finalOutput,
|
output: finalOutput,
|
||||||
toolCalls: allToolCalls,
|
toolCalls: allToolCalls,
|
||||||
tokenUsage: totalUsage,
|
tokenUsage: totalUsage,
|
||||||
|
|
|
||||||
|
|
@ -165,8 +165,10 @@ describe('AgentRunner contextStrategy', () => {
|
||||||
expect(rolesAfterFirstUser).not.toMatch(/^user,user/)
|
expect(rolesAfterFirstUser).not.toMatch(/^user,user/)
|
||||||
})
|
})
|
||||||
|
|
||||||
it('custom strategy calls compress callback and uses returned messages', async () => {
|
it('does not drop turns when context strategy shrinks array size', async () => {
|
||||||
const compress = vi.fn((messages: LLMMessage[]) => messages.slice(-1))
|
// The core bug from #152: if the strategy replaces the array with fewer messages than it started with,
|
||||||
|
// the old `slice()` logic would incorrectly drop newly generated turns.
|
||||||
|
const compress = vi.fn((messages: LLMMessage[]) => messages.slice(-1)) // Shrink to 1 message
|
||||||
const calls: LLMMessage[][] = []
|
const calls: LLMMessage[][] = []
|
||||||
const responses = [
|
const responses = [
|
||||||
toolUseResponse('echo', { message: 'hello' }),
|
toolUseResponse('echo', { message: 'hello' }),
|
||||||
|
|
@ -194,10 +196,26 @@ describe('AgentRunner contextStrategy', () => {
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'custom prompt' }] }])
|
// Seed with 3 messages
|
||||||
|
const initialMessages: LLMMessage[] = [
|
||||||
|
{ role: 'user', content: [{ type: 'text', text: 'm1' }] },
|
||||||
|
{ role: 'assistant', content: [{ type: 'text', text: 'm2' }] },
|
||||||
|
{ role: 'user', content: [{ type: 'text', text: 'm3' }] },
|
||||||
|
]
|
||||||
|
|
||||||
expect(compress).toHaveBeenCalledOnce()
|
const result = await runner.run(initialMessages)
|
||||||
expect(calls[1]).toHaveLength(1)
|
|
||||||
|
// 3 new messages are generated during this run. The stream loop pushes the
|
||||||
|
// assistant message (the tool use), then the user message (the tool result),
|
||||||
|
// then loops back and pushes the final assistant message (text). The old
|
||||||
|
// `slice(initialMessages.length)` logic would have dropped turns here once the strategy shrank the array.
|
||||||
|
expect(result.messages).toHaveLength(3)
|
||||||
|
expect(result.messages[0]!.role).toBe('assistant')
|
||||||
|
expect(result.messages[1]!.role).toBe('user') // The tool_result
|
||||||
|
expect(result.messages[2]!.role).toBe('assistant')
|
||||||
})
|
})
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue