fix: resolve context compaction persistence and turn dropping (#161)
Replace `slice(initialMessages.length)` with an explicit `newMessages` accumulator so summarize/custom/sliding-window strategies that shrink conversation history no longer drop newly generated turns. Drops the `turns > 1` gate so oversized initial prompts can trigger compaction before the first LLM call. Fixes #152.
This commit is contained in:
parent
6cec006359
commit
11a1fb0ced
|
|
@ -206,7 +206,7 @@ export class Agent {
|
|||
|
||||
const result = await this.executeRun([...this.messageHistory])
|
||||
|
||||
// Persist the new messages into history so the next `prompt` sees them.
|
||||
// Persist the new messages into history so the next prompt sees them.
|
||||
for (const msg of result.messages) {
|
||||
this.messageHistory.push(msg)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -374,10 +374,12 @@ export class AgentRunner {
|
|||
: '[Conversation summary unavailable]'
|
||||
|
||||
this.summarizeCache = { oldSignature, summaryPrefix }
|
||||
|
||||
const mergedRecent = prependSyntheticPrefixToFirstUser(
|
||||
recentPortion,
|
||||
`${summaryPrefix}\n\n`,
|
||||
)
|
||||
|
||||
return {
|
||||
messages: [firstUser, ...mergedRecent],
|
||||
usage: summaryResponse.usage,
|
||||
|
|
@ -538,6 +540,7 @@ export class AgentRunner {
|
|||
): AsyncGenerator<StreamEvent> {
|
||||
// Working copy of the conversation — mutated as turns progress.
|
||||
let conversationMessages: LLMMessage[] = [...initialMessages]
|
||||
const newMessages: LLMMessage[] = []
|
||||
|
||||
// Accumulated state across all turns.
|
||||
let totalUsage: TokenUsage = ZERO_USAGE
|
||||
|
|
@ -593,8 +596,8 @@ export class AgentRunner {
|
|||
conversationMessages = this.compressConsumedToolResults(conversationMessages)
|
||||
}
|
||||
|
||||
// Optionally compact context before each LLM call after the first turn.
|
||||
if (this.options.contextStrategy && turns > 1) {
|
||||
// Optionally compact context before each LLM call.
|
||||
if (this.options.contextStrategy) {
|
||||
const compacted = await this.applyContextStrategy(
|
||||
conversationMessages,
|
||||
this.options.contextStrategy,
|
||||
|
|
@ -639,6 +642,7 @@ export class AgentRunner {
|
|||
}
|
||||
|
||||
conversationMessages.push(assistantMessage)
|
||||
newMessages.push(assistantMessage)
|
||||
options.onMessage?.(assistantMessage)
|
||||
|
||||
// Yield text deltas so streaming callers can display them promptly.
|
||||
|
|
@ -851,6 +855,7 @@ export class AgentRunner {
|
|||
}
|
||||
|
||||
conversationMessages.push(toolResultMessage)
|
||||
newMessages.push(toolResultMessage)
|
||||
options.onMessage?.(toolResultMessage)
|
||||
|
||||
// Budget check is deferred until tool_result events have been yielded
|
||||
|
|
@ -894,7 +899,7 @@ export class AgentRunner {
|
|||
|
||||
const runResult: RunResult = {
|
||||
// Return only the messages added during this run (not the initial seed).
|
||||
messages: conversationMessages.slice(initialMessages.length),
|
||||
messages: newMessages,
|
||||
output: finalOutput,
|
||||
toolCalls: allToolCalls,
|
||||
tokenUsage: totalUsage,
|
||||
|
|
|
|||
|
|
@ -165,8 +165,10 @@ describe('AgentRunner contextStrategy', () => {
|
|||
expect(rolesAfterFirstUser).not.toMatch(/^user,user/)
|
||||
})
|
||||
|
||||
it('custom strategy calls compress callback and uses returned messages', async () => {
|
||||
const compress = vi.fn((messages: LLMMessage[]) => messages.slice(-1))
|
||||
it('does not drop turns when context strategy shrinks array size', async () => {
|
||||
// The core bug from #152: if the strategy replaces the array with fewer messages than it started with,
|
||||
// the old `slice()` logic would incorrectly drop newly generated turns.
|
||||
const compress = vi.fn((messages: LLMMessage[]) => messages.slice(-1)) // Shrink to 1 message
|
||||
const calls: LLMMessage[][] = []
|
||||
const responses = [
|
||||
toolUseResponse('echo', { message: 'hello' }),
|
||||
|
|
@ -194,10 +196,26 @@ describe('AgentRunner contextStrategy', () => {
|
|||
},
|
||||
})
|
||||
|
||||
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'custom prompt' }] }])
|
||||
// Seed with 3 messages
|
||||
const initialMessages: LLMMessage[] = [
|
||||
{ role: 'user', content: [{ type: 'text', text: 'm1' }] },
|
||||
{ role: 'assistant', content: [{ type: 'text', text: 'm2' }] },
|
||||
{ role: 'user', content: [{ type: 'text', text: 'm3' }] },
|
||||
]
|
||||
|
||||
expect(compress).toHaveBeenCalledOnce()
|
||||
expect(calls[1]).toHaveLength(1)
|
||||
const result = await runner.run(initialMessages)
|
||||
|
||||
// Three new messages are generated during this run: the stream loop pushes
|
||||
// the assistant message (tool use), then the user message (tool result),
|
||||
// then loops back and pushes the final assistant message (text).
|
||||
expect(result.messages).toHaveLength(3)
|
||||
expect(result.messages[0]!.role).toBe('assistant')
|
||||
expect(result.messages[1]!.role).toBe('user') // The tool_result
|
||||
expect(result.messages[2]!.role).toBe('assistant')
|
||||
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Reference in New Issue