diff --git a/src/agent/runner.ts b/src/agent/runner.ts
index 5569a87..2b8fbce 100644
--- a/src/agent/runner.ts
+++ b/src/agent/runner.ts
@@ -176,6 +176,31 @@ function addTokenUsage(a: TokenUsage, b: TokenUsage): TokenUsage {
 
 const ZERO_USAGE: TokenUsage = { input_tokens: 0, output_tokens: 0 }
 
+/**
+ * Prepends synthetic framing text to the first user message, so we never emit
+ * consecutive `user` turns (which Bedrock rejects) and summaries are not
+ * concatenated onto the original user prompt (direct API). If there is no
+ * user message yet, inserts a single assistant text preamble instead.
+ */
+function prependSyntheticPrefixToFirstUser(
+  messages: LLMMessage[],
+  prefix: string,
+): LLMMessage[] {
+  const userIdx = messages.findIndex(m => m.role === 'user')
+  if (userIdx < 0) {
+    return [{
+      role: 'assistant',
+      content: [{ type: 'text', text: prefix.trimEnd() }],
+    }, ...messages]
+  }
+  const target = messages[userIdx]!
+  const merged: LLMMessage = {
+    role: 'user',
+    content: [{ type: 'text', text: prefix }, ...target.content],
+  }
+  return [...messages.slice(0, userIdx), merged, ...messages.slice(userIdx + 1)]
+}
+
 // ---------------------------------------------------------------------------
 // AgentRunner
 // ---------------------------------------------------------------------------
@@ -197,7 +222,7 @@ export class AgentRunner {
   private readonly maxTurns: number
   private summarizeCache: {
     oldSignature: string
-    summaryMessage: LLMMessage
+    summaryPrefix: string
   } | null = null
 
   constructor(
@@ -237,13 +262,10 @@ export class AgentRunner {
 
     const droppedPairs = Math.floor((afterFirst.length - kept.length) / 2)
    if (droppedPairs > 0) {
-      result.push({
-        role: 'user',
-        content: [{
-          type: 'text',
-          text: `[Earlier conversation history truncated — ${droppedPairs} turn(s) removed]`,
-        }],
-      })
+      const notice =
+        `[Earlier conversation history truncated — ${droppedPairs} turn(s) removed]\n\n`
+      result.push(...prependSyntheticPrefixToFirstUser(kept, notice))
+      return result
     }
 
     result.push(...kept)
@@ -257,30 +279,36 @@
     baseChatOptions: LLMChatOptions,
     turns: number,
     options: RunOptions,
-  ): Promise<LLMMessage[]> {
+  ): Promise<{ messages: LLMMessage[]; usage: TokenUsage }> {
     const estimated = estimateTokens(messages)
     if (estimated <= maxTokens || messages.length < 4) {
-      return messages
+      return { messages, usage: ZERO_USAGE }
     }
 
     const firstUserIndex = messages.findIndex(m => m.role === 'user')
     if (firstUserIndex < 0 || firstUserIndex === messages.length - 1) {
-      return messages
+      return { messages, usage: ZERO_USAGE }
     }
 
     const firstUser = messages[firstUserIndex]!
     const rest = messages.slice(firstUserIndex + 1)
     if (rest.length < 2) {
-      return messages
+      return { messages, usage: ZERO_USAGE }
     }
 
-    const splitAt = Math.max(2, Math.floor(rest.length / 2))
+    // Split on an even boundary so we never separate a tool_use assistant turn
+    // from its tool_result user message (rest is assistant/user pairs).
+    const splitAt = Math.max(2, Math.floor(rest.length / 4) * 2)
     const oldPortion = rest.slice(0, splitAt)
     const recentPortion = rest.slice(splitAt)
     const oldSignature = oldPortion.map(m => this.serializeMessage(m)).join('\n')
 
     if (this.summarizeCache !== null && this.summarizeCache.oldSignature === oldSignature) {
-      return [firstUser, this.summarizeCache.summaryMessage, ...recentPortion]
+      const mergedRecent = prependSyntheticPrefixToFirstUser(
+        recentPortion,
+        `${this.summarizeCache.summaryPrefix}\n\n`,
+      )
+      return { messages: [firstUser, ...mergedRecent], usage: ZERO_USAGE }
     }
 
     const summaryPrompt = [
@@ -327,18 +355,19 @@
     }
 
     const summaryText = extractText(summaryResponse.content).trim()
-    const summaryMessage: LLMMessage = {
-      role: 'user',
-      content: [{
-        type: 'text',
-        text: summaryText.length > 0
-          ? `[Conversation summary]\n${summaryText}`
-          : '[Conversation summary unavailable]',
-      }],
-    }
+    const summaryPrefix = summaryText.length > 0
+      ? `[Conversation summary]\n${summaryText}`
+      : '[Conversation summary unavailable]'
 
-    this.summarizeCache = { oldSignature, summaryMessage }
-    return [firstUser, summaryMessage, ...recentPortion]
+    this.summarizeCache = { oldSignature, summaryPrefix }
+    const mergedRecent = prependSyntheticPrefixToFirstUser(
+      recentPortion,
+      `${summaryPrefix}\n\n`,
+    )
+    return {
+      messages: [firstUser, ...mergedRecent],
+      usage: summaryResponse.usage,
+    }
   }
 
   private async applyContextStrategy(
@@ -347,9 +376,9 @@
     baseChatOptions: LLMChatOptions,
     turns: number,
     options: RunOptions,
-  ): Promise<LLMMessage[]> {
+  ): Promise<{ messages: LLMMessage[]; usage: TokenUsage }> {
     if (strategy.type === 'sliding-window') {
-      return this.truncateToSlidingWindow(messages, strategy.maxTurns)
+      return { messages: this.truncateToSlidingWindow(messages, strategy.maxTurns), usage: ZERO_USAGE }
     }
 
     if (strategy.type === 'summarize') {
@@ -368,7 +397,7 @@
     if (!Array.isArray(compressed) || compressed.length === 0) {
       throw new Error('contextStrategy.custom.compress must return a non-empty LLMMessage[]')
     }
-    return compressed
+    return { messages: compressed, usage: ZERO_USAGE }
   }
 
   // -------------------------------------------------------------------------
@@ -535,13 +564,15 @@
 
      // Optionally compact context before each LLM call after the first turn.
      if (this.options.contextStrategy && turns > 1) {
-        conversationMessages = await this.applyContextStrategy(
+        const compacted = await this.applyContextStrategy(
           conversationMessages,
           this.options.contextStrategy,
           baseChatOptions,
           turns,
           options,
         )
+        conversationMessages = compacted.messages
+        totalUsage = addTokenUsage(totalUsage, compacted.usage)
       }
 
       // ------------------------------------------------------------------
diff --git a/tests/context-strategy.test.ts b/tests/context-strategy.test.ts
index dc1bc13..7c847b0 100644
--- a/tests/context-strategy.test.ts
+++ b/tests/context-strategy.test.ts
@@ -137,7 +137,7 @@ describe('AgentRunner contextStrategy', () => {
       contextStrategy: { type: 'summarize', maxTokens: 20 },
     })
 
-    await runner.run(
+    const result = await runner.run(
      [{ role: 'user', content: [{ type: 'text', text: 'start' }] }],
      { onTrace: (e) => { traces.push(e) }, runId: 'run-summary', traceAgent: 'context-agent' },
    )
@@ -146,6 +146,23 @@
     expect(summaryCall).toBeDefined()
     const llmTraces = traces.filter(t => t.type === 'llm_call')
     expect(llmTraces.some(t => t.type === 'llm_call' && t.phase === 'summary')).toBe(true)
+
+    // Summary adapter usage must count toward RunResult.tokenUsage (maxTokenBudget).
+    expect(result.tokenUsage.input_tokens).toBe(15 + 15 + 10 + 10)
+    expect(result.tokenUsage.output_tokens).toBe(25 + 25 + 20 + 20)
+
+    // After compaction, summary text is folded into the next user turn (not a
+    // standalone user message), preserving user/assistant alternation.
+    const turnAfterSummary = calls.find(
+      c => c.messages.some(
+        m => m.role === 'user' && m.content.some(
+          b => b.type === 'text' && b.text.includes('[Conversation summary]'),
+        ),
+      ),
+    )
+    expect(turnAfterSummary).toBeDefined()
+    const roleSequence = turnAfterSummary!.messages.map(m => m.role).join(',')
+    expect(roleSequence).not.toMatch(/^user,user/)
   })

   it('custom strategy calls compress callback and uses returned messages', async () => {
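The invariant the new helper protects is easiest to see on concrete data. Below is a minimal sketch, assuming a simplified LLMMessage shape with only a role and text content blocks (the real type in src/agent/runner.ts also carries tool_use/tool_result blocks) and a stripped-down copy of prependSyntheticPrefixToFirstUser written purely for illustration:

type TextBlock = { type: 'text'; text: string }
type Msg = { role: 'user' | 'assistant'; content: TextBlock[] }

// Stripped-down copy of prependSyntheticPrefixToFirstUser, for illustration only.
function prependPrefixToFirstUser(messages: Msg[], prefix: string): Msg[] {
  const userIdx = messages.findIndex(m => m.role === 'user')
  if (userIdx < 0) {
    // No user turn to merge into: fall back to a lone assistant preamble.
    return [{ role: 'assistant', content: [{ type: 'text', text: prefix.trimEnd() }] }, ...messages]
  }
  const target = messages[userIdx]!
  return [
    ...messages.slice(0, userIdx),
    { role: 'user', content: [{ type: 'text', text: prefix }, ...target.content] },
    ...messages.slice(userIdx + 1),
  ]
}

// The portion kept after compaction starts with an assistant turn, because it
// is sliced from just after the first user message.
const recentPortion: Msg[] = [
  { role: 'assistant', content: [{ type: 'text', text: 'Running the tool now.' }] },
  { role: 'user', content: [{ type: 'text', text: 'tool result: ok' }] },
]

const merged = prependPrefixToFirstUser(recentPortion, '[Conversation summary]\n...\n\n')
console.log(merged.map(m => m.role).join(','))
// -> "assistant,user": re-attaching the original first user message in front
// keeps the list alternating, so Bedrock never sees two user turns in a row
// and the summary never concatenates onto the original prompt.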
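The even-boundary split in the summarize path rests on a small arithmetic fact: Math.floor(rest.length / 4) * 2 is the largest even number not exceeding half of rest.length, so the cut always falls after a tool_result user message rather than between a tool_use assistant turn and its result. A quick standalone check, plain numbers only:

for (const len of [4, 5, 6, 7, 10, 11]) {
  const oldSplit = Math.max(2, Math.floor(len / 2))      // 2, 2, 3, 3, 5, 5: can be odd
  const newSplit = Math.max(2, Math.floor(len / 4) * 2)  // 2, 2, 2, 2, 4, 4: always even
  console.log(len, oldSplit, newSplit)
}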