feat: add context management strategies (sliding-window, summarize, custom) to prevent unbounded conversation growth
This commit is contained in:
parent
f1c7477a26
commit
eb484d9bbf
|
|
@ -114,6 +114,8 @@ const conversationAgent = new Agent(
|
||||||
model: 'claude-sonnet-4-6',
|
model: 'claude-sonnet-4-6',
|
||||||
systemPrompt: 'You are a TypeScript tutor. Give short, direct answers.',
|
systemPrompt: 'You are a TypeScript tutor. Give short, direct answers.',
|
||||||
maxTurns: 2,
|
maxTurns: 2,
|
||||||
|
// Keep only the most recent turn in long prompt() conversations.
|
||||||
|
contextStrategy: { type: 'sliding-window', maxTurns: 1 },
|
||||||
},
|
},
|
||||||
new ToolRegistry(), // no tools needed for this conversation
|
new ToolRegistry(), // no tools needed for this conversation
|
||||||
new ToolExecutor(new ToolRegistry()),
|
new ToolExecutor(new ToolRegistry()),
|
||||||
|
|
|
||||||
|
|
@ -153,6 +153,7 @@ export class Agent {
|
||||||
agentRole: this.config.systemPrompt?.slice(0, 50) ?? 'assistant',
|
agentRole: this.config.systemPrompt?.slice(0, 50) ?? 'assistant',
|
||||||
loopDetection: this.config.loopDetection,
|
loopDetection: this.config.loopDetection,
|
||||||
maxTokenBudget: this.config.maxTokenBudget,
|
maxTokenBudget: this.config.maxTokenBudget,
|
||||||
|
contextStrategy: this.config.contextStrategy,
|
||||||
}
|
}
|
||||||
|
|
||||||
this.runner = new AgentRunner(
|
this.runner = new AgentRunner(
|
||||||
|
|
|
||||||
|
|
@ -29,10 +29,12 @@ import type {
|
||||||
LoopDetectionConfig,
|
LoopDetectionConfig,
|
||||||
LoopDetectionInfo,
|
LoopDetectionInfo,
|
||||||
LLMToolDef,
|
LLMToolDef,
|
||||||
|
ContextStrategy,
|
||||||
} from '../types.js'
|
} from '../types.js'
|
||||||
import { TokenBudgetExceededError } from '../errors.js'
|
import { TokenBudgetExceededError } from '../errors.js'
|
||||||
import { LoopDetector } from './loop-detector.js'
|
import { LoopDetector } from './loop-detector.js'
|
||||||
import { emitTrace } from '../utils/trace.js'
|
import { emitTrace } from '../utils/trace.js'
|
||||||
|
import { estimateTokens } from '../utils/tokens.js'
|
||||||
import type { ToolRegistry } from '../tool/framework.js'
|
import type { ToolRegistry } from '../tool/framework.js'
|
||||||
import type { ToolExecutor } from '../tool/executor.js'
|
import type { ToolExecutor } from '../tool/executor.js'
|
||||||
|
|
||||||
|
|
@ -94,6 +96,8 @@ export interface RunnerOptions {
|
||||||
readonly loopDetection?: LoopDetectionConfig
|
readonly loopDetection?: LoopDetectionConfig
|
||||||
/** Maximum cumulative tokens (input + output) allowed for this run. */
|
/** Maximum cumulative tokens (input + output) allowed for this run. */
|
||||||
readonly maxTokenBudget?: number
|
readonly maxTokenBudget?: number
|
||||||
|
/** Optional context compression strategy for long multi-turn runs. */
|
||||||
|
readonly contextStrategy?: ContextStrategy
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -191,6 +195,10 @@ const ZERO_USAGE: TokenUsage = { input_tokens: 0, output_tokens: 0 }
|
||||||
*/
|
*/
|
||||||
export class AgentRunner {
|
export class AgentRunner {
|
||||||
private readonly maxTurns: number
|
private readonly maxTurns: number
|
||||||
|
private summarizeCache: {
|
||||||
|
oldSignature: string
|
||||||
|
summaryMessage: LLMMessage
|
||||||
|
} | null = null
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private readonly adapter: LLMAdapter,
|
private readonly adapter: LLMAdapter,
|
||||||
|
|
@ -201,6 +209,168 @@ export class AgentRunner {
|
||||||
this.maxTurns = options.maxTurns ?? 10
|
this.maxTurns = options.maxTurns ?? 10
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Produce the JSON text of a single message.
 *
 * The summarize strategy joins these strings to build both its cache
 * signature and the transcript handed to the summary call.
 */
private serializeMessage(message: LLMMessage): string {
  const serialized = JSON.stringify(message)
  return serialized
}
|
||||||
|
|
||||||
|
/**
 * Sliding-window truncation: keep the first user message plus the most
 * recent `maxTurns` turns (a turn is counted as two messages), and insert a
 * user-role notice where history was removed.
 *
 * Differences from a plain `slice(-maxTurns * 2)`:
 * - The window never starts on a message that carries `tool_result` blocks.
 *   It is widened backwards so the message holding the matching `tool_use`
 *   stays in the conversation.
 * - A truncation notice left by an earlier pass is recognised and its count
 *   is carried forward, so the reported number of removed turns accumulates
 *   instead of resetting each time.
 *
 * @param messages Conversation to trim. Never mutated.
 * @param maxTurns Number of recent turns to keep. Values that yield a window
 *   of less than one message (<= 0, NaN, small fractions) disable truncation.
 * @returns `messages` itself when nothing needs to be removed, otherwise a
 *   new array `[firstUser?, notice, ...recentMessages]`.
 */
private truncateToSlidingWindow(messages: LLMMessage[], maxTurns: number): LLMMessage[] {
  // Two messages per turn. Truncate fractions so the start index below is an integer.
  const windowSize = Math.trunc(maxTurns * 2)
  if (!(windowSize > 0)) {
    return messages
  }

  const firstUserIndex = messages.findIndex(m => m.role === 'user')
  const firstUser = firstUserIndex >= 0 ? messages[firstUserIndex]! : null
  const afterFirst = firstUserIndex >= 0
    ? messages.slice(firstUserIndex + 1)
    : messages.slice()

  if (afterFirst.length <= windowSize) {
    return messages
  }

  // Widen the window backwards while it would begin with tool results whose
  // originating tool_use message is about to be dropped.
  let keepFrom = afterFirst.length - windowSize
  while (
    keepFrom > 0 &&
    afterFirst[keepFrom]!.content.some(block => block.type === 'tool_result')
  ) {
    keepFrom--
  }
  if (keepFrom === 0) {
    // Nothing can be removed without orphaning a tool result.
    return messages
  }

  const dropped = afterFirst.slice(0, keepFrom)
  const kept = afterFirst.slice(keepFrom)

  // Matches the notice text emitted below so earlier counts can be accumulated.
  const noticePattern = /^\[Earlier conversation history truncated — (\d+) turn\(s\) removed\]$/
  let priorRemovedTurns = 0
  let droppedMessageCount = 0
  for (const message of dropped) {
    const onlyBlock = message.content.length === 1 ? message.content[0]! : null
    const match = message.role === 'user' && onlyBlock !== null && onlyBlock.type === 'text'
      ? noticePattern.exec(onlyBlock.text)
      : null
    if (match !== null) {
      priorRemovedTurns += Number(match[1])
    } else {
      droppedMessageCount++
    }
  }
  // Round up so a partially removed turn is still reported.
  const droppedPairs = priorRemovedTurns + Math.ceil(droppedMessageCount / 2)

  const result: LLMMessage[] = []
  if (firstUser !== null) {
    result.push(firstUser)
  }
  if (droppedPairs > 0) {
    result.push({
      role: 'user',
      content: [{
        type: 'text',
        text: `[Earlier conversation history truncated — ${droppedPairs} turn(s) removed]`,
      }],
    })
  }
  result.push(...kept)
  return result
}
|
||||||
|
|
||||||
|
/**
 * Summarize strategy: replace the older part of the conversation with a
 * single LLM-written summary message.
 *
 * The input is returned unchanged when any of these hold:
 * - the estimated token count is within `maxTokens`;
 * - there are fewer than four messages;
 * - there is no user message, or the first user message is the last message;
 * - fewer than two messages follow the first user message.
 *
 * Otherwise the messages after the first user message are split roughly in
 * half. The split point is moved forward past any message carrying
 * `tool_result` blocks, so the retained portion never starts with a tool
 * result whose `tool_use` was summarized away. The older portion is sent to
 * the adapter for summarization, and the result is
 * `[firstUser, summaryMessage, ...recentPortion]`.
 *
 * The last summary is cached by the serialized older portion, so an
 * identical older portion reuses it without another adapter call.
 *
 * NOTE(review): the summary call's token usage is reported through the trace
 * event only. It is not part of the return value, so callers cannot add it to
 * a cumulative token budget from here.
 *
 * @param messages Current conversation. Never mutated.
 * @param maxTokens Estimated-token threshold above which summarization runs.
 * @param summaryModel Model for the summary call; defaults to the runner's model.
 * @param baseChatOptions Chat options reused for the summary call, with tools removed.
 * @param turns Current turn number, recorded on the trace event.
 * @param options Run options; `onTrace`, `runId`, `taskId` and `traceAgent` are read for tracing.
 * @returns The original array, or a new compacted array.
 */
private async summarizeMessages(
  messages: LLMMessage[],
  maxTokens: number,
  summaryModel: string | undefined,
  baseChatOptions: LLMChatOptions,
  turns: number,
  options: RunOptions,
): Promise<LLMMessage[]> {
  const estimated = estimateTokens(messages)
  if (estimated <= maxTokens || messages.length < 4) {
    return messages
  }

  const firstUserIndex = messages.findIndex(m => m.role === 'user')
  if (firstUserIndex < 0 || firstUserIndex === messages.length - 1) {
    return messages
  }

  const firstUser = messages[firstUserIndex]!
  const rest = messages.slice(firstUserIndex + 1)
  if (rest.length < 2) {
    return messages
  }

  // Split roughly in half, then move the boundary forward so a tool_result
  // message is summarized together with the tool_use that produced it.
  let splitAt = Math.max(2, Math.floor(rest.length / 2))
  while (
    splitAt < rest.length &&
    rest[splitAt]!.content.some(block => block.type === 'tool_result')
  ) {
    splitAt++
  }
  const oldPortion = rest.slice(0, splitAt)
  const recentPortion = rest.slice(splitAt)

  const oldSignature = oldPortion.map(m => this.serializeMessage(m)).join('\n')
  if (this.summarizeCache !== null && this.summarizeCache.oldSignature === oldSignature) {
    return [firstUser, this.summarizeCache.summaryMessage, ...recentPortion]
  }

  const summaryPrompt = [
    'Summarize the following conversation history for an LLM.',
    '- Preserve user goals, constraints, and decisions.',
    '- Keep key tool outputs and unresolved questions.',
    '- Use concise bullets.',
    '- Do not fabricate details.',
  ].join('\n')

  const summaryInput: LLMMessage[] = [
    {
      role: 'user',
      content: [
        { type: 'text', text: summaryPrompt },
        { type: 'text', text: `\n\nConversation:\n${oldSignature}` },
      ],
    },
  ]

  // The summary call must not be offered tools; everything else is inherited.
  const summaryOptions: LLMChatOptions = {
    ...baseChatOptions,
    model: summaryModel ?? this.options.model,
    tools: undefined,
  }

  const summaryStartMs = Date.now()
  const summaryResponse = await this.adapter.chat(summaryInput, summaryOptions)
  if (options.onTrace) {
    const summaryEndMs = Date.now()
    emitTrace(options.onTrace, {
      type: 'llm_call',
      runId: options.runId ?? '',
      taskId: options.taskId,
      agent: options.traceAgent ?? this.options.agentName ?? 'unknown',
      model: summaryOptions.model,
      phase: 'summary',
      turn: turns,
      tokens: summaryResponse.usage,
      startMs: summaryStartMs,
      endMs: summaryEndMs,
      durationMs: summaryEndMs - summaryStartMs,
    })
  }

  const summaryText = extractText(summaryResponse.content).trim()
  const summaryMessage: LLMMessage = {
    role: 'user',
    content: [{
      type: 'text',
      text: summaryText.length > 0
        ? `[Conversation summary]\n${summaryText}`
        : '[Conversation summary unavailable]',
    }],
  }

  this.summarizeCache = { oldSignature, summaryMessage }
  return [firstUser, summaryMessage, ...recentPortion]
}
|
||||||
|
|
||||||
|
/**
 * Dispatch to the configured context strategy and return the (possibly
 * compacted) message list to use for the next LLM call.
 *
 * - `sliding-window` delegates to `truncateToSlidingWindow`.
 * - `summarize` delegates to `summarizeMessages`.
 * - anything else is treated as `custom`: the user-supplied `compress`
 *   callback receives the messages and their estimated token count.
 *
 * @throws Error when a custom `compress` callback returns something other
 *   than a non-empty array.
 */
private async applyContextStrategy(
  messages: LLMMessage[],
  strategy: ContextStrategy,
  baseChatOptions: LLMChatOptions,
  turns: number,
  options: RunOptions,
): Promise<LLMMessage[]> {
  switch (strategy.type) {
    case 'sliding-window':
      return this.truncateToSlidingWindow(messages, strategy.maxTurns)

    case 'summarize':
      return this.summarizeMessages(
        messages,
        strategy.maxTokens,
        strategy.summaryModel,
        baseChatOptions,
        turns,
        options,
      )

    default: {
      const tokenEstimate = estimateTokens(messages)
      const result = await strategy.compress(messages, tokenEstimate)
      const isUsable = Array.isArray(result) && result.length > 0
      if (!isUsable) {
        throw new Error('contextStrategy.custom.compress must return a non-empty LLMMessage[]')
      }
      return result
    }
  }
}
|
||||||
|
|
||||||
// -------------------------------------------------------------------------
|
// -------------------------------------------------------------------------
|
||||||
// Tool resolution
|
// Tool resolution
|
||||||
// -------------------------------------------------------------------------
|
// -------------------------------------------------------------------------
|
||||||
|
|
@ -313,7 +483,7 @@ export class AgentRunner {
|
||||||
options: RunOptions = {},
|
options: RunOptions = {},
|
||||||
): AsyncGenerator<StreamEvent> {
|
): AsyncGenerator<StreamEvent> {
|
||||||
// Working copy of the conversation — mutated as turns progress.
|
// Working copy of the conversation — mutated as turns progress.
|
||||||
const conversationMessages: LLMMessage[] = [...initialMessages]
|
let conversationMessages: LLMMessage[] = [...initialMessages]
|
||||||
|
|
||||||
// Accumulated state across all turns.
|
// Accumulated state across all turns.
|
||||||
let totalUsage: TokenUsage = ZERO_USAGE
|
let totalUsage: TokenUsage = ZERO_USAGE
|
||||||
|
|
@ -363,6 +533,17 @@ export class AgentRunner {
|
||||||
|
|
||||||
turns++
|
turns++
|
||||||
|
|
||||||
|
// Optionally compact context before each LLM call after the first turn.
|
||||||
|
if (this.options.contextStrategy && turns > 1) {
|
||||||
|
conversationMessages = await this.applyContextStrategy(
|
||||||
|
conversationMessages,
|
||||||
|
this.options.contextStrategy,
|
||||||
|
baseChatOptions,
|
||||||
|
turns,
|
||||||
|
options,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
// ------------------------------------------------------------------
|
// ------------------------------------------------------------------
|
||||||
// Step 1: Call the LLM and collect the full response for this turn.
|
// Step 1: Call the LLM and collect the full response for this turn.
|
||||||
// ------------------------------------------------------------------
|
// ------------------------------------------------------------------
|
||||||
|
|
@ -376,6 +557,7 @@ export class AgentRunner {
|
||||||
taskId: options.taskId,
|
taskId: options.taskId,
|
||||||
agent: options.traceAgent ?? this.options.agentName ?? 'unknown',
|
agent: options.traceAgent ?? this.options.agentName ?? 'unknown',
|
||||||
model: this.options.model,
|
model: this.options.model,
|
||||||
|
phase: 'turn',
|
||||||
turn: turns,
|
turn: turns,
|
||||||
tokens: response.usage,
|
tokens: response.usage,
|
||||||
startMs: llmStartMs,
|
startMs: llmStartMs,
|
||||||
|
|
|
||||||
|
|
@ -153,6 +153,7 @@ export type {
|
||||||
ToolCallRecord,
|
ToolCallRecord,
|
||||||
LoopDetectionConfig,
|
LoopDetectionConfig,
|
||||||
LoopDetectionInfo,
|
LoopDetectionInfo,
|
||||||
|
ContextStrategy,
|
||||||
|
|
||||||
// Team
|
// Team
|
||||||
TeamConfig,
|
TeamConfig,
|
||||||
|
|
|
||||||
16
src/types.ts
16
src/types.ts
|
|
@ -65,6 +65,18 @@ export interface LLMMessage {
|
||||||
readonly content: ContentBlock[]
|
readonly content: ContentBlock[]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Context management strategy for long-running agent conversations.
 *
 * The runner applies the strategy before each LLM call after the first turn.
 */
export type ContextStrategy =
  /**
   * Keep the first user message plus the most recent `maxTurns` turns, where
   * a turn is counted as two messages. `maxTurns <= 0` disables truncation.
   */
  | { type: 'sliding-window'; maxTurns: number }
  /**
   * Once the estimated token count exceeds `maxTokens`, replace older
   * messages with an LLM-written summary. `summaryModel` selects the model
   * for that call and defaults to the runner's own model.
   */
  | { type: 'summarize'; maxTokens: number; summaryModel?: string }
  /**
   * Caller-supplied compression. `compress` receives the current messages and
   * their estimated token count and must return a non-empty message array.
   */
  | {
      type: 'custom'
      compress: (
        messages: LLMMessage[],
        estimatedTokens: number,
      ) => Promise<LLMMessage[]> | LLMMessage[]
    }
|
||||||
|
|
||||||
/** Token accounting for a single API call. */
|
/** Token accounting for a single API call. */
|
||||||
export interface TokenUsage {
|
export interface TokenUsage {
|
||||||
readonly input_tokens: number
|
readonly input_tokens: number
|
||||||
|
|
@ -215,6 +227,8 @@ export interface AgentConfig {
|
||||||
readonly maxTokens?: number
|
readonly maxTokens?: number
|
||||||
/** Maximum cumulative tokens (input + output) allowed for this run. */
|
/** Maximum cumulative tokens (input + output) allowed for this run. */
|
||||||
readonly maxTokenBudget?: number
|
readonly maxTokenBudget?: number
|
||||||
|
/** Optional context compression policy to control input growth across turns. */
|
||||||
|
readonly contextStrategy?: ContextStrategy
|
||||||
readonly temperature?: number
|
readonly temperature?: number
|
||||||
/**
|
/**
|
||||||
* Maximum wall-clock time (in milliseconds) for the entire agent run.
|
* Maximum wall-clock time (in milliseconds) for the entire agent run.
|
||||||
|
|
@ -487,6 +501,8 @@ export interface TraceEventBase {
|
||||||
export interface LLMCallTrace extends TraceEventBase {
|
export interface LLMCallTrace extends TraceEventBase {
|
||||||
readonly type: 'llm_call'
|
readonly type: 'llm_call'
|
||||||
readonly model: string
|
readonly model: string
|
||||||
|
/** Distinguishes normal turn calls from context-summary calls. */
|
||||||
|
readonly phase?: 'turn' | 'summary'
|
||||||
readonly turn: number
|
readonly turn: number
|
||||||
readonly tokens: TokenUsage
|
readonly tokens: TokenUsage
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,27 @@
|
||||||
|
import type { LLMMessage } from '../types.js'
|
||||||
|
|
||||||
|
/**
 * Rough token estimate for a list of messages, based purely on character
 * counts so that no model-specific tokenizer is required.
 *
 * Per content block:
 * - `text`: length of the text
 * - `tool_result`: length of `content`
 * - `tool_use`: length of the JSON-serialized `input`
 * - `image`: a flat 64 characters
 *
 * Blocks of any other type contribute nothing. The character total is
 * divided by four (about 4 chars per token for English) and rounded up.
 */
export function estimateTokens(messages: LLMMessage[]): number {
  const totalChars = messages
    .flatMap(message => message.content)
    .reduce((running, block) => {
      switch (block.type) {
        case 'text':
          return running + block.text.length
        case 'tool_result':
          return running + block.content.length
        case 'tool_use':
          return running + JSON.stringify(block.input).length
        case 'image':
          // Flat cost for non-text payloads.
          return running + 64
        default:
          return running
      }
    }, 0)

  return Math.ceil(totalChars / 4)
}
|
||||||
|
|
@ -0,0 +1,185 @@
|
||||||
|
import { describe, it, expect, vi } from 'vitest'
|
||||||
|
import { z } from 'zod'
|
||||||
|
import { AgentRunner } from '../src/agent/runner.js'
|
||||||
|
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
|
||||||
|
import { ToolExecutor } from '../src/tool/executor.js'
|
||||||
|
import type { LLMAdapter, LLMChatOptions, LLMMessage, LLMResponse, TraceEvent } from '../src/types.js'
|
||||||
|
|
||||||
|
/** Build a mock end-of-turn LLM response containing one text block. */
function textResponse(text: string): LLMResponse {
  const idSuffix = Math.random().toString(36).slice(2)
  const response: LLMResponse = {
    id: `resp-${idSuffix}`,
    content: [{ type: 'text', text }],
    model: 'mock-model',
    stop_reason: 'end_turn',
    usage: { input_tokens: 10, output_tokens: 20 },
  }
  return response
}
|
||||||
|
|
||||||
|
/** Build a mock LLM response that requests a single call to `toolName` with `input`. */
function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse {
  const responseId = `resp-${Math.random().toString(36).slice(2)}`
  const toolUseId = `tu-${Math.random().toString(36).slice(2)}`
  const response: LLMResponse = {
    id: responseId,
    content: [{ type: 'tool_use', id: toolUseId, name: toolName, input }],
    model: 'mock-model',
    stop_reason: 'tool_use',
    usage: { input_tokens: 15, output_tokens: 25 },
  }
  return response
}
|
||||||
|
|
||||||
|
/**
 * Create a tool registry holding a single `echo` tool (it returns its
 * `message` input as `data`) together with an executor bound to that registry.
 */
function buildRegistryAndExecutor(): { registry: ToolRegistry; executor: ToolExecutor } {
  const echoTool = defineTool({
    name: 'echo',
    description: 'Echo input',
    inputSchema: z.object({ message: z.string() }),
    async execute({ message }) {
      return { data: message }
    },
  })

  const registry = new ToolRegistry()
  registry.register(echoTool)

  const executor = new ToolExecutor(registry)
  return { registry, executor }
}
|
||||||
|
|
||||||
|
describe('AgentRunner contextStrategy', () => {
  /** Copy role/content of every message as the adapter received them. */
  const snapshot = (messages: readonly LLMMessage[]): LLMMessage[] =>
    messages.map(({ role, content }) => ({ role, content }))

  /** A conversation consisting of one user text message. */
  const userPrompt = (text: string): LLMMessage[] => [
    { role: 'user', content: [{ type: 'text', text }] },
  ]

  /**
   * Mock adapter that replays `script` in order, one response per chat call,
   * reporting a snapshot of each call's messages (and its options) to `onChat`.
   */
  function scriptedAdapter(
    script: LLMResponse[],
    onChat: (messages: LLMMessage[], options: LLMChatOptions) => void,
  ): LLMAdapter {
    let cursor = 0
    return {
      name: 'mock',
      async chat(messages, options) {
        onChat(snapshot(messages), options)
        return script[cursor++]!
      },
      async *stream() {
        /* unused */
      },
    }
  }

  it('keeps baseline behavior when contextStrategy is not set', async () => {
    const seen: LLMMessage[][] = []
    const adapter: LLMAdapter = {
      name: 'mock',
      async chat(messages) {
        seen.push(snapshot(messages))
        if (seen.length === 1) {
          return toolUseResponse('echo', { message: 'hello' })
        }
        return textResponse('done')
      },
      async *stream() {
        /* unused */
      },
    }
    const { registry, executor } = buildRegistryAndExecutor()
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 4,
    })

    await runner.run(userPrompt('start'))

    // Without a strategy the second call carries strictly more history.
    expect(seen).toHaveLength(2)
    expect(seen[0]).toHaveLength(1)
    expect(seen[1]!.length).toBeGreaterThan(seen[0]!.length)
  })

  it('sliding-window truncates old turns and preserves the first user message', async () => {
    const seen: LLMMessage[][] = []
    const adapter = scriptedAdapter(
      [
        toolUseResponse('echo', { message: 't1' }),
        toolUseResponse('echo', { message: 't2' }),
        toolUseResponse('echo', { message: 't3' }),
        textResponse('done'),
      ],
      (messages) => {
        seen.push(messages)
      },
    )
    const { registry, executor } = buildRegistryAndExecutor()
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 8,
      contextStrategy: { type: 'sliding-window', maxTurns: 1 },
    })

    await runner.run(userPrompt('original prompt'))

    const finalCall = seen[seen.length - 1]!
    const leadingBlock = finalCall[0]!.content[0]
    expect(leadingBlock).toMatchObject({ type: 'text', text: 'original prompt' })

    const mentionsTruncation = finalCall.some(message =>
      message.content.some(block => block.type === 'text' && block.text.includes('truncated')),
    )
    expect(mentionsTruncation).toBe(true)
  })

  it('summarize strategy replaces old context and emits summary trace call', async () => {
    const seen: Array<{ messages: LLMMessage[]; options: LLMChatOptions }> = []
    const traces: TraceEvent[] = []
    const adapter = scriptedAdapter(
      [
        toolUseResponse('echo', { message: 'first turn payload '.repeat(20) }),
        toolUseResponse('echo', { message: 'second turn payload '.repeat(20) }),
        textResponse('This is a concise summary.'),
        textResponse('final answer'),
      ],
      (messages, options) => {
        seen.push({ messages, options })
      },
    )
    const { registry, executor } = buildRegistryAndExecutor()
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 8,
      contextStrategy: { type: 'summarize', maxTokens: 20 },
    })

    await runner.run(userPrompt('start'), {
      onTrace: (event) => {
        traces.push(event)
      },
      runId: 'run-summary',
      traceAgent: 'context-agent',
    })

    // The summary request is the single-message call made without tools.
    const summaryCall = seen.find(
      call => call.messages.length === 1 && call.options.tools === undefined,
    )
    expect(summaryCall).toBeDefined()

    const sawSummaryTrace = traces.some(
      trace => trace.type === 'llm_call' && trace.phase === 'summary',
    )
    expect(sawSummaryTrace).toBe(true)
  })

  it('custom strategy calls compress callback and uses returned messages', async () => {
    const compress = vi.fn((messages: LLMMessage[]) => messages.slice(-1))
    const seen: LLMMessage[][] = []
    const adapter = scriptedAdapter(
      [
        toolUseResponse('echo', { message: 'hello' }),
        textResponse('done'),
      ],
      (messages) => {
        seen.push(messages)
      },
    )
    const { registry, executor } = buildRegistryAndExecutor()
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      allowedTools: ['echo'],
      maxTurns: 4,
      contextStrategy: { type: 'custom', compress },
    })

    await runner.run(userPrompt('custom prompt'))

    expect(compress).toHaveBeenCalledOnce()
    expect(seen[1]).toHaveLength(1)
  })
})
|
||||||
Loading…
Reference in New Issue