* feat: add rule-based compact context strategy (#111)

  Add `contextStrategy: 'compact'` as a zero-LLM-cost alternative to `summarize`. Instead of making an LLM call to compress everything into prose, it selectively compresses old turns using structural rules:

  - Preserve tool_use blocks (agent decisions) and error tool_results
  - Replace long tool_result content with compact markers that include the tool name
  - Truncate long assistant text blocks, keeping a head excerpt
  - Keep recent turns (configurable via `preserveRecentTurns`) fully intact
  - Detect markers already written by `compressToolResults` to avoid double-processing

  Closes #111

* fix: remove redundant length guard and fix compact type indentation
This commit is contained in:
parent
696269c924
commit
6de7bbd41f
|
|
@ -400,6 +400,10 @@ export class AgentRunner {
|
|||
)
|
||||
}
|
||||
|
||||
if (strategy.type === 'compact') {
|
||||
return { messages: this.compactMessages(messages, strategy), usage: ZERO_USAGE }
|
||||
}
|
||||
|
||||
const estimated = estimateTokens(messages)
|
||||
const compressed = await strategy.compress(messages, estimated)
|
||||
if (!Array.isArray(compressed) || compressed.length === 0) {
|
||||
|
|
@ -860,6 +864,133 @@ export class AgentRunner {
|
|||
// Private helpers
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
 * Rule-based selective context compaction (no LLM calls).
 *
 * Compresses old turns while preserving the conversation skeleton:
 * - Everything up to and including the first user message is kept as-is
 * - tool_use blocks (decisions) are always kept
 * - Long tool_result content is replaced with a compact marker
 * - Long assistant text blocks are truncated to a head excerpt
 * - Image blocks in old assistant messages are replaced with a text marker
 * - Error tool_results are never compressed
 * - Recent turns (within `preserveRecentTurns`) are kept intact
 *
 * A text or tool_result block is only rewritten when the replacement is
 * strictly shorter than the original, and blocks that already carry a
 * compaction marker are skipped. Repeated passes therefore never grow the
 * context and never overwrite the originally recorded length.
 *
 * @param messages - Conversation history. Never mutated.
 * @param strategy - Settings of the `compact` strategy (threshold plus optional tuning knobs).
 * @returns The same `messages` array when nothing was compacted; otherwise a
 *   new array that reuses every untouched message object.
 */
private compactMessages(
  messages: LLMMessage[],
  strategy: Extract<ContextStrategy, { type: 'compact' }>,
): LLMMessage[] {
  // Compaction is only triggered above the configured token estimate.
  if (estimateTokens(messages) <= strategy.maxTokens) {
    return messages
  }

  const preserveRecent = strategy.preserveRecentTurns ?? 4
  const minToolResultChars = strategy.minToolResultChars ?? 200
  const minTextBlockChars = strategy.minTextBlockChars ?? 2000
  // Clamp at zero: a negative end index would make `slice` count from the end of the text.
  const textBlockExcerptChars = Math.max(0, strategy.textBlockExcerptChars ?? 200)
  // Suffix written by a previous truncation pass.
  const truncationMarker = /\.\.\. \[truncated — \d+ chars total\]$/

  // Head excerpt that never ends on the first half of a UTF-16 surrogate pair.
  const headExcerpt = (text: string): string => {
    let end = Math.min(textBlockExcerptChars, text.length)
    if (end >= 1 && end < text.length) {
      const lastKept = text.charCodeAt(end - 1)
      if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1
    }
    return text.slice(0, end)
  }

  // Find the first user message — it is always preserved as-is.
  const firstUserIndex = messages.findIndex(m => m.role === 'user')
  if (firstUserIndex < 0 || firstUserIndex === messages.length - 1) {
    return messages
  }

  // Walk backward to find the boundary between old and recent turns.
  // A "turn pair" is an assistant message followed by a user message.
  let boundary = messages.length
  let pairsFound = 0
  for (let i = messages.length - 1; i > firstUserIndex && pairsFound < preserveRecent; i--) {
    if (messages[i]!.role === 'user' && messages[i - 1]!.role === 'assistant') {
      pairsFound++
      boundary = i - 1
    }
  }

  // If all turns fit within the recent window, nothing to compact.
  if (boundary <= firstUserIndex + 1) {
    return messages
  }

  // Build a tool_use_id → tool name lookup from old assistant messages.
  const toolNameMap = new Map<string, string>()
  for (let i = firstUserIndex + 1; i < boundary; i++) {
    const msg = messages[i]!
    if (msg.role !== 'assistant') continue
    for (const block of msg.content) {
      if (block.type === 'tool_use') {
        toolNameMap.set(block.id, block.name)
      }
    }
  }

  // Rewrite only the old zone (strictly between the first user message and the boundary).
  const result = messages.map((msg, i): LLMMessage => {
    if (i <= firstUserIndex || i >= boundary) return msg

    const newContent = msg.content.map((block): ContentBlock => {
      if (msg.role === 'assistant') {
        // tool_use blocks: always preserve (decisions).
        if (block.type === 'tool_use') return block

        // Long text blocks: truncate with excerpt, unless already truncated.
        if (
          block.type === 'text' &&
          block.text.length >= minTextBlockChars &&
          !truncationMarker.test(block.text)
        ) {
          const text = `${headExcerpt(block.text)}... [truncated — ${block.text.length} chars total]`
          // Never replace a block with something that is not shorter.
          if (text.length >= block.text.length) return block
          return { type: 'text', text } satisfies TextBlock
        }

        // Image blocks in old assistant messages: replace with marker.
        if (block.type === 'image') {
          return { type: 'text', text: '[Image compacted]' } satisfies TextBlock
        }
        return block
      }

      // Non-assistant messages in the old zone: only tool results are candidates.
      if (block.type !== 'tool_result') return block
      // Error results: always preserve.
      if (block.is_error) return block
      // Already compressed by compressToolResults or a prior compact pass.
      if (
        block.content.startsWith('[Tool output compressed') ||
        block.content.startsWith('[Tool result:')
      ) {
        return block
      }
      // Short results: preserve.
      if (block.content.length < minToolResultChars) return block

      const toolName = toolNameMap.get(block.tool_use_id) ?? 'unknown'
      const marker = `[Tool result: ${toolName} — ${block.content.length} chars, compacted]`
      // With a small minToolResultChars the marker can exceed the content it would replace.
      if (marker.length >= block.content.length) return block
      return {
        type: 'tool_result',
        tool_use_id: block.tool_use_id,
        content: marker,
      } satisfies ToolResultBlock
    })

    // Untouched blocks are returned by reference, so identity comparison detects edits.
    const msgChanged = newContent.some((block, j) => block !== msg.content[j])
    return msgChanged ? { ...msg, content: newContent } : msg
  })

  const anyChanged = result.some((msg, i) => msg !== messages[i])
  return anyChanged ? result : messages
}
|
||||
|
||||
/**
|
||||
* Replace consumed tool results with compact markers.
|
||||
*
|
||||
|
|
|
|||
13
src/types.ts
13
src/types.ts
|
|
@ -69,6 +69,19 @@ export interface LLMMessage {
|
|||
export type ContextStrategy =
|
||||
| { type: 'sliding-window'; maxTurns: number }
|
||||
| { type: 'summarize'; maxTokens: number; summaryModel?: string }
|
||||
| {
|
||||
type: 'compact'
|
||||
/** Estimated token threshold that triggers compaction. Compaction is skipped when below this. */
|
||||
maxTokens: number
|
||||
/** Number of recent turn pairs (assistant+user) to keep intact. Default: 4. */
|
||||
preserveRecentTurns?: number
|
||||
/** Minimum chars in a tool_result content to qualify for compaction. Default: 200. */
|
||||
minToolResultChars?: number
|
||||
/** Minimum chars in an assistant text block to qualify for truncation. Default: 2000. */
|
||||
minTextBlockChars?: number
|
||||
/** Maximum chars to keep from a truncated text block (head excerpt). Default: 200. */
|
||||
textBlockExcerptChars?: number
|
||||
}
|
||||
| {
|
||||
type: 'custom'
|
||||
compress: (
|
||||
|
|
|
|||
|
|
@ -199,4 +199,428 @@ describe('AgentRunner contextStrategy', () => {
|
|||
expect(compress).toHaveBeenCalledOnce()
|
||||
expect(calls[1]).toHaveLength(1)
|
||||
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// compact strategy
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('compact strategy', () => {
|
||||
// Assistant text fixture: 3000 chars, above the default and the per-test minTextBlockChars values.
const longText = 'x'.repeat(3000)
|
||||
// Tool output fixture: 'result-data ' is 12 chars, so this is exactly 1200 chars.
const longToolResult = 'result-data '.repeat(100) // 1200 chars
|
||||
|
||||
/**
 * Create a mock adapter that answers with `responseCount - 1` `echo` tool calls
 * followed by one final text response ('done').
 *
 * Every `chat` invocation appends a shallow `{ role, content }` copy of the
 * messages it received to `calls`, so tests can inspect what was sent.
 */
function buildMultiTurnAdapter(
  responseCount: number,
  calls: LLMMessage[][],
): LLMAdapter {
  const toolTurns = Math.max(0, Math.ceil(responseCount - 1))
  const scripted: LLMResponse[] = [
    ...Array.from({ length: toolTurns }, (_, turn) =>
      toolUseResponse('echo', { message: `turn-${turn}` }),
    ),
    textResponse('done'),
  ]

  let cursor = 0
  const adapter: LLMAdapter = {
    name: 'mock',
    async chat(messages) {
      const snapshot = messages.map(({ role, content }) => ({ role, content }))
      calls.push(snapshot)
      const next = scripted[cursor]!
      cursor += 1
      return next
    },
    async *stream() { /* unused */ },
  }
  return adapter
}
|
||||
|
||||
/**
 * Create a tool registry containing a single `echo` tool, plus an executor bound to it.
 * The tool ignores its input and always resolves with `{ data: result }`.
 */
function buildEchoRegistry(result: string): { registry: ToolRegistry; executor: ToolExecutor } {
  const echoTool = defineTool({
    name: 'echo',
    description: 'Echo input',
    inputSchema: z.object({ message: z.string() }),
    async execute() {
      return { data: result }
    },
  })

  const registry = new ToolRegistry()
  registry.register(echoTool)

  const executor = new ToolExecutor(registry)
  return { registry, executor }
}
|
||||
|
||||
it('does not activate below maxTokens threshold', async () => {
  const calls: LLMMessage[][] = []
  const adapter = buildMultiTurnAdapter(4, calls)
  // Use long tool output and a one-turn recent window: with these settings a
  // low maxTokens WOULD compact old results, so only the threshold check can
  // keep them intact here.
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 999999,
      preserveRecentTurns: 1,
      minToolResultChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // On the last call, all previous messages should still be intact
  // because estimated tokens are way below the threshold.
  const lastCall = calls[calls.length - 1]!
  const allToolResults = lastCall.flatMap(m =>
    m.content.filter(b => b.type === 'tool_result'),
  )
  // Guard against a vacuous pass: there must be tool results to inspect.
  expect(allToolResults.length).toBeGreaterThan(0)
  for (const tr of allToolResults) {
    if (tr.type === 'tool_result') {
      expect(tr.content).not.toContain('compacted')
      expect(tr.content).toContain('result-data')
    }
  }
})
|
||||
|
||||
it('compresses old tool_result blocks when tokens exceed threshold', async () => {
  const recorded: LLMMessage[][] = []
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(buildMultiTurnAdapter(4, recorded), registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20, // far below any real conversation, so compaction always triggers
      preserveRecentTurns: 1, // shield only the latest turn
      minToolResultChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // Inspect the final request: older tool results must carry the compact marker.
  const finalRequest = recorded[recorded.length - 1]!
  const compactedResults = finalRequest
    .flatMap(message => message.content.filter(block => block.type === 'tool_result'))
    .filter(block => block.type === 'tool_result' && block.content.includes('compacted'))
  expect(compactedResults.length).toBeGreaterThan(0)

  // The marker names the tool that produced the result.
  for (const block of compactedResults) {
    if (block.type !== 'tool_result') continue
    expect(block.content).toMatch(/\[Tool result: echo/)
  }
})
|
||||
|
||||
it('preserves the first user message', async () => {
  const recorded: LLMMessage[][] = []
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(buildMultiTurnAdapter(4, recorded), registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'original prompt' }] }])

  // The opening prompt must reach the final request unchanged.
  const finalRequest = recorded[recorded.length - 1]!
  const openingUserMessage = finalRequest.find(message => message.role === 'user')!
  const [openingBlock] = openingUserMessage.content
  expect(openingBlock).toMatchObject({ type: 'text', text: 'original prompt' })
})
|
||||
|
||||
it('preserves tool_use blocks in old turns', async () => {
  const recorded: LLMMessage[][] = []
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(buildMultiTurnAdapter(4, recorded), registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // Each assistant message in the final request must still carry a tool_use block.
  const finalRequest = recorded[recorded.length - 1]!
  for (const message of finalRequest) {
    if (message.role !== 'assistant') continue
    // A text-only "done" answer has no tool_use and is exempt.
    const isFinalAnswer = message.content.some(
      block => block.type === 'text' && block.text === 'done',
    )
    if (isFinalAnswer) continue
    const toolUseCount = message.content.filter(block => block.type === 'tool_use').length
    expect(toolUseCount).toBeGreaterThan(0)
  }
})
|
||||
|
||||
it('preserves error tool_result blocks', async () => {
  const calls: LLMMessage[][] = []
  const responses: LLMResponse[] = [
    toolUseResponse('echo', { message: 'will-fail' }),
    toolUseResponse('echo', { message: 'ok' }),
    textResponse('done'),
  ]
  let idx = 0
  const adapter: LLMAdapter = {
    name: 'mock',
    async chat(messages) {
      calls.push(messages.map(m => ({ role: m.role, content: m.content })))
      return responses[idx++]!
    },
    async *stream() { /* unused */ },
  }

  // Tool that fails on first call, succeeds on second.
  let callCount = 0
  const registry = new ToolRegistry()
  registry.register(
    defineTool({
      name: 'echo',
      description: 'Echo input',
      inputSchema: z.object({ message: z.string() }),
      async execute() {
        callCount++
        if (callCount === 1) {
          // Long enough (680 chars) to exceed minToolResultChars below.
          throw new Error('deliberate error '.repeat(40))
        }
        return { data: longToolResult }
      },
    }),
  )
  const executor = new ToolExecutor(registry)
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 50,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  const lastCall = calls[calls.length - 1]!
  const errorResults = lastCall.flatMap(m =>
    m.content.filter(b => b.type === 'tool_result' && b.is_error),
  )
  // Guard against a vacuous pass: the failed first call must have produced
  // an error result, otherwise the loop below asserts nothing.
  expect(errorResults.length).toBeGreaterThan(0)
  // Error results should still have their original content (not compacted).
  for (const er of errorResults) {
    if (er.type === 'tool_result') {
      expect(er.content).not.toContain('compacted')
      expect(er.content).toContain('deliberate error')
    }
  }
})
|
||||
|
||||
it('does not re-compress markers from compressToolResults', async () => {
  const recorded: LLMMessage[][] = []
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(buildMultiTurnAdapter(4, recorded), registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    compressToolResults: { minChars: 100 },
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 10,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // A marker must never be wrapped inside another marker.
  const finalRequest = recorded[recorded.length - 1]!
  for (const message of finalRequest) {
    for (const block of message.content) {
      if (block.type !== 'tool_result') continue
      const markerCount = block.content.match(/\[Tool/g)?.length ?? 0
      expect(markerCount).toBeLessThanOrEqual(1)
    }
  }
})
|
||||
|
||||
it('truncates long assistant text blocks in old turns', async () => {
  const recorded: LLMMessage[][] = []

  // Opening assistant turn: a long text block next to a tool_use block.
  const verboseToolTurn: LLMResponse = {
    id: 'r1',
    content: [
      { type: 'text', text: longText },
      { type: 'tool_use', id: 'tu-1', name: 'echo', input: { message: 'hi' } },
    ],
    model: 'mock-model',
    stop_reason: 'tool_use',
    usage: { input_tokens: 10, output_tokens: 20 },
  }
  const scripted: LLMResponse[] = [
    verboseToolTurn,
    toolUseResponse('echo', { message: 'turn2' }),
    textResponse('done'),
  ]

  let cursor = 0
  const adapter: LLMAdapter = {
    name: 'mock',
    async chat(messages) {
      recorded.push(messages.map(({ role, content }) => ({ role, content })))
      const next = scripted[cursor]!
      cursor += 1
      return next
    },
    async *stream() { /* unused */ },
  }

  const { registry, executor } = buildEchoRegistry('short')
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minTextBlockChars: 500,
      textBlockExcerptChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // The earliest assistant message is in the old zone, so its text is cut down.
  const finalRequest = recorded[recorded.length - 1]!
  const earliestAssistant = finalRequest.find(message => message.role === 'assistant')!
  const truncatedBlock = earliestAssistant.content
    .filter(block => block.type === 'text')
    .find(block => block.type === 'text' && block.text.includes('truncated'))

  expect(truncatedBlock).toBeDefined()
  if (truncatedBlock && truncatedBlock.type === 'text') {
    expect(truncatedBlock.text.length).toBeLessThan(longText.length)
    expect(truncatedBlock.text).toContain(`${longText.length} chars total`)
  }
})
|
||||
|
||||
it('keeps recent turns intact within preserveRecentTurns', async () => {
  const recorded: LLMMessage[][] = []
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(buildMultiTurnAdapter(4, recorded), registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // Scan from the end for the newest user message that holds a tool_result;
  // it is inside the protected window and must keep its full content.
  const finalRequest = recorded[recorded.length - 1]!
  let newestWithToolResult: LLMMessage | undefined
  for (let i = finalRequest.length - 1; i >= 0; i--) {
    const candidate = finalRequest[i]!
    if (candidate.role !== 'user') continue
    if (candidate.content.some(block => block.type === 'tool_result')) {
      newestWithToolResult = candidate
      break
    }
  }
  expect(newestWithToolResult).toBeDefined()

  const newestResult = newestWithToolResult!.content.find(block => block.type === 'tool_result')
  if (newestResult && newestResult.type === 'tool_result') {
    expect(newestResult.content).not.toContain('compacted')
    expect(newestResult.content).toContain('result-data')
  }
})
|
||||
|
||||
it('does not compact when all turns fit in preserveRecentTurns', async () => {
  const calls: LLMMessage[][] = []
  const adapter = buildMultiTurnAdapter(3, calls)
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 10, // way more than actual turns
      minToolResultChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // All tool results should still have original content.
  const lastCall = calls[calls.length - 1]!
  const toolResults = lastCall.flatMap(m =>
    m.content.filter(b => b.type === 'tool_result'),
  )
  // Guard against a vacuous pass: there must be tool results to inspect.
  expect(toolResults.length).toBeGreaterThan(0)
  for (const tr of toolResults) {
    if (tr.type === 'tool_result') {
      expect(tr.content).not.toContain('compacted')
      expect(tr.content).toContain('result-data')
    }
  }
})
|
||||
|
||||
it('maintains correct role alternation after compaction', async () => {
  const recorded: LLMMessage[][] = []
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(buildMultiTurnAdapter(5, recorded), registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 10,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // In every request sent to the adapter, no two neighbours share a role.
  for (const request of recorded) {
    request.slice(1).forEach((message, offset) => {
      // `offset` indexes the sliced array, so request[offset] is the predecessor.
      expect(message.role).not.toBe(request[offset]!.role)
    })
  }
})
|
||||
|
||||
it('returns ZERO_USAGE (no LLM cost from compaction)', async () => {
  const recorded: LLMMessage[][] = []
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(buildMultiTurnAdapter(4, recorded), registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 100,
    },
  })

  const { tokenUsage } = await runner.run([
    { role: 'user', content: [{ type: 'text', text: 'start' }] },
  ])

  // Only the 4 scripted LLM calls may contribute usage; compaction adds none.
  // toolUseResponse reports input=15 / output=25, textResponse input=10 / output=20,
  // and the script is 3 tool calls followed by 1 final text response.
  const expectedInput = 15 * 3 + 10
  const expectedOutput = 25 * 3 + 20
  expect(tokenUsage.input_tokens).toBe(expectedInput)
  expect(tokenUsage.output_tokens).toBe(expectedOutput)
})
|
||||
})
|
||||
})
|
||||
|
|
|
|||
Loading…
Reference in New Issue