feat: add tool output auto-truncation at framework level (#110)

Prevent context blowup from large tool outputs by adding opt-in
character-based truncation (head 70% + tail 30% with marker).
Agent-level `maxToolOutputChars` and per-tool `maxOutputChars`
with per-tool taking priority. Marker overhead is budgeted so
the result never exceeds the configured limit.
This commit is contained in:
JackChen 2026-04-16 17:22:22 +08:00
parent 017e0f42f6
commit 4da2ea9b16
6 changed files with 301 additions and 5 deletions

View File

@ -89,7 +89,7 @@ export type { TaskQueueEvent } from './task/queue.js'
// ---------------------------------------------------------------------------
export { defineTool, ToolRegistry, zodToJsonSchema } from './tool/framework.js'
export { ToolExecutor } from './tool/executor.js'
export { ToolExecutor, truncateToolOutput } from './tool/executor.js'
export type { ToolExecutorOptions, BatchToolCall } from './tool/executor.js'
export {
registerBuiltInTools,

View File

@ -212,7 +212,11 @@ function resolveTokenBudget(primary?: number, fallback?: number): number | undef
/**
 * Assemble an {@link Agent} together with its own tool registry and executor.
 *
 * A fresh `ToolRegistry` is created and handed to `registerBuiltInTools`, and
 * a `ToolExecutor` is then constructed over that registry. The finished
 * agent receives the config, the registry and the executor.
 *
 * @param config - Agent configuration; `maxToolOutputChars` is read from it here.
 * @returns The newly constructed agent.
 */
function buildAgent(config: AgentConfig): Agent {
const registry = new ToolRegistry()
registerBuiltInTools(registry)
const executor = new ToolExecutor(registry)
// NOTE(review): the declaration above looks like the pre-change line of this
// diff hunk and the one below its replacement — confirm only one survives.
const executor = new ToolExecutor(registry, {
// Forward the agent-level limit only when it is defined, so the options
// object never carries an explicit `maxToolOutputChars: undefined`.
...(config.maxToolOutputChars !== undefined
? { maxToolOutputChars: config.maxToolOutputChars }
: {}),
})
return new Agent(config, registry, executor)
}

View File

@ -24,6 +24,11 @@ export interface ToolExecutorOptions {
* Defaults to 4.
*/
maxConcurrency?: number
/**
* Agent-level default for maximum tool output length in characters.
* Per-tool `maxOutputChars` takes priority over this value.
*/
maxToolOutputChars?: number
}
/** Describes one call in a batch. */
@ -47,10 +52,12 @@ export interface BatchToolCall {
export class ToolExecutor {
private readonly registry: ToolRegistry
private readonly semaphore: Semaphore
private readonly maxToolOutputChars?: number
/**
 * @param registry - Registry this executor keeps a reference to.
 * @param options - Optional tuning: `maxConcurrency` sizes the semaphore
 *   (4 when omitted) and `maxToolOutputChars` is stored as the agent-level
 *   output limit.
 */
constructor(registry: ToolRegistry, options: ToolExecutorOptions = {}) {
  const { maxConcurrency, maxToolOutputChars } = options
  const concurrencyLimit = maxConcurrency ?? 4

  this.registry = registry
  this.semaphore = new Semaphore(concurrencyLimit)
  this.maxToolOutputChars = maxToolOutputChars
}
// -------------------------------------------------------------------------
@ -156,7 +163,7 @@ export class ToolExecutor {
// --- Execute ---
try {
const result = await tool.execute(parseResult.data, context)
return result
return this.maybeTruncate(tool, result)
} catch (err) {
const message =
err instanceof Error
@ -164,10 +171,26 @@ export class ToolExecutor {
: typeof err === 'string'
? err
: JSON.stringify(err)
return this.errorResult(`Tool "${tool.name}" threw an error: ${message}`)
return this.maybeTruncate(tool, this.errorResult(`Tool "${tool.name}" threw an error: ${message}`))
}
}
/**
 * Shorten a tool result's `data` when a character limit applies to it.
 *
 * The limit is the tool's own `maxOutputChars` when that is set, otherwise
 * the executor's `maxToolOutputChars`. The result is returned untouched when
 * no limit is configured, when the limit is zero or negative, or when the
 * data already fits.
 *
 * @param tool - Definition of the tool that produced `result`.
 * @param result - The result to inspect.
 * @returns `result` itself, or a copy whose `data` has been truncated.
 */
private maybeTruncate(
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  tool: ToolDefinition<any>,
  result: ToolResult,
): ToolResult {
  const limit = tool.maxOutputChars ?? this.maxToolOutputChars

  // No limit configured at either level.
  if (limit === undefined) return result
  // Zero and negative limits switch truncation off.
  if (limit <= 0) return result
  // Already within budget.
  if (result.data.length <= limit) return result

  const shortened = truncateToolOutput(result.data, limit)
  return { ...result, data: shortened }
}
/** Construct an error ToolResult. */
private errorResult(message: string): ToolResult {
return {
@ -176,3 +199,31 @@ export class ToolExecutor {
}
}
}
// ---------------------------------------------------------------------------
// Truncation helper
// ---------------------------------------------------------------------------
/**
 * Truncate tool output to fit within `maxChars`, preserving the head (~70%)
 * and tail (~30%) with a marker indicating how many characters were removed.
 *
 * The marker is counted against the budget, so whenever `maxChars` is at
 * least as large as the marker the returned string never exceeds `maxChars`.
 * When `maxChars` is too small to fit any content alongside the marker, a
 * marker-only string is returned (which may itself be longer than
 * `maxChars`).
 *
 * @param data - Raw tool output.
 * @param maxChars - Maximum length of the returned string, in characters.
 * @returns `data` unchanged when it already fits, otherwise
 *   `head + marker + tail`.
 */
export function truncateToolOutput(data: string, maxChars: number): string {
  if (data.length <= maxChars) return data

  // Single source of truth for the marker text, so the reserved overhead can
  // never drift from what is actually emitted.
  const renderMarker = (removed: number): string =>
    `\n\n[...truncated ${removed} characters...]\n\n`

  // The real removed-count is at most data.length, so rendering the marker
  // with data.length gives a safe upper bound on the marker's length.
  const markerOverhead = renderMarker(data.length).length

  // Floor keeps a fractional maxChars from overshooting; the comparison also
  // maps a NaN budget to 0.
  const budget = Math.floor(maxChars - markerOverhead)
  const available = budget > 0 ? budget : 0

  const headChars = Math.floor(available * 0.7)
  const tailChars = available - headChars
  const truncatedCount = data.length - available

  const head = data.slice(0, headChars)
  // `slice(-0)` is `slice(0)` and would return the whole string, so an empty
  // tail has to be handled explicitly.
  const tail = tailChars > 0 ? data.slice(data.length - tailChars) : ''

  return `${head}${renderMarker(truncatedCount)}${tail}`
}

View File

@ -76,6 +76,12 @@ export function defineTool<TInput>(config: {
* Optional JSON Schema for the LLM (bypasses Zod JSON Schema conversion).
*/
llmInputSchema?: Record<string, unknown>
/**
* Per-tool maximum output length in characters. When set, tool output
* exceeding this limit is truncated (head + tail with a marker in between).
* Takes priority over agent-level `maxToolOutputChars`.
*/
maxOutputChars?: number
execute: (input: TInput, context: ToolUseContext) => Promise<ToolResult>
}): ToolDefinition<TInput> {
return {
@ -85,6 +91,9 @@ export function defineTool<TInput>(config: {
...(config.llmInputSchema !== undefined
? { llmInputSchema: config.llmInputSchema }
: {}),
...(config.maxOutputChars !== undefined
? { maxOutputChars: config.maxOutputChars }
: {}),
execute: config.execute,
}
}

View File

@ -194,6 +194,12 @@ export interface ToolDefinition<TInput = Record<string, unknown>> {
* deriving JSON Schema from `inputSchema` (Zod).
*/
readonly llmInputSchema?: Record<string, unknown>
/**
* Per-tool maximum output length in characters. When set, tool output
* exceeding this limit is truncated (head + tail with a marker in between).
* Takes priority over {@link AgentConfig.maxToolOutputChars}.
*/
readonly maxOutputChars?: number
execute(input: TInput, context: ToolUseContext): Promise<ToolResult>
}
@ -252,6 +258,13 @@ export interface AgentConfig {
* output is parsed as JSON and validated against this schema. A single
* retry with error feedback is attempted on validation failure.
*/
readonly outputSchema?: ZodSchema
/**
 * Maximum tool output length in characters for all tools used by this agent.
 * When set, tool outputs exceeding this limit are truncated (head + tail
 * with a marker in between). Per-tool {@link ToolDefinition.maxOutputChars}
 * takes priority over this value.
 */
readonly maxToolOutputChars?: number
/**
* Called before each agent run. Receives the prompt and agent config.

View File

@ -1,7 +1,7 @@
import { describe, it, expect, vi } from 'vitest'
import { z } from 'zod'
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
import { ToolExecutor } from '../src/tool/executor.js'
import { ToolExecutor, truncateToolOutput } from '../src/tool/executor.js'
import type { ToolUseContext } from '../src/types.js'
// ---------------------------------------------------------------------------
@ -191,3 +191,222 @@ describe('ToolRegistry', () => {
expect(defs[0].inputSchema).toHaveProperty('properties')
})
})
// ---------------------------------------------------------------------------
// truncateToolOutput
// ---------------------------------------------------------------------------
describe('truncateToolOutput', () => {
  // Every truncated result must carry this marker prefix.
  const markerPrefix = '[...truncated'

  it('returns data unchanged when under the limit', () => {
    const input = 'short output'
    const output = truncateToolOutput(input, 100)
    expect(output).toBe(input)
  })

  it('returns data unchanged when exactly at the limit', () => {
    const input = 'x'.repeat(100)
    const output = truncateToolOutput(input, 100)
    expect(output).toBe(input)
  })

  it('truncates data exceeding the limit with head/tail and marker', () => {
    const head = 'A'.repeat(300)
    const tail = 'B'.repeat(700)
    const output = truncateToolOutput(head + tail, 500)

    expect(output).toContain(markerPrefix)
    expect(output.length).toBeLessThanOrEqual(500)
    // The kept head comes from the run of As ...
    expect(output.startsWith('A')).toBe(true)
    // ... and the kept tail from the run of Bs.
    expect(output.endsWith('B')).toBe(true)
  })

  it('result never exceeds maxChars', () => {
    const limit = 1000
    const output = truncateToolOutput('x'.repeat(10000), limit)

    expect(output.length).toBeLessThanOrEqual(limit)
    expect(output).toContain(markerPrefix)
  })

  it('handles empty string', () => {
    const output = truncateToolOutput('', 100)
    expect(output).toBe('')
  })

  it('handles very small maxChars gracefully', () => {
    // A one-character budget cannot hold the marker; the call must still
    // return a marked result instead of throwing.
    const output = truncateToolOutput('x'.repeat(100), 1)
    expect(output).toContain(markerPrefix)
  })
})
// ---------------------------------------------------------------------------
// Tool output truncation (integration)
// ---------------------------------------------------------------------------
describe('ToolExecutor output truncation', () => {
  // Every truncated result must carry this marker prefix.
  const markerPrefix = '[...truncated'

  /** Everything that varies between the executor scenarios below. */
  type Scenario = {
    name: string
    description: string
    execute: () => Promise<{ data: string }>
    /** Per-tool `maxOutputChars`; the key is left off the tool when omitted. */
    toolLimit?: number
    /** Agent-level `maxToolOutputChars`; no options are passed when omitted. */
    agentLimit?: number
  }

  /**
   * Define a no-input tool, register it in a fresh registry and execute it
   * once through a fresh executor, returning the executor's result.
   */
  async function runOnce({ name, description, execute, toolLimit, agentLimit }: Scenario) {
    const tool = defineTool({
      name,
      description,
      inputSchema: z.object({}),
      ...(toolLimit !== undefined ? { maxOutputChars: toolLimit } : {}),
      execute,
    })

    const registry = new ToolRegistry()
    registry.register(tool)

    const executor =
      agentLimit === undefined
        ? new ToolExecutor(registry)
        : new ToolExecutor(registry, { maxToolOutputChars: agentLimit })

    return executor.execute(name, {}, dummyContext)
  }

  it('truncates output when agent-level maxToolOutputChars is set', async () => {
    const outcome = await runOnce({
      name: 'big',
      description: 'Returns large output.',
      execute: async () => ({ data: 'x'.repeat(5000) }),
      agentLimit: 200,
    })

    expect(outcome.data.length).toBeLessThan(5000)
    expect(outcome.data).toContain(markerPrefix)
  })

  it('does not truncate when output is under the limit', async () => {
    const outcome = await runOnce({
      name: 'small',
      description: 'Returns small output.',
      execute: async () => ({ data: 'hello' }),
      agentLimit: 200,
    })

    expect(outcome.data).toBe('hello')
  })

  it('per-tool maxOutputChars overrides agent-level setting (smaller)', async () => {
    // Agent allows 1000, the tool only 200: the tool's limit applies.
    const outcome = await runOnce({
      name: 'limited',
      description: 'Has its own limit.',
      execute: async () => ({ data: 'y'.repeat(5000) }),
      toolLimit: 200,
      agentLimit: 1000,
    })

    expect(outcome.data).toContain(markerPrefix)
    expect(outcome.data.length).toBeLessThanOrEqual(200)
  })

  it('per-tool maxOutputChars overrides agent-level setting (larger)', async () => {
    // Agent allows 500, the tool 2000: the tool's limit still applies.
    const outcome = await runOnce({
      name: 'limited',
      description: 'Has its own limit.',
      execute: async () => ({ data: 'y'.repeat(5000) }),
      toolLimit: 2000,
      agentLimit: 500,
    })

    expect(outcome.data).toContain(markerPrefix)
    expect(outcome.data.length).toBeLessThanOrEqual(2000)
    expect(outcome.data.length).toBeGreaterThan(500)
  })

  it('per-tool maxOutputChars works without agent-level setting', async () => {
    const outcome = await runOnce({
      name: 'limited',
      description: 'Has its own limit.',
      execute: async () => ({ data: 'z'.repeat(5000) }),
      toolLimit: 300,
    })

    expect(outcome.data).toContain(markerPrefix)
    expect(outcome.data.length).toBeLessThanOrEqual(300)
  })

  it('truncates error results too', async () => {
    const outcome = await runOnce({
      name: 'errorbig',
      description: 'Throws a huge error.',
      execute: async () => { throw new Error('E'.repeat(5000)) },
      agentLimit: 200,
    })

    expect(outcome.isError).toBe(true)
    expect(outcome.data).toContain(markerPrefix)
    expect(outcome.data.length).toBeLessThan(5000)
  })

  it('no truncation when maxToolOutputChars is 0', async () => {
    const outcome = await runOnce({
      name: 'big',
      description: 'Returns large output.',
      execute: async () => ({ data: 'x'.repeat(5000) }),
      agentLimit: 0,
    })

    expect(outcome.data.length).toBe(5000)
  })

  it('no truncation when maxToolOutputChars is negative', async () => {
    const outcome = await runOnce({
      name: 'big',
      description: 'Returns large output.',
      execute: async () => ({ data: 'x'.repeat(5000) }),
      agentLimit: -100,
    })

    expect(outcome.data.length).toBe(5000)
  })

  it('defineTool passes maxOutputChars to the ToolDefinition', () => {
    const definition = defineTool({
      name: 'test',
      description: 'test',
      inputSchema: z.object({}),
      maxOutputChars: 500,
      execute: async () => ({ data: 'ok' }),
    })

    expect(definition.maxOutputChars).toBe(500)
  })

  it('defineTool omits maxOutputChars when not specified', () => {
    const definition = defineTool({
      name: 'test',
      description: 'test',
      inputSchema: z.object({}),
      execute: async () => ({ data: 'ok' }),
    })

    expect(definition.maxOutputChars).toBeUndefined()
  })

  it('no truncation when neither limit is set', async () => {
    const outcome = await runOnce({
      name: 'big',
      description: 'Returns large output.',
      execute: async () => ({ data: 'x'.repeat(50000) }),
    })

    expect(outcome.data.length).toBe(50000)
  })
})