feat: add optional outputSchema (Zod) for structured agent output (#36)

When `outputSchema` is set on AgentConfig, the agent's final text output is parsed as JSON, validated against the Zod schema, and exposed via `result.structured`. On validation failure a single retry with error feedback is attempted automatically. Closes #29
2026-04-03 13:45:47 +08:00 · 2026-04-03 13:45:47 +08:00 · fbc5546fa1
parent e0438e3764
commit fbc5546fa1
6 changed files with 583 additions and 6 deletions
--- a/src/agent/agent.ts
+++ b/src/agent/agent.ts
@ -35,7 +35,12 @@ import type {
 import type { ToolDefinition as FrameworkToolDefinition, ToolRegistry } from '../tool/framework.js'
 import type { ToolExecutor } from '../tool/executor.js'
 import { createAdapter } from '../llm/adapter.js'
-import { AgentRunner, type RunnerOptions, type RunOptions } from './runner.js'
+import { AgentRunner, type RunnerOptions, type RunOptions, type RunResult } from './runner.js'
 import {
  buildStructuredOutputInstruction,
  extractJSON,
  validateOutput,
 } from './structured-output.js'
 // ---------------------------------------------------------------------------
 // Internal helpers
@ -111,9 +116,18 @@ export class Agent {
    const provider = this.config.provider ?? 'anthropic'
    const adapter = await createAdapter(provider, this.config.apiKey, this.config.baseURL)
    // Append structured-output instructions when an outputSchema is configured.
    let effectiveSystemPrompt = this.config.systemPrompt
    if (this.config.outputSchema) {
      const instruction = buildStructuredOutputInstruction(this.config.outputSchema)
      effectiveSystemPrompt = effectiveSystemPrompt
        ? effectiveSystemPrompt + '\n' + instruction
        : instruction
    }
    const runnerOptions: RunnerOptions = {
      model: this.config.model,
-      systemPrompt: this.config.systemPrompt,
+      systemPrompt: effectiveSystemPrompt,
      maxTurns: this.config.maxTurns,
      maxTokens: this.config.maxTokens,
      temperature: this.config.temperature,
@ -264,10 +278,19 @@ export class Agent {
      }
      const result = await runner.run(messages, runOptions)
      this.state.tokenUsage = addUsage(this.state.tokenUsage, result.tokenUsage)
      this.transitionTo('completed')
      // --- Structured output validation ---
      if (this.config.outputSchema) {
        return this.validateStructuredOutput(
          messages,
          result,
          runner,
          runOptions,
        )
      }
      this.transitionTo('completed')
      return this.toAgentRunResult(result, true)
    } catch (err) {
      const error = err instanceof Error ? err : new Error(String(err))
@ -279,6 +302,86 @@ export class Agent {
        messages: [],
        tokenUsage: ZERO_USAGE,
        toolCalls: [],
        structured: undefined,
      }
    }
  }
  /**
   * Check a run's final text against `config.outputSchema`, retrying once.
   *
   * The output of `result` is parsed with `extractJSON` and checked with
   * `validateOutput`. When that throws, the runner is invoked a second time
   * with the original messages, the first run's messages, and a user message
   * carrying the error text. A retried result merges token usage, messages and
   * tool calls from both runs; `success` is `false` and `structured` is
   * `undefined` when the retry output fails as well.
   *
   * @param originalMessages - Messages the first run was started with.
   * @param result - Result of the first run.
   * @param runner - Runner used to perform the retry.
   * @param runOptions - Options forwarded unchanged to the retry run.
   * @returns The agent result, with `structured` populated when validation passed.
   */
  private async validateStructuredOutput(
    originalMessages: LLMMessage[],
    result: RunResult,
    runner: AgentRunner,
    runOptions: RunOptions,
  ): Promise<AgentRunResult> {
    const schema = this.config.outputSchema!

    // Attempt 1: validate the output that is already available.
    let failure: unknown
    try {
      const structured = validateOutput(schema, extractJSON(result.output))
      this.transitionTo('completed')
      return this.toAgentRunResult(result, true, structured)
    } catch (caught) {
      failure = caught
    }

    // Attempt 2: replay the conversation followed by a message that tells the
    // model what was wrong with its previous answer.
    let reason: string
    if (failure instanceof Error) {
      reason = failure.message
    } else {
      reason = String(failure)
    }
    const feedbackText =
      'Your previous response did not produce valid JSON matching the required schema.' +
      `\n\nError: ${reason}\n\n` +
      'Please try again. Respond with ONLY valid JSON, no other text.'
    const feedback: LLMMessage = {
      role: 'user',
      content: [{ type: 'text', text: feedbackText }],
    }

    const retryResult = await runner.run(
      [...originalMessages, ...result.messages, feedback],
      runOptions,
    )
    this.state.tokenUsage = addUsage(this.state.tokenUsage, retryResult.tokenUsage)

    // Fields shared by both possible outcomes of the retry.
    const combined = {
      output: retryResult.output,
      messages: [...result.messages, ...retryResult.messages],
      tokenUsage: addUsage(result.tokenUsage, retryResult.tokenUsage),
      toolCalls: [...result.toolCalls, ...retryResult.toolCalls],
    }

    try {
      const structured = validateOutput(schema, extractJSON(retryResult.output))
      this.transitionTo('completed')
      return { success: true, ...combined, structured }
    } catch {
      // The retry failed too: report failure but keep the merged bookkeeping.
      this.transitionTo('completed')
      return { success: false, ...combined, structured: undefined }
    }
  }
@ -331,8 +434,9 @@ export class Agent {
  // -------------------------------------------------------------------------
  private toAgentRunResult(
-    result: import('./runner.js').RunResult,
+    result: RunResult,
    success: boolean,
    structured?: unknown,
  ): AgentRunResult {
    return {
      success,
@ -340,6 +444,7 @@ export class Agent {
      messages: result.messages,
      tokenUsage: result.tokenUsage,
      toolCalls: result.toolCalls,
      structured,
    }
  }
--- a/src/agent/structured-output.ts
+++ b/src/agent/structured-output.ts
@ -0,0 +1,126 @@
 /**
 * @fileoverview Structured output utilities for agent responses.
 *
 * Provides JSON extraction, Zod validation, and system-prompt injection so
 * that agents can return typed, schema-validated output.
 */
 import { type ZodSchema } from 'zod'
 import { zodToJsonSchema } from '../tool/framework.js'
 // ---------------------------------------------------------------------------
 // System-prompt instruction builder
 // ---------------------------------------------------------------------------
 /**
 * Produce the instruction text that tells the LLM to answer in JSON.
 *
 * The Zod schema is converted with `zodToJsonSchema`, pretty-printed with a
 * two-space indent, and embedded in a fenced block beneath a fixed directive.
 * The returned text starts with an empty line.
 *
 * @param schema - Zod schema describing the required output shape.
 * @returns The instruction block, lines joined with `\n`.
 */
 export function buildStructuredOutputInstruction(schema: ZodSchema): string {
  const renderedSchema = JSON.stringify(zodToJsonSchema(schema), null, 2)

  const directive: string[] = [
    '',
    '## Output Format (REQUIRED)',
    'You MUST respond with ONLY valid JSON that conforms to the following JSON Schema.',
    'Do NOT include any text, markdown fences, or explanation outside the JSON object.',
    'Do NOT wrap the JSON in ```json code fences.',
    '',
  ]
  const fencedSchema: string[] = ['```', renderedSchema, '```']

  return directive.concat(fencedSchema).join('\n')
 }
 // ---------------------------------------------------------------------------
 // JSON extraction
 // ---------------------------------------------------------------------------
 /**
 * Attempt to extract and parse JSON from the agent's raw text output.
 *
 * Strategies are tried in order and the first one that parses wins:
 * 1. The whole (trimmed) output is valid JSON (ideal case).
 * 2. The first ` ```json ` fenced block, then the first bare ` ``` ` fenced block.
 * 3. A JSON value embedded in surrounding text, taken as the span from the
 *    first opening bracket to the last closing bracket of the same kind.
 *    The object span (`{`…`}`) is tried before the array span (`[`…`]`),
 *    except when the array span strictly encloses the object span: then the
 *    array is tried first so that `text [{"a":1}] text` yields the array
 *    rather than its inner object.
 *
 * @param raw - Raw text produced by the agent.
 * @returns The parsed JSON value (may legitimately be `null`).
 * @throws {Error} when no valid JSON can be extracted
 */
 export function extractJSON(raw: string): unknown {
  const trimmed = raw.trim()

  // Wrap the parsed value so that a valid JSON `null` is distinguishable from
  // a parse failure.
  const tryParse = (text: string): { value: unknown } | undefined => {
    try {
      return { value: JSON.parse(text) }
    } catch {
      return undefined
    }
  }

  // Case 1: Direct parse
  const direct = tryParse(trimmed)
  if (direct) {
    return direct.value
  }

  // Case 2: Prefer a ```json tagged fence, then fall back to a bare ``` fence
  const fencePatterns = [/```json\s*([\s\S]*?)```/, /```\s*([\s\S]*?)```/]
  for (const pattern of fencePatterns) {
    const fenced = trimmed.match(pattern)?.[1]
    if (fenced) {
      const parsed = tryParse(fenced.trim())
      if (parsed) {
        return parsed.value
      }
    }
  }

  // Case 3: Bare object / array embedded in prose
  const objStart = trimmed.indexOf('{')
  const objEnd = trimmed.lastIndexOf('}')
  const arrStart = trimmed.indexOf('[')
  const arrEnd = trimmed.lastIndexOf(']')
  const objectSpan =
    objStart !== -1 && objEnd > objStart ? trimmed.slice(objStart, objEnd + 1) : undefined
  const arraySpan =
    arrStart !== -1 && arrEnd > arrStart ? trimmed.slice(arrStart, arrEnd + 1) : undefined

  // When the array wraps the object, the array is the outermost JSON value;
  // trying the object first would silently drop the surrounding array.
  const arrayEnclosesObject =
    objectSpan !== undefined &&
    arraySpan !== undefined &&
    arrStart < objStart &&
    arrEnd > objEnd
  const candidates = arrayEnclosesObject ? [arraySpan, objectSpan] : [objectSpan, arraySpan]

  for (const candidate of candidates) {
    if (candidate === undefined) {
      continue
    }
    const parsed = tryParse(candidate)
    if (parsed) {
      return parsed.value
    }
  }

  throw new Error(
    `Failed to extract JSON from output. Raw output begins with: "${trimmed.slice(0, 100)}"`,
  )
 }
 // ---------------------------------------------------------------------------
 // Zod validation
 // ---------------------------------------------------------------------------
 /**
 * Run `schema.safeParse` on a parsed JSON value and unwrap the outcome.
 *
 * @param schema - Zod schema to validate against.
 * @param data - Value to validate, typically the result of JSON parsing.
 * @returns The data reported by the schema on success (transforms applied).
 * @throws {Error} whose message lists every issue as `  - <path>: <message>`,
 *   using `(root)` when an issue has an empty path.
 */
 export function validateOutput(schema: ZodSchema, data: unknown): unknown {
  const outcome = schema.safeParse(data)

  if (!outcome.success) {
    const lines: string[] = []
    for (const issue of outcome.error.issues) {
      // Dotted path to the offending value; root-level issues have no path.
      const location = issue.path.length > 0 ? issue.path.join('.') : '(root)'
      lines.push(`  - ${location}: ${issue.message}`)
    }
    throw new Error(`Output validation failed:\n${lines.join('\n')}`)
  }

  return outcome.data
 }
--- a/src/index.ts
+++ b/src/index.ts
@ -63,6 +63,7 @@ export type { SchedulingStrategy } from './orchestrator/scheduler.js'
 // ---------------------------------------------------------------------------
 export { Agent } from './agent/agent.js'
 export { buildStructuredOutputInstruction, extractJSON, validateOutput } from './agent/structured-output.js'
 export { AgentPool, Semaphore } from './agent/pool.js'
 export type { PoolStatus } from './agent/pool.js'
--- a/src/orchestrator/orchestrator.ts
+++ b/src/orchestrator/orchestrator.ts
@ -837,13 +837,15 @@ export class OpenMultiAgent {
      if (!existing) {
        collapsed.set(agentName, result)
      } else {
-        // Merge multiple results for the same agent (multi-task case)
+        // Merge multiple results for the same agent (multi-task case).
        // Keep the latest `structured` value (last completed task wins).
        collapsed.set(agentName, {
          success: existing.success && result.success,
          output: [existing.output, result.output].filter(Boolean).join('\n\n---\n\n'),
          messages: [...existing.messages, ...result.messages],
          tokenUsage: addUsage(existing.tokenUsage, result.tokenUsage),
          toolCalls: [...existing.toolCalls, ...result.toolCalls],
          structured: result.structured ?? existing.structured,
        })
      }
--- a/src/types.ts
+++ b/src/types.ts
@ -201,6 +201,12 @@ export interface AgentConfig {
  readonly maxTurns?: number
  readonly maxTokens?: number
  readonly temperature?: number
  /**
   * Optional Zod schema for structured output.  When set, the agent's final
   * output is parsed as JSON and validated against this schema.  A single
   * retry with error feedback is attempted on validation failure.
   */
  readonly outputSchema?: ZodSchema
 }
 /** Lifecycle state tracked during an agent run. */
@ -227,6 +233,12 @@ export interface AgentRunResult {
  readonly messages: LLMMessage[]
  readonly tokenUsage: TokenUsage
  readonly toolCalls: ToolCallRecord[]
  /**
   * Parsed and validated structured output when `outputSchema` is set on the
   * agent config.  `undefined` when no schema is configured or validation
   * failed after retry.
   */
  readonly structured?: unknown
 }
 // ---------------------------------------------------------------------------
--- a/tests/structured-output.test.ts
+++ b/tests/structured-output.test.ts
@ -0,0 +1,331 @@
 import { describe, it, expect } from 'vitest'
 import { z } from 'zod'
 import {
  buildStructuredOutputInstruction,
  extractJSON,
  validateOutput,
 } from '../src/agent/structured-output.js'
 import { Agent } from '../src/agent/agent.js'
 import { AgentRunner } from '../src/agent/runner.js'
 import { ToolRegistry } from '../src/tool/framework.js'
 import { ToolExecutor } from '../src/tool/executor.js'
 import type { AgentConfig, LLMAdapter, LLMResponse } from '../src/types.js'
 // ---------------------------------------------------------------------------
 // Mock LLM adapter factory
 // ---------------------------------------------------------------------------
 /**
 * Create an LLM adapter stub whose `chat()` replays canned text responses.
 *
 * Each call returns the next entry of `responses` as a single text block;
 * once the list is exhausted the text is the empty string. Response ids are
 * `mock-1`, `mock-2`, … in call order. `stream()` yields nothing.
 */
 function mockAdapter(responses: string[]): LLMAdapter {
  // Number of chat() calls served so far; after the increment below it is
  // also the 1-based index used in the response id.
  let served = 0
  return {
    name: 'mock',
    async chat() {
      const position = served
      served += 1
      const text = responses[position] ?? ''
      return {
        id: `mock-${served}`,
        content: [{ type: 'text' as const, text }],
        model: 'mock-model',
        stop_reason: 'end_turn',
        usage: { input_tokens: 10, output_tokens: 20 },
      } satisfies LLMResponse
    },
    async *stream() {
      /* unused in these tests */
    },
  }
 }
 // ---------------------------------------------------------------------------
 // extractJSON
 // ---------------------------------------------------------------------------
 describe('extractJSON', () => {
  // Inputs that must parse, from the ideal case down to JSON buried in prose.
  it('parses clean JSON', () => {
    const parsed = extractJSON('{"a":1}')
    expect(parsed).toEqual({ a: 1 })
  })
  it('parses JSON wrapped in ```json fence', () => {
    const parsed = extractJSON('Here is the result:\n```json\n{"a":1}\n```\nDone.')
    expect(parsed).toEqual({ a: 1 })
  })
  it('parses JSON wrapped in bare ``` fence', () => {
    const parsed = extractJSON('```\n{"a":1}\n```')
    expect(parsed).toEqual({ a: 1 })
  })
  it('extracts embedded JSON object from surrounding text', () => {
    const parsed = extractJSON('The answer is {"summary":"hello","score":5} as shown above.')
    expect(parsed).toEqual({ summary: 'hello', score: 5 })
  })
  it('extracts JSON array', () => {
    const parsed = extractJSON('[1,2,3]')
    expect(parsed).toEqual([1, 2, 3])
  })
  it('extracts embedded JSON array from surrounding text', () => {
    const parsed = extractJSON('Here: [{"a":1},{"a":2}] end')
    expect(parsed).toEqual([{ a: 1 }, { a: 2 }])
  })

  // Inputs with nothing parseable must raise the extraction error.
  it('throws on non-JSON text', () => {
    const attempt = () => extractJSON('just plain text')
    expect(attempt).toThrow('Failed to extract JSON')
  })
  it('throws on empty string', () => {
    const attempt = () => extractJSON('')
    expect(attempt).toThrow('Failed to extract JSON')
  })
 })
 // ---------------------------------------------------------------------------
 // validateOutput
 // ---------------------------------------------------------------------------
 describe('validateOutput', () => {
  // Shared schema: a required string plus a number constrained to 0..10.
  const schema = z.object({
    summary: z.string(),
    score: z.number().min(0).max(10),
  })
  it('returns validated data on success', () => {
    const data = { summary: 'hello', score: 5 }
    expect(validateOutput(schema, data)).toEqual(data)
  })
  it('throws on missing field', () => {
    expect(() => validateOutput(schema, { summary: 'hello' })).toThrow(
      'Output validation failed',
    )
  })
  it('throws on wrong type', () => {
    expect(() =>
      validateOutput(schema, { summary: 'hello', score: 'not a number' }),
    ).toThrow('Output validation failed')
  })
  it('throws on value out of range', () => {
    expect(() =>
      validateOutput(schema, { summary: 'hello', score: 99 }),
    ).toThrow('Output validation failed')
  })
  it('applies Zod transforms', () => {
    // The returned value is the schema's output, not the raw input.
    const transformSchema = z.object({
      name: z.string().transform(s => s.toUpperCase()),
    })
    const result = validateOutput(transformSchema, { name: 'alice' })
    expect(result).toEqual({ name: 'ALICE' })
  })
  // Title corrected: the assertion checks that a strict schema throws on an
  // unknown key; nothing is stripped.
  it('rejects unknown keys with strict schema', () => {
    const strictSchema = z.object({ a: z.number() }).strict()
    expect(() =>
      validateOutput(strictSchema, { a: 1, b: 2 }),
    ).toThrow('Output validation failed')
  })
  it('shows (root) for root-level errors', () => {
    // A primitive schema produces an issue with an empty path.
    const stringSchema = z.string()
    expect(() => validateOutput(stringSchema, 42)).toThrow('(root)')
  })
 })
 // ---------------------------------------------------------------------------
 // buildStructuredOutputInstruction
 // ---------------------------------------------------------------------------
 describe('buildStructuredOutputInstruction', () => {
  it('includes the JSON Schema representation', () => {
    const instruction = buildStructuredOutputInstruction(
      z.object({
        summary: z.string(),
        score: z.number(),
      }),
    )
    // Heading, schema body and the JSON-only directive must all be present.
    const expectedFragments = [
      'Output Format (REQUIRED)',
      '"type": "object"',
      '"summary"',
      '"score"',
      'ONLY valid JSON',
    ]
    for (const fragment of expectedFragments) {
      expect(instruction).toContain(fragment)
    }
  })
  it('includes description from Zod schema', () => {
    const described = z.object({
      name: z.string().describe('The person name'),
    })
    expect(buildStructuredOutputInstruction(described)).toContain('The person name')
  })
 })
 // ---------------------------------------------------------------------------
 // Agent integration (mocked LLM)
 // ---------------------------------------------------------------------------
 /**
 * Create an Agent backed by canned LLM responses.
 *
 * An AgentRunner wired to the mock adapter is built from the config and
 * stored on the Agent's `runner` property, so the agent is given a runner
 * without `createAdapter` being called here.
 */
 function buildMockAgent(config: AgentConfig, responses: string[]): Agent {
  const registry = new ToolRegistry()
  const executor = new ToolExecutor(registry)

  // Runner options are copied from the agent config.
  const { model, systemPrompt, maxTurns, maxTokens, temperature, name } = config
  const runner = new AgentRunner(mockAdapter(responses), registry, executor, {
    model,
    systemPrompt,
    maxTurns,
    maxTokens,
    temperature,
    agentName: name,
  })

  const agent = new Agent(config, registry, executor)
  // `runner` is private on Agent; set it reflectively instead of casting to `any`.
  Reflect.set(agent, 'runner', runner)
  return agent
 }
 describe('Agent structured output (end-to-end)', () => {
  // Sentiment-analysis shaped schema shared by every schema-enabled case.
  const sentimentSchema = z.object({
    summary: z.string(),
    sentiment: z.enum(['positive', 'negative', 'neutral']),
    confidence: z.number().min(0).max(1),
  })
  const baseConfig: AgentConfig = {
    name: 'test-agent',
    model: 'mock-model',
    systemPrompt: 'You are a test agent.',
    outputSchema: sentimentSchema,
  }

  it('happy path: valid JSON on first attempt', async () => {
    const expected = { summary: 'Great product', sentiment: 'positive', confidence: 0.95 }
    const agent = buildMockAgent(baseConfig, [JSON.stringify(expected)])
    const { success, structured } = await agent.run('Analyze this review')
    expect(success).toBe(true)
    expect(structured).toEqual(expected)
  })

  it('retry: invalid first attempt, valid second attempt', async () => {
    const expected = { summary: 'Great product', sentiment: 'positive', confidence: 0.95 }
    // Same payload, but with a sentiment outside the enum.
    const rejected = { ...expected, sentiment: 'INVALID_VALUE' }
    const agent = buildMockAgent(
      baseConfig,
      [rejected, expected].map(payload => JSON.stringify(payload)),
    )
    const { success, structured, tokenUsage } = await agent.run('Analyze this review')
    expect(success).toBe(true)
    expect(structured).toEqual(expected)
    // Token usage should reflect both attempts
    expect(tokenUsage.input_tokens).toBe(20) // 10 + 10
    expect(tokenUsage.output_tokens).toBe(40) // 20 + 20
  })

  it('both attempts fail: success=false, structured=undefined', async () => {
    const agent = buildMockAgent(baseConfig, [
      '{"summary": "ok", "sentiment": "WRONG"}',
      '{"summary": "ok", "sentiment": "ALSO_WRONG"}',
    ])
    const { success, structured } = await agent.run('Analyze this review')
    expect(success).toBe(false)
    expect(structured).toBeUndefined()
  })

  it('no outputSchema: original behavior, structured is undefined', async () => {
    const plainConfig: AgentConfig = {
      name: 'plain-agent',
      model: 'mock-model',
      systemPrompt: 'You are a test agent.',
    }
    const agent = buildMockAgent(plainConfig, ['Just plain text output'])
    const { success, output, structured } = await agent.run('Hello')
    expect(success).toBe(true)
    expect(output).toBe('Just plain text output')
    expect(structured).toBeUndefined()
  })

  it('handles JSON wrapped in markdown fence', async () => {
    const agent = buildMockAgent(baseConfig, [
      '```json\n{"summary":"ok","sentiment":"neutral","confidence":0.5}\n```',
    ])
    const { success, structured } = await agent.run('Analyze')
    expect(success).toBe(true)
    expect(structured).toEqual({
      summary: 'ok',
      sentiment: 'neutral',
      confidence: 0.5,
    })
  })

  it('non-JSON output triggers retry, valid JSON on retry succeeds', async () => {
    const expected = { summary: 'Uncertain', sentiment: 'neutral', confidence: 0.1 }
    const agent = buildMockAgent(baseConfig, [
      'I am not sure how to analyze this.',
      JSON.stringify(expected),
    ])
    const { success, structured } = await agent.run('Analyze this review')
    expect(success).toBe(true)
    expect(structured).toEqual(expected)
  })

  it('non-JSON output on both attempts: success=false', async () => {
    const refusals = ['Sorry, I cannot do that.', 'Still cannot do it.']
    const agent = buildMockAgent(baseConfig, refusals)
    const { success, structured } = await agent.run('Analyze this review')
    expect(success).toBe(false)
    expect(structured).toBeUndefined()
  })

  it('token usage on first-attempt success reflects single call only', async () => {
    const payload = { summary: 'Good', sentiment: 'positive', confidence: 0.9 }
    const agent = buildMockAgent(baseConfig, [JSON.stringify(payload)])
    const { tokenUsage } = await agent.run('Analyze')
    expect(tokenUsage.input_tokens).toBe(10)
    expect(tokenUsage.output_tokens).toBe(20)
  })
 })