feat: add optional outputSchema (Zod) for structured agent output

When `outputSchema` is set on AgentConfig, the agent's final text output
is parsed as JSON, validated against the Zod schema, and exposed via
`result.structured`. On validation failure a single retry with error
feedback is attempted automatically.

Closes #29
This commit is contained in:
JackChen 2026-04-03 13:43:51 +08:00
parent e0438e3764
commit 73d15a2d07
6 changed files with 583 additions and 6 deletions

View File

@ -35,7 +35,12 @@ import type {
import type { ToolDefinition as FrameworkToolDefinition, ToolRegistry } from '../tool/framework.js'
import type { ToolExecutor } from '../tool/executor.js'
import { createAdapter } from '../llm/adapter.js'
import { AgentRunner, type RunnerOptions, type RunOptions } from './runner.js'
import { AgentRunner, type RunnerOptions, type RunOptions, type RunResult } from './runner.js'
import {
buildStructuredOutputInstruction,
extractJSON,
validateOutput,
} from './structured-output.js'
// ---------------------------------------------------------------------------
// Internal helpers
@ -111,9 +116,18 @@ export class Agent {
const provider = this.config.provider ?? 'anthropic'
const adapter = await createAdapter(provider, this.config.apiKey, this.config.baseURL)
// Append structured-output instructions when an outputSchema is configured.
let effectiveSystemPrompt = this.config.systemPrompt
if (this.config.outputSchema) {
const instruction = buildStructuredOutputInstruction(this.config.outputSchema)
effectiveSystemPrompt = effectiveSystemPrompt
? effectiveSystemPrompt + '\n' + instruction
: instruction
}
const runnerOptions: RunnerOptions = {
model: this.config.model,
systemPrompt: this.config.systemPrompt,
systemPrompt: effectiveSystemPrompt,
maxTurns: this.config.maxTurns,
maxTokens: this.config.maxTokens,
temperature: this.config.temperature,
@ -264,10 +278,19 @@ export class Agent {
}
const result = await runner.run(messages, runOptions)
this.state.tokenUsage = addUsage(this.state.tokenUsage, result.tokenUsage)
this.transitionTo('completed')
// --- Structured output validation ---
if (this.config.outputSchema) {
return this.validateStructuredOutput(
messages,
result,
runner,
runOptions,
)
}
this.transitionTo('completed')
return this.toAgentRunResult(result, true)
} catch (err) {
const error = err instanceof Error ? err : new Error(String(err))
@ -279,6 +302,86 @@ export class Agent {
messages: [],
tokenUsage: ZERO_USAGE,
toolCalls: [],
structured: undefined,
}
}
}
/**
* Validate agent output against the configured `outputSchema`.
* On first validation failure, retry once with error feedback.
*/
private async validateStructuredOutput(
originalMessages: LLMMessage[],
result: RunResult,
runner: AgentRunner,
runOptions: RunOptions,
): Promise<AgentRunResult> {
const schema = this.config.outputSchema!
// First attempt
let firstAttemptError: unknown
try {
const parsed = extractJSON(result.output)
const validated = validateOutput(schema, parsed)
this.transitionTo('completed')
return this.toAgentRunResult(result, true, validated)
} catch (e) {
firstAttemptError = e
}
// Retry: send full context + error feedback
const errorMsg = firstAttemptError instanceof Error
? firstAttemptError.message
: String(firstAttemptError)
const retryMessages: LLMMessage[] = [
...originalMessages,
...result.messages,
{
role: 'user' as const,
content: [{
type: 'text' as const,
text: [
'Your previous response did not produce valid JSON matching the required schema.',
'',
`Error: ${errorMsg}`,
'',
'Please try again. Respond with ONLY valid JSON, no other text.',
].join('\n'),
}],
},
]
const retryResult = await runner.run(retryMessages, runOptions)
this.state.tokenUsage = addUsage(this.state.tokenUsage, retryResult.tokenUsage)
const mergedTokenUsage = addUsage(result.tokenUsage, retryResult.tokenUsage)
const mergedMessages = [...result.messages, ...retryResult.messages]
const mergedToolCalls = [...result.toolCalls, ...retryResult.toolCalls]
try {
const parsed = extractJSON(retryResult.output)
const validated = validateOutput(schema, parsed)
this.transitionTo('completed')
return {
success: true,
output: retryResult.output,
messages: mergedMessages,
tokenUsage: mergedTokenUsage,
toolCalls: mergedToolCalls,
structured: validated,
}
} catch {
// Retry also failed
this.transitionTo('completed')
return {
success: false,
output: retryResult.output,
messages: mergedMessages,
tokenUsage: mergedTokenUsage,
toolCalls: mergedToolCalls,
structured: undefined,
}
}
}
@ -331,8 +434,9 @@ export class Agent {
// -------------------------------------------------------------------------
private toAgentRunResult(
result: import('./runner.js').RunResult,
result: RunResult,
success: boolean,
structured?: unknown,
): AgentRunResult {
return {
success,
@ -340,6 +444,7 @@ export class Agent {
messages: result.messages,
tokenUsage: result.tokenUsage,
toolCalls: result.toolCalls,
structured,
}
}

View File

@ -0,0 +1,126 @@
/**
* @fileoverview Structured output utilities for agent responses.
*
* Provides JSON extraction, Zod validation, and system-prompt injection so
* that agents can return typed, schema-validated output.
*/
import { type ZodSchema } from 'zod'
import { zodToJsonSchema } from '../tool/framework.js'
// ---------------------------------------------------------------------------
// System-prompt instruction builder
// ---------------------------------------------------------------------------
/**
 * Build a JSON-mode instruction block to append to the agent's system prompt.
 *
 * The configured Zod schema is converted to JSON Schema and embedded in a
 * directive telling the LLM to answer with raw JSON only (no prose, no
 * markdown fences).
 */
export function buildStructuredOutputInstruction(schema: ZodSchema): string {
  const jsonSchema = zodToJsonSchema(schema)
  const instructionLines = [
    '',
    '## Output Format (REQUIRED)',
    'You MUST respond with ONLY valid JSON that conforms to the following JSON Schema.',
    'Do NOT include any text, markdown fences, or explanation outside the JSON object.',
    'Do NOT wrap the JSON in ```json code fences.',
    '',
    '```',
    JSON.stringify(jsonSchema, null, 2),
    '```',
  ]
  return instructionLines.join('\n')
}
// ---------------------------------------------------------------------------
// JSON extraction
// ---------------------------------------------------------------------------
/**
 * Attempt to extract and parse JSON from the agent's raw text output.
 *
 * Strategies are tried in priority order:
 * 1. The output is already valid JSON (ideal case)
 * 2. The output contains a ` ```json ` fenced block, then a bare ``` fence
 * 3. The output contains a bare JSON object/array (first `{`/`[` to last `}`/`]`)
 *
 * @throws {Error} when no valid JSON can be extracted
 */
export function extractJSON(raw: string): unknown {
  const text = raw.trim()

  // Each strategy yields a candidate substring to parse, or undefined when
  // it does not apply to this output.
  const strategies: Array<() => string | undefined> = [
    // Strategy 1: the whole output parses directly.
    () => text,
    // Strategy 2a: prefer a ```json tagged fence.
    () => /```json\s*([\s\S]*?)```/.exec(text)?.[1]?.trim(),
    // Strategy 2b: fall back to an untagged ``` fence.
    () => /```\s*([\s\S]*?)```/.exec(text)?.[1]?.trim(),
    // Strategy 3: bare object — first '{' through last '}'.
    () => {
      const start = text.indexOf('{')
      const end = text.lastIndexOf('}')
      return start !== -1 && end > start ? text.slice(start, end + 1) : undefined
    },
    // Strategy 3b: bare array — first '[' through last ']'.
    () => {
      const start = text.indexOf('[')
      const end = text.lastIndexOf(']')
      return start !== -1 && end > start ? text.slice(start, end + 1) : undefined
    },
  ]

  for (const strategy of strategies) {
    const candidate = strategy()
    if (candidate === undefined) continue
    try {
      return JSON.parse(candidate)
    } catch {
      // This candidate was not valid JSON — try the next strategy.
    }
  }

  throw new Error(
    `Failed to extract JSON from output. Raw output begins with: "${text.slice(0, 100)}"`,
  )
}
// ---------------------------------------------------------------------------
// Zod validation
// ---------------------------------------------------------------------------
/**
 * Validate a parsed JSON value against a Zod schema.
 *
 * @returns The validated (and potentially transformed) value on success.
 * @throws {Error} with a human-readable Zod error message on failure.
 */
export function validateOutput(schema: ZodSchema, data: unknown): unknown {
  const outcome = schema.safeParse(data)
  if (outcome.success) {
    return outcome.data
  }
  // Render each Zod issue as "path: message", using "(root)" for top-level
  // errors that have no path.
  const formatted: string[] = []
  for (const issue of outcome.error.issues) {
    const location = issue.path.length > 0 ? issue.path.join('.') : '(root)'
    formatted.push(` - ${location}: ${issue.message}`)
  }
  throw new Error(`Output validation failed:\n${formatted.join('\n')}`)
}

View File

@ -63,6 +63,7 @@ export type { SchedulingStrategy } from './orchestrator/scheduler.js'
// ---------------------------------------------------------------------------
export { Agent } from './agent/agent.js'
export { buildStructuredOutputInstruction, extractJSON, validateOutput } from './agent/structured-output.js'
export { AgentPool, Semaphore } from './agent/pool.js'
export type { PoolStatus } from './agent/pool.js'

View File

@ -837,13 +837,15 @@ export class OpenMultiAgent {
if (!existing) {
collapsed.set(agentName, result)
} else {
// Merge multiple results for the same agent (multi-task case)
// Merge multiple results for the same agent (multi-task case).
// Keep the latest `structured` value (last completed task wins).
collapsed.set(agentName, {
success: existing.success && result.success,
output: [existing.output, result.output].filter(Boolean).join('\n\n---\n\n'),
messages: [...existing.messages, ...result.messages],
tokenUsage: addUsage(existing.tokenUsage, result.tokenUsage),
toolCalls: [...existing.toolCalls, ...result.toolCalls],
structured: result.structured ?? existing.structured,
})
}

View File

@ -201,6 +201,12 @@ export interface AgentConfig {
readonly maxTurns?: number
readonly maxTokens?: number
readonly temperature?: number
/**
* Optional Zod schema for structured output. When set, the agent's final
* output is parsed as JSON and validated against this schema. A single
* retry with error feedback is attempted on validation failure.
*/
readonly outputSchema?: ZodSchema
}
/** Lifecycle state tracked during an agent run. */
@ -227,6 +233,12 @@ export interface AgentRunResult {
readonly messages: LLMMessage[]
readonly tokenUsage: TokenUsage
readonly toolCalls: ToolCallRecord[]
/**
* Parsed and validated structured output when `outputSchema` is set on the
* agent config. `undefined` when no schema is configured or validation
* failed after retry.
*/
readonly structured?: unknown
}
// ---------------------------------------------------------------------------

View File

@ -0,0 +1,331 @@
import { describe, it, expect } from 'vitest'
import { z } from 'zod'
import {
buildStructuredOutputInstruction,
extractJSON,
validateOutput,
} from '../src/agent/structured-output.js'
import { Agent } from '../src/agent/agent.js'
import { AgentRunner } from '../src/agent/runner.js'
import { ToolRegistry } from '../src/tool/framework.js'
import { ToolExecutor } from '../src/tool/executor.js'
import type { AgentConfig, LLMAdapter, LLMResponse } from '../src/types.js'
// ---------------------------------------------------------------------------
// Mock LLM adapter factory
// ---------------------------------------------------------------------------
/**
 * Create an LLM adapter that replays the given canned text responses in
 * order. Once the list is exhausted, further calls return empty text.
 */
function mockAdapter(responses: string[]): LLMAdapter {
  let nextIndex = 0
  return {
    name: 'mock',
    async chat() {
      const text = responses[nextIndex] ?? ''
      nextIndex += 1
      const response: LLMResponse = {
        // ids are 1-based: "mock-1" for the first call, etc.
        id: `mock-${nextIndex}`,
        content: [{ type: 'text' as const, text }],
        model: 'mock-model',
        stop_reason: 'end_turn',
        usage: { input_tokens: 10, output_tokens: 20 },
      }
      return response
    },
    async *stream() {
      // Streaming is not exercised by these tests.
    },
  }
}
// ---------------------------------------------------------------------------
// extractJSON
// ---------------------------------------------------------------------------
describe('extractJSON', () => {
  it('parses clean JSON', () => {
    expect(extractJSON('{"a":1}')).toEqual({ a: 1 })
  })

  it('parses JSON wrapped in ```json fence', () => {
    const output = 'Here is the result:\n```json\n{"a":1}\n```\nDone.'
    expect(extractJSON(output)).toEqual({ a: 1 })
  })

  it('parses JSON wrapped in bare ``` fence', () => {
    expect(extractJSON('```\n{"a":1}\n```')).toEqual({ a: 1 })
  })

  it('extracts embedded JSON object from surrounding text', () => {
    const output = 'The answer is {"summary":"hello","score":5} as shown above.'
    expect(extractJSON(output)).toEqual({ summary: 'hello', score: 5 })
  })

  it('extracts JSON array', () => {
    expect(extractJSON('[1,2,3]')).toEqual([1, 2, 3])
  })

  it('extracts embedded JSON array from surrounding text', () => {
    const output = 'Here: [{"a":1},{"a":2}] end'
    expect(extractJSON(output)).toEqual([{ a: 1 }, { a: 2 }])
  })

  it('throws on non-JSON text', () => {
    expect(() => extractJSON('just plain text')).toThrow('Failed to extract JSON')
  })

  it('throws on empty string', () => {
    expect(() => extractJSON('')).toThrow('Failed to extract JSON')
  })
})
// ---------------------------------------------------------------------------
// validateOutput
// ---------------------------------------------------------------------------
describe('validateOutput', () => {
  // Shared schema for the success/failure cases below.
  const schema = z.object({
    summary: z.string(),
    score: z.number().min(0).max(10),
  })

  it('returns validated data on success', () => {
    const input = { summary: 'hello', score: 5 }
    expect(validateOutput(schema, input)).toEqual(input)
  })

  it('throws on missing field', () => {
    const attempt = () => validateOutput(schema, { summary: 'hello' })
    expect(attempt).toThrow('Output validation failed')
  })

  it('throws on wrong type', () => {
    const attempt = () => validateOutput(schema, { summary: 'hello', score: 'not a number' })
    expect(attempt).toThrow('Output validation failed')
  })

  it('throws on value out of range', () => {
    const attempt = () => validateOutput(schema, { summary: 'hello', score: 99 })
    expect(attempt).toThrow('Output validation failed')
  })

  it('applies Zod transforms', () => {
    const upperCasing = z.object({
      name: z.string().transform(s => s.toUpperCase()),
    })
    expect(validateOutput(upperCasing, { name: 'alice' })).toEqual({ name: 'ALICE' })
  })

  it('strips unknown keys with strict schema', () => {
    const strictOnly = z.object({ a: z.number() }).strict()
    expect(() => validateOutput(strictOnly, { a: 1, b: 2 })).toThrow('Output validation failed')
  })

  it('shows (root) for root-level errors', () => {
    expect(() => validateOutput(z.string(), 42)).toThrow('(root)')
  })
})
// ---------------------------------------------------------------------------
// buildStructuredOutputInstruction
// ---------------------------------------------------------------------------
describe('buildStructuredOutputInstruction', () => {
  it('includes the JSON Schema representation', () => {
    const instruction = buildStructuredOutputInstruction(
      z.object({ summary: z.string(), score: z.number() }),
    )
    // The instruction must carry both the directive text and the schema body.
    const expectedFragments = [
      'Output Format (REQUIRED)',
      '"type": "object"',
      '"summary"',
      '"score"',
      'ONLY valid JSON',
    ]
    for (const fragment of expectedFragments) {
      expect(instruction).toContain(fragment)
    }
  })

  it('includes description from Zod schema', () => {
    const described = z.object({
      name: z.string().describe('The person name'),
    })
    expect(buildStructuredOutputInstruction(described)).toContain('The person name')
  })
})
// ---------------------------------------------------------------------------
// Agent integration (mocked LLM)
// ---------------------------------------------------------------------------
/**
 * Build an Agent with a mocked LLM adapter by injecting an AgentRunner
 * directly into the Agent's private `runner` field, bypassing `createAdapter`.
 */
function buildMockAgent(config: AgentConfig, responses: string[]): Agent {
  const registry = new ToolRegistry()
  const executor = new ToolExecutor(registry)
  const agent = new Agent(config, registry, executor)
  // Pre-build a runner over the canned-response adapter so `getRunner()`
  // returns it without ever calling createAdapter.
  const injectedRunner = new AgentRunner(mockAdapter(responses), registry, executor, {
    model: config.model,
    systemPrompt: config.systemPrompt,
    maxTurns: config.maxTurns,
    maxTokens: config.maxTokens,
    temperature: config.temperature,
    agentName: config.name,
  })
  ;(agent as any).runner = injectedRunner
  return agent
}
// End-to-end tests that drive Agent.run() with canned adapter responses to
// exercise the structured-output parse -> validate -> retry pipeline.
describe('Agent structured output (end-to-end)', () => {
  // Schema used by most cases: a small sentiment-analysis result shape.
  const schema = z.object({
    summary: z.string(),
    sentiment: z.enum(['positive', 'negative', 'neutral']),
    confidence: z.number().min(0).max(1),
  })
  // Base config with `outputSchema` set, so Agent.run() routes its result
  // through the structured-output validation path.
  const baseConfig: AgentConfig = {
    name: 'test-agent',
    model: 'mock-model',
    systemPrompt: 'You are a test agent.',
    outputSchema: schema,
  }
  it('happy path: valid JSON on first attempt', async () => {
    const validJSON = JSON.stringify({
      summary: 'Great product',
      sentiment: 'positive',
      confidence: 0.95,
    })
    const agent = buildMockAgent(baseConfig, [validJSON])
    const result = await agent.run('Analyze this review')
    expect(result.success).toBe(true)
    expect(result.structured).toEqual({
      summary: 'Great product',
      sentiment: 'positive',
      confidence: 0.95,
    })
  })
  it('retry: invalid first attempt, valid second attempt', async () => {
    // mockAdapter serves responses in order: the first fails enum
    // validation, so the automatic retry consumes the second.
    const invalidJSON = JSON.stringify({
      summary: 'Great product',
      sentiment: 'INVALID_VALUE',
      confidence: 0.95,
    })
    const validJSON = JSON.stringify({
      summary: 'Great product',
      sentiment: 'positive',
      confidence: 0.95,
    })
    const agent = buildMockAgent(baseConfig, [invalidJSON, validJSON])
    const result = await agent.run('Analyze this review')
    expect(result.success).toBe(true)
    expect(result.structured).toEqual({
      summary: 'Great product',
      sentiment: 'positive',
      confidence: 0.95,
    })
    // Token usage should reflect both attempts
    expect(result.tokenUsage.input_tokens).toBe(20) // 10 + 10
    expect(result.tokenUsage.output_tokens).toBe(40) // 20 + 20
  })
  it('both attempts fail: success=false, structured=undefined', async () => {
    // Both canned responses fail enum validation, so the run reports failure.
    const bad1 = '{"summary": "ok", "sentiment": "WRONG"}'
    const bad2 = '{"summary": "ok", "sentiment": "ALSO_WRONG"}'
    const agent = buildMockAgent(baseConfig, [bad1, bad2])
    const result = await agent.run('Analyze this review')
    expect(result.success).toBe(false)
    expect(result.structured).toBeUndefined()
  })
  it('no outputSchema: original behavior, structured is undefined', async () => {
    // Without an outputSchema the structured pipeline is bypassed entirely.
    const configNoSchema: AgentConfig = {
      name: 'plain-agent',
      model: 'mock-model',
      systemPrompt: 'You are a test agent.',
    }
    const agent = buildMockAgent(configNoSchema, ['Just plain text output'])
    const result = await agent.run('Hello')
    expect(result.success).toBe(true)
    expect(result.output).toBe('Just plain text output')
    expect(result.structured).toBeUndefined()
  })
  it('handles JSON wrapped in markdown fence', async () => {
    const fenced = '```json\n{"summary":"ok","sentiment":"neutral","confidence":0.5}\n```'
    const agent = buildMockAgent(baseConfig, [fenced])
    const result = await agent.run('Analyze')
    expect(result.success).toBe(true)
    expect(result.structured).toEqual({
      summary: 'ok',
      sentiment: 'neutral',
      confidence: 0.5,
    })
  })
  it('non-JSON output triggers retry, valid JSON on retry succeeds', async () => {
    const nonJSON = 'I am not sure how to analyze this.'
    const validJSON = JSON.stringify({
      summary: 'Uncertain',
      sentiment: 'neutral',
      confidence: 0.1,
    })
    const agent = buildMockAgent(baseConfig, [nonJSON, validJSON])
    const result = await agent.run('Analyze this review')
    expect(result.success).toBe(true)
    expect(result.structured).toEqual({
      summary: 'Uncertain',
      sentiment: 'neutral',
      confidence: 0.1,
    })
  })
  it('non-JSON output on both attempts: success=false', async () => {
    const agent = buildMockAgent(baseConfig, [
      'Sorry, I cannot do that.',
      'Still cannot do it.',
    ])
    const result = await agent.run('Analyze this review')
    expect(result.success).toBe(false)
    expect(result.structured).toBeUndefined()
  })
  it('token usage on first-attempt success reflects single call only', async () => {
    const validJSON = JSON.stringify({
      summary: 'Good',
      sentiment: 'positive',
      confidence: 0.9,
    })
    const agent = buildMockAgent(baseConfig, [validJSON])
    const result = await agent.run('Analyze')
    expect(result.tokenUsage.input_tokens).toBe(10)
    expect(result.tokenUsage.output_tokens).toBe(20)
  })
})