diff --git a/src/agent/agent.ts b/src/agent/agent.ts index 4ef392e..a178e37 100644 --- a/src/agent/agent.ts +++ b/src/agent/agent.ts @@ -35,7 +35,12 @@ import type { import type { ToolDefinition as FrameworkToolDefinition, ToolRegistry } from '../tool/framework.js' import type { ToolExecutor } from '../tool/executor.js' import { createAdapter } from '../llm/adapter.js' -import { AgentRunner, type RunnerOptions, type RunOptions } from './runner.js' +import { AgentRunner, type RunnerOptions, type RunOptions, type RunResult } from './runner.js' +import { + buildStructuredOutputInstruction, + extractJSON, + validateOutput, +} from './structured-output.js' // --------------------------------------------------------------------------- // Internal helpers @@ -111,9 +116,18 @@ export class Agent { const provider = this.config.provider ?? 'anthropic' const adapter = await createAdapter(provider, this.config.apiKey, this.config.baseURL) + // Append structured-output instructions when an outputSchema is configured. + let effectiveSystemPrompt = this.config.systemPrompt + if (this.config.outputSchema) { + const instruction = buildStructuredOutputInstruction(this.config.outputSchema) + effectiveSystemPrompt = effectiveSystemPrompt + ? 
/**
 * Validate a run's final output against the configured `outputSchema`.
 *
 * The output text is parsed with `extractJSON` and checked with
 * `validateOutput`. When that first attempt fails, the runner is invoked one
 * more time with the original messages, the first run's messages, and a user
 * message that carries the failure text. The returned result then merges the
 * messages, token usage and tool calls of both runs.
 *
 * The agent state transitions to `'completed'` on every non-throwing path,
 * including a failed retry (which is reported through `success: false`).
 *
 * NOTE(review): an error thrown by the retry `runner.run` call propagates out
 * of this method. A caller that wants its own `try/catch` to observe it must
 * `await` the returned promise instead of returning it directly.
 *
 * @param originalMessages - Messages that were passed to the first run.
 * @param result - Result of the first run.
 * @param runner - Runner used to perform the single retry.
 * @param runOptions - Options forwarded unchanged to the retry run.
 * @returns The agent result; `structured` is set only when validation passed.
 */
private async validateStructuredOutput(
  originalMessages: LLMMessage[],
  result: RunResult,
  runner: AgentRunner,
  runOptions: RunOptions,
): Promise<AgentRunResult> {
  const schema = this.config.outputSchema!

  // Parse + validate in one step, capturing the failure instead of throwing,
  // so only extraction/validation errors (never state-transition errors)
  // decide whether a retry happens.
  const check = (
    output: string,
  ): { ok: true; value: unknown } | { ok: false; error: unknown } => {
    try {
      return { ok: true, value: validateOutput(schema, extractJSON(output)) }
    } catch (error) {
      return { ok: false, error }
    }
  }

  // First attempt
  const first = check(result.output)
  if (first.ok) {
    this.transitionTo('completed')
    return this.toAgentRunResult(result, true, first.value)
  }

  // Retry: send full context + error feedback
  const errorMsg = first.error instanceof Error
    ? first.error.message
    : String(first.error)

  const feedback: LLMMessage = {
    role: 'user',
    content: [{
      type: 'text',
      text: [
        'Your previous response did not produce valid JSON matching the required schema.',
        '',
        `Error: ${errorMsg}`,
        '',
        'Please try again. Respond with ONLY valid JSON, no other text.',
      ].join('\n'),
    }],
  }

  const retryResult = await runner.run(
    [...originalMessages, ...result.messages, feedback],
    runOptions,
  )
  // The first run's usage was already added by the caller; add the retry's.
  this.state.tokenUsage = addUsage(this.state.tokenUsage, retryResult.tokenUsage)

  const second = check(retryResult.output)
  this.transitionTo('completed')
  return {
    success: second.ok,
    output: retryResult.output,
    messages: [...result.messages, ...retryResult.messages],
    tokenUsage: addUsage(result.tokenUsage, retryResult.tokenUsage),
    toolCalls: [...result.toolCalls, ...retryResult.toolCalls],
    structured: second.ok ? second.value : undefined,
  }
}
/**
 * Produce the structured-output section that is appended to a system prompt.
 *
 * The Zod schema is converted with `zodToJsonSchema`, pretty-printed with
 * two-space indentation, and placed inside a plain code fence beneath a
 * heading and three directives telling the model to answer with JSON only.
 * The returned text begins with an empty line so it is visually separated
 * from whatever precedes it.
 *
 * @param schema - Zod schema describing the expected output.
 * @returns The instruction block as a single newline-joined string.
 */
export function buildStructuredOutputInstruction(schema: ZodSchema): string {
  const fence = '```'
  const schemaText = JSON.stringify(zodToJsonSchema(schema), null, 2)

  const directives = [
    '## Output Format (REQUIRED)',
    'You MUST respond with ONLY valid JSON that conforms to the following JSON Schema.',
    'Do NOT include any text, markdown fences, or explanation outside the JSON object.',
    'Do NOT wrap the JSON in ```json code fences.',
  ]

  return ['', ...directives, '', fence, schemaText, fence].join('\n')
}
/**
 * Extract and parse a JSON value from an agent's raw text output.
 *
 * Candidates are tried in this order; the first one that parses wins:
 *  1. The whole (trimmed) output.
 *  2. The contents of every ```json fenced block, in document order.
 *  3. The contents of every ``` fenced block, in document order.
 *  4. A bare JSON object (first `{` to last `}`) and a bare JSON array
 *     (first `[` to last `]`) embedded in surrounding text. The object span
 *     is tried first unless the array span strictly encloses it, in which
 *     case the array goes first so that `text [{"a":1}] text` yields the
 *     array rather than its inner object.
 *
 * @param raw - Raw text produced by the agent.
 * @returns The parsed JSON value.
 * @throws {Error} When no candidate is valid JSON; the message quotes the
 *   first 100 characters of the trimmed output.
 */
export function extractJSON(raw: string): unknown {
  const trimmed = raw.trim()

  // Case 1: the output itself.
  const candidates: string[] = [trimmed]

  // Case 2: fenced blocks — tagged fences are preferred over bare ones.
  const fencePatterns = [/```json\s*([\s\S]*?)```/g, /```\s*([\s\S]*?)```/g]
  for (const pattern of fencePatterns) {
    for (const match of trimmed.matchAll(pattern)) {
      const inner = match[1]?.trim()
      if (inner) {
        candidates.push(inner)
      }
    }
  }

  // Case 3: bare object / array spans inside free text.
  const objStart = trimmed.indexOf('{')
  const objEnd = trimmed.lastIndexOf('}')
  const arrStart = trimmed.indexOf('[')
  const arrEnd = trimmed.lastIndexOf(']')

  const objectSpan = objStart !== -1 && objEnd > objStart
    ? trimmed.slice(objStart, objEnd + 1)
    : undefined
  const arraySpan = arrStart !== -1 && arrEnd > arrStart
    ? trimmed.slice(arrStart, arrEnd + 1)
    : undefined

  // An array that wraps the object span is the outer value; try it first.
  const arrayEnclosesObject =
    objectSpan !== undefined &&
    arraySpan !== undefined &&
    arrStart < objStart &&
    arrEnd > objEnd

  const spans = arrayEnclosesObject ? [arraySpan, objectSpan] : [objectSpan, arraySpan]
  for (const span of spans) {
    if (span !== undefined) {
      candidates.push(span)
    }
  }

  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate)
    } catch {
      // Not valid JSON — move on to the next candidate.
    }
  }

  throw new Error(
    `Failed to extract JSON from output. Raw output begins with: "${trimmed.slice(0, 100)}"`,
  )
}
/**
 * Check a parsed JSON value against a Zod schema.
 *
 * Uses `schema.safeParse`, so schema transforms are applied to the returned
 * value. On failure every issue is rendered on its own line as
 * `<dotted path>: <message>`, with `(root)` standing in for an empty path.
 *
 * @param schema - Schema the value must satisfy.
 * @param data - Value to validate (typically the result of JSON parsing).
 * @returns The parsed value produced by the schema on success.
 * @throws {Error} With message `Output validation failed:` followed by the
 *   issue list when the value does not satisfy the schema.
 */
export function validateOutput(schema: ZodSchema, data: unknown): unknown {
  const outcome = schema.safeParse(data)

  if (!outcome.success) {
    const lines: string[] = []
    for (const { path, message } of outcome.error.issues) {
      const location = path.length === 0 ? '(root)' : path.join('.')
      lines.push(` - ${location}: ${message}`)
    }
    throw new Error(`Output validation failed:\n${lines.join('\n')}`)
  }

  return outcome.data
}
/**
 * Create a fake LLM adapter that replays canned text responses.
 *
 * Each `chat()` call returns the next entry of `responses` as a single text
 * block (an empty string once the list is exhausted), with fixed usage of
 * 10 input / 20 output tokens and an id of `mock-<number of calls so far>`.
 * `stream()` yields nothing.
 *
 * @param responses - Texts to return, one per `chat()` call, in order.
 * @returns An adapter named `'mock'`.
 */
function mockAdapter(responses: string[]): LLMAdapter {
  let served = 0

  const adapter: LLMAdapter = {
    name: 'mock',
    async chat() {
      const position = served
      served += 1
      const reply = {
        id: `mock-${served}`,
        content: [{ type: 'text' as const, text: responses[position] ?? '' }],
        model: 'mock-model',
        stop_reason: 'end_turn',
        usage: { input_tokens: 10, output_tokens: 20 },
      } satisfies LLMResponse
      return reply
    },
    async *stream() {
      // Streaming is never exercised by these tests.
    },
  }

  return adapter
}
+ expect(extractJSON(raw)).toEqual({ summary: 'hello', score: 5 }) + }) + + it('extracts JSON array', () => { + expect(extractJSON('[1,2,3]')).toEqual([1, 2, 3]) + }) + + it('extracts embedded JSON array from surrounding text', () => { + const raw = 'Here: [{"a":1},{"a":2}] end' + expect(extractJSON(raw)).toEqual([{ a: 1 }, { a: 2 }]) + }) + + it('throws on non-JSON text', () => { + expect(() => extractJSON('just plain text')).toThrow('Failed to extract JSON') + }) + + it('throws on empty string', () => { + expect(() => extractJSON('')).toThrow('Failed to extract JSON') + }) +}) + +// --------------------------------------------------------------------------- +// validateOutput +// --------------------------------------------------------------------------- + +describe('validateOutput', () => { + const schema = z.object({ + summary: z.string(), + score: z.number().min(0).max(10), + }) + + it('returns validated data on success', () => { + const data = { summary: 'hello', score: 5 } + expect(validateOutput(schema, data)).toEqual(data) + }) + + it('throws on missing field', () => { + expect(() => validateOutput(schema, { summary: 'hello' })).toThrow( + 'Output validation failed', + ) + }) + + it('throws on wrong type', () => { + expect(() => + validateOutput(schema, { summary: 'hello', score: 'not a number' }), + ).toThrow('Output validation failed') + }) + + it('throws on value out of range', () => { + expect(() => + validateOutput(schema, { summary: 'hello', score: 99 }), + ).toThrow('Output validation failed') + }) + + it('applies Zod transforms', () => { + const transformSchema = z.object({ + name: z.string().transform(s => s.toUpperCase()), + }) + const result = validateOutput(transformSchema, { name: 'alice' }) + expect(result).toEqual({ name: 'ALICE' }) + }) + + it('strips unknown keys with strict schema', () => { + const strictSchema = z.object({ a: z.number() }).strict() + expect(() => + validateOutput(strictSchema, { a: 1, b: 2 }), + ).toThrow('Output 
/**
 * Build an Agent backed by a mocked LLM adapter.
 *
 * A pre-built `AgentRunner` is placed into the agent's private `runner`
 * field so that `getRunner()` returns it without calling `createAdapter`.
 * The runner's system prompt is composed the same way the agent composes it:
 * when `config.outputSchema` is set, the structured-output instruction is
 * appended to `config.systemPrompt` (or used alone when there is no prompt).
 *
 * @param config - Agent configuration under test.
 * @param responses - Canned LLM replies, consumed one per chat call.
 * @returns The agent with the mock-backed runner installed.
 */
function buildMockAgent(config: AgentConfig, responses: string[]): Agent {
  const adapter = mockAdapter(responses)
  const registry = new ToolRegistry()
  const executor = new ToolExecutor(registry)
  const agent = new Agent(config, registry, executor)

  // Mirror the agent's own prompt composition so the injected runner sees the
  // same system prompt a real run would.
  let systemPrompt = config.systemPrompt
  if (config.outputSchema) {
    const instruction = buildStructuredOutputInstruction(config.outputSchema)
    systemPrompt = systemPrompt ? systemPrompt + '\n' + instruction : instruction
  }

  const runner = new AgentRunner(adapter, registry, executor, {
    model: config.model,
    systemPrompt,
    maxTurns: config.maxTurns,
    maxTokens: config.maxTokens,
    temperature: config.temperature,
    agentName: config.name,
  })

  // `runner` is private on Agent; Reflect.set writes it without an `any` cast.
  Reflect.set(agent, 'runner', runner)

  return agent
}
"ok", "sentiment": "WRONG"}' + const bad2 = '{"summary": "ok", "sentiment": "ALSO_WRONG"}' + + const agent = buildMockAgent(baseConfig, [bad1, bad2]) + const result = await agent.run('Analyze this review') + + expect(result.success).toBe(false) + expect(result.structured).toBeUndefined() + }) + + it('no outputSchema: original behavior, structured is undefined', async () => { + const configNoSchema: AgentConfig = { + name: 'plain-agent', + model: 'mock-model', + systemPrompt: 'You are a test agent.', + } + + const agent = buildMockAgent(configNoSchema, ['Just plain text output']) + const result = await agent.run('Hello') + + expect(result.success).toBe(true) + expect(result.output).toBe('Just plain text output') + expect(result.structured).toBeUndefined() + }) + + it('handles JSON wrapped in markdown fence', async () => { + const fenced = '```json\n{"summary":"ok","sentiment":"neutral","confidence":0.5}\n```' + + const agent = buildMockAgent(baseConfig, [fenced]) + const result = await agent.run('Analyze') + + expect(result.success).toBe(true) + expect(result.structured).toEqual({ + summary: 'ok', + sentiment: 'neutral', + confidence: 0.5, + }) + }) + + it('non-JSON output triggers retry, valid JSON on retry succeeds', async () => { + const nonJSON = 'I am not sure how to analyze this.' 
+ const validJSON = JSON.stringify({ + summary: 'Uncertain', + sentiment: 'neutral', + confidence: 0.1, + }) + + const agent = buildMockAgent(baseConfig, [nonJSON, validJSON]) + const result = await agent.run('Analyze this review') + + expect(result.success).toBe(true) + expect(result.structured).toEqual({ + summary: 'Uncertain', + sentiment: 'neutral', + confidence: 0.1, + }) + }) + + it('non-JSON output on both attempts: success=false', async () => { + const agent = buildMockAgent(baseConfig, [ + 'Sorry, I cannot do that.', + 'Still cannot do it.', + ]) + const result = await agent.run('Analyze this review') + + expect(result.success).toBe(false) + expect(result.structured).toBeUndefined() + }) + + it('token usage on first-attempt success reflects single call only', async () => { + const validJSON = JSON.stringify({ + summary: 'Good', + sentiment: 'positive', + confidence: 0.9, + }) + + const agent = buildMockAgent(baseConfig, [validJSON]) + const result = await agent.run('Analyze') + + expect(result.tokenUsage.input_tokens).toBe(10) + expect(result.tokenUsage.output_tokens).toBe(20) + }) +})