import { describe, it, expect } from 'vitest'
import { z } from 'zod'

import {
  buildStructuredOutputInstruction,
  extractJSON,
  validateOutput,
} from '../src/agent/structured-output.js'
import { Agent } from '../src/agent/agent.js'
import { AgentRunner } from '../src/agent/runner.js'
import { ToolRegistry } from '../src/tool/framework.js'
import { ToolExecutor } from '../src/tool/executor.js'
import type { AgentConfig, LLMAdapter, LLMResponse } from '../src/types.js'
// ---------------------------------------------------------------------------
// Mock LLM adapter factory
// ---------------------------------------------------------------------------
function mockAdapter(responses: string[]): LLMAdapter {
|
|
let callIndex = 0
|
|
return {
|
|
name: 'mock',
|
|
async chat() {
|
|
const text = responses[callIndex++] ?? ''
|
|
return {
|
|
id: `mock-${callIndex}`,
|
|
content: [{ type: 'text' as const, text }],
|
|
model: 'mock-model',
|
|
stop_reason: 'end_turn',
|
|
usage: { input_tokens: 10, output_tokens: 20 },
|
|
} satisfies LLMResponse
|
|
},
|
|
async *stream() {
|
|
/* unused in these tests */
|
|
},
|
|
}
|
|
}
// ---------------------------------------------------------------------------
// extractJSON
// ---------------------------------------------------------------------------
describe('extractJSON', () => {
|
|
it('parses clean JSON', () => {
|
|
expect(extractJSON('{"a":1}')).toEqual({ a: 1 })
|
|
})
|
|
|
|
it('parses JSON wrapped in ```json fence', () => {
|
|
const raw = 'Here is the result:\n```json\n{"a":1}\n```\nDone.'
|
|
expect(extractJSON(raw)).toEqual({ a: 1 })
|
|
})
|
|
|
|
it('parses JSON wrapped in bare ``` fence', () => {
|
|
const raw = '```\n{"a":1}\n```'
|
|
expect(extractJSON(raw)).toEqual({ a: 1 })
|
|
})
|
|
|
|
it('extracts embedded JSON object from surrounding text', () => {
|
|
const raw = 'The answer is {"summary":"hello","score":5} as shown above.'
|
|
expect(extractJSON(raw)).toEqual({ summary: 'hello', score: 5 })
|
|
})
|
|
|
|
it('extracts JSON array', () => {
|
|
expect(extractJSON('[1,2,3]')).toEqual([1, 2, 3])
|
|
})
|
|
|
|
it('extracts embedded JSON array from surrounding text', () => {
|
|
const raw = 'Here: [{"a":1},{"a":2}] end'
|
|
expect(extractJSON(raw)).toEqual([{ a: 1 }, { a: 2 }])
|
|
})
|
|
|
|
it('throws on non-JSON text', () => {
|
|
expect(() => extractJSON('just plain text')).toThrow('Failed to extract JSON')
|
|
})
|
|
|
|
it('throws on empty string', () => {
|
|
expect(() => extractJSON('')).toThrow('Failed to extract JSON')
|
|
})
|
|
})
// ---------------------------------------------------------------------------
// validateOutput
// ---------------------------------------------------------------------------
describe('validateOutput', () => {
|
|
const schema = z.object({
|
|
summary: z.string(),
|
|
score: z.number().min(0).max(10),
|
|
})
|
|
|
|
it('returns validated data on success', () => {
|
|
const data = { summary: 'hello', score: 5 }
|
|
expect(validateOutput(schema, data)).toEqual(data)
|
|
})
|
|
|
|
it('throws on missing field', () => {
|
|
expect(() => validateOutput(schema, { summary: 'hello' })).toThrow(
|
|
'Output validation failed',
|
|
)
|
|
})
|
|
|
|
it('throws on wrong type', () => {
|
|
expect(() =>
|
|
validateOutput(schema, { summary: 'hello', score: 'not a number' }),
|
|
).toThrow('Output validation failed')
|
|
})
|
|
|
|
it('throws on value out of range', () => {
|
|
expect(() =>
|
|
validateOutput(schema, { summary: 'hello', score: 99 }),
|
|
).toThrow('Output validation failed')
|
|
})
|
|
|
|
it('applies Zod transforms', () => {
|
|
const transformSchema = z.object({
|
|
name: z.string().transform(s => s.toUpperCase()),
|
|
})
|
|
const result = validateOutput(transformSchema, { name: 'alice' })
|
|
expect(result).toEqual({ name: 'ALICE' })
|
|
})
|
|
|
|
it('strips unknown keys with strict schema', () => {
|
|
const strictSchema = z.object({ a: z.number() }).strict()
|
|
expect(() =>
|
|
validateOutput(strictSchema, { a: 1, b: 2 }),
|
|
).toThrow('Output validation failed')
|
|
})
|
|
|
|
it('shows (root) for root-level errors', () => {
|
|
const stringSchema = z.string()
|
|
expect(() => validateOutput(stringSchema, 42)).toThrow('(root)')
|
|
})
|
|
})
// ---------------------------------------------------------------------------
// buildStructuredOutputInstruction
// ---------------------------------------------------------------------------
describe('buildStructuredOutputInstruction', () => {
|
|
it('includes the JSON Schema representation', () => {
|
|
const schema = z.object({
|
|
summary: z.string(),
|
|
score: z.number(),
|
|
})
|
|
const instruction = buildStructuredOutputInstruction(schema)
|
|
|
|
expect(instruction).toContain('Output Format (REQUIRED)')
|
|
expect(instruction).toContain('"type": "object"')
|
|
expect(instruction).toContain('"summary"')
|
|
expect(instruction).toContain('"score"')
|
|
expect(instruction).toContain('ONLY valid JSON')
|
|
})
|
|
|
|
it('includes description from Zod schema', () => {
|
|
const schema = z.object({
|
|
name: z.string().describe('The person name'),
|
|
})
|
|
const instruction = buildStructuredOutputInstruction(schema)
|
|
expect(instruction).toContain('The person name')
|
|
})
|
|
})
// ---------------------------------------------------------------------------
// Agent integration (mocked LLM)
// ---------------------------------------------------------------------------
/**
|
|
* Build an Agent with a mocked LLM adapter by injecting an AgentRunner
|
|
* directly into the Agent's private `runner` field, bypassing `createAdapter`.
|
|
*/
|
|
function buildMockAgent(config: AgentConfig, responses: string[]): Agent {
|
|
const adapter = mockAdapter(responses)
|
|
const registry = new ToolRegistry()
|
|
const executor = new ToolExecutor(registry)
|
|
const agent = new Agent(config, registry, executor)
|
|
|
|
// Inject a pre-built runner so `getRunner()` returns it without calling createAdapter.
|
|
const runner = new AgentRunner(adapter, registry, executor, {
|
|
model: config.model,
|
|
systemPrompt: config.systemPrompt,
|
|
maxTurns: config.maxTurns,
|
|
maxTokens: config.maxTokens,
|
|
temperature: config.temperature,
|
|
agentName: config.name,
|
|
})
|
|
;(agent as any).runner = runner
|
|
|
|
return agent
|
|
}
describe('Agent structured output (end-to-end)', () => {
|
|
const schema = z.object({
|
|
summary: z.string(),
|
|
sentiment: z.enum(['positive', 'negative', 'neutral']),
|
|
confidence: z.number().min(0).max(1),
|
|
})
|
|
|
|
const baseConfig: AgentConfig = {
|
|
name: 'test-agent',
|
|
model: 'mock-model',
|
|
systemPrompt: 'You are a test agent.',
|
|
outputSchema: schema,
|
|
}
|
|
|
|
it('happy path: valid JSON on first attempt', async () => {
|
|
const validJSON = JSON.stringify({
|
|
summary: 'Great product',
|
|
sentiment: 'positive',
|
|
confidence: 0.95,
|
|
})
|
|
|
|
const agent = buildMockAgent(baseConfig, [validJSON])
|
|
const result = await agent.run('Analyze this review')
|
|
|
|
expect(result.success).toBe(true)
|
|
expect(result.structured).toEqual({
|
|
summary: 'Great product',
|
|
sentiment: 'positive',
|
|
confidence: 0.95,
|
|
})
|
|
})
|
|
|
|
it('retry: invalid first attempt, valid second attempt', async () => {
|
|
const invalidJSON = JSON.stringify({
|
|
summary: 'Great product',
|
|
sentiment: 'INVALID_VALUE',
|
|
confidence: 0.95,
|
|
})
|
|
const validJSON = JSON.stringify({
|
|
summary: 'Great product',
|
|
sentiment: 'positive',
|
|
confidence: 0.95,
|
|
})
|
|
|
|
const agent = buildMockAgent(baseConfig, [invalidJSON, validJSON])
|
|
const result = await agent.run('Analyze this review')
|
|
|
|
expect(result.success).toBe(true)
|
|
expect(result.structured).toEqual({
|
|
summary: 'Great product',
|
|
sentiment: 'positive',
|
|
confidence: 0.95,
|
|
})
|
|
// Token usage should reflect both attempts
|
|
expect(result.tokenUsage.input_tokens).toBe(20) // 10 + 10
|
|
expect(result.tokenUsage.output_tokens).toBe(40) // 20 + 20
|
|
})
|
|
|
|
it('both attempts fail: success=false, structured=undefined', async () => {
|
|
const bad1 = '{"summary": "ok", "sentiment": "WRONG"}'
|
|
const bad2 = '{"summary": "ok", "sentiment": "ALSO_WRONG"}'
|
|
|
|
const agent = buildMockAgent(baseConfig, [bad1, bad2])
|
|
const result = await agent.run('Analyze this review')
|
|
|
|
expect(result.success).toBe(false)
|
|
expect(result.structured).toBeUndefined()
|
|
})
|
|
|
|
it('no outputSchema: original behavior, structured is undefined', async () => {
|
|
const configNoSchema: AgentConfig = {
|
|
name: 'plain-agent',
|
|
model: 'mock-model',
|
|
systemPrompt: 'You are a test agent.',
|
|
}
|
|
|
|
const agent = buildMockAgent(configNoSchema, ['Just plain text output'])
|
|
const result = await agent.run('Hello')
|
|
|
|
expect(result.success).toBe(true)
|
|
expect(result.output).toBe('Just plain text output')
|
|
expect(result.structured).toBeUndefined()
|
|
})
|
|
|
|
it('handles JSON wrapped in markdown fence', async () => {
|
|
const fenced = '```json\n{"summary":"ok","sentiment":"neutral","confidence":0.5}\n```'
|
|
|
|
const agent = buildMockAgent(baseConfig, [fenced])
|
|
const result = await agent.run('Analyze')
|
|
|
|
expect(result.success).toBe(true)
|
|
expect(result.structured).toEqual({
|
|
summary: 'ok',
|
|
sentiment: 'neutral',
|
|
confidence: 0.5,
|
|
})
|
|
})
|
|
|
|
it('non-JSON output triggers retry, valid JSON on retry succeeds', async () => {
|
|
const nonJSON = 'I am not sure how to analyze this.'
|
|
const validJSON = JSON.stringify({
|
|
summary: 'Uncertain',
|
|
sentiment: 'neutral',
|
|
confidence: 0.1,
|
|
})
|
|
|
|
const agent = buildMockAgent(baseConfig, [nonJSON, validJSON])
|
|
const result = await agent.run('Analyze this review')
|
|
|
|
expect(result.success).toBe(true)
|
|
expect(result.structured).toEqual({
|
|
summary: 'Uncertain',
|
|
sentiment: 'neutral',
|
|
confidence: 0.1,
|
|
})
|
|
})
|
|
|
|
it('non-JSON output on both attempts: success=false', async () => {
|
|
const agent = buildMockAgent(baseConfig, [
|
|
'Sorry, I cannot do that.',
|
|
'Still cannot do it.',
|
|
])
|
|
const result = await agent.run('Analyze this review')
|
|
|
|
expect(result.success).toBe(false)
|
|
expect(result.structured).toBeUndefined()
|
|
})
|
|
|
|
it('token usage on first-attempt success reflects single call only', async () => {
|
|
const validJSON = JSON.stringify({
|
|
summary: 'Good',
|
|
sentiment: 'positive',
|
|
confidence: 0.9,
|
|
})
|
|
|
|
const agent = buildMockAgent(baseConfig, [validJSON])
|
|
const result = await agent.run('Analyze')
|
|
|
|
expect(result.tokenUsage.input_tokens).toBe(10)
|
|
expect(result.tokenUsage.output_tokens).toBe(20)
|
|
})
|
|
})
|