// Tests for trace-event emission: emitTrace/generateRunId helpers,
// AgentRunner per-turn llm_call/tool_call traces, and Agent-level traces.
import { describe, it, expect, vi } from 'vitest'
|
|
import { z } from 'zod'
|
|
import { Agent } from '../src/agent/agent.js'
|
|
import { AgentRunner, type RunOptions } from '../src/agent/runner.js'
|
|
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
|
|
import { ToolExecutor } from '../src/tool/executor.js'
|
|
import { executeWithRetry } from '../src/orchestrator/orchestrator.js'
|
|
import { emitTrace, generateRunId } from '../src/utils/trace.js'
|
|
import { createTask } from '../src/task/task.js'
|
|
import type {
|
|
AgentConfig,
|
|
AgentRunResult,
|
|
LLMAdapter,
|
|
LLMResponse,
|
|
TraceEvent,
|
|
} from '../src/types.js'
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Mock adapters
|
|
// ---------------------------------------------------------------------------
|
|
|
|
function mockAdapter(responses: LLMResponse[]): LLMAdapter {
|
|
let callIndex = 0
|
|
return {
|
|
name: 'mock',
|
|
async chat() {
|
|
return responses[callIndex++]!
|
|
},
|
|
async *stream() {
|
|
/* unused */
|
|
},
|
|
}
|
|
}
|
|
|
|
function textResponse(text: string): LLMResponse {
|
|
return {
|
|
id: `resp-${Math.random().toString(36).slice(2)}`,
|
|
content: [{ type: 'text' as const, text }],
|
|
model: 'mock-model',
|
|
stop_reason: 'end_turn',
|
|
usage: { input_tokens: 10, output_tokens: 20 },
|
|
}
|
|
}
|
|
|
|
function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse {
|
|
return {
|
|
id: `resp-${Math.random().toString(36).slice(2)}`,
|
|
content: [
|
|
{
|
|
type: 'tool_use' as const,
|
|
id: `tu-${Math.random().toString(36).slice(2)}`,
|
|
name: toolName,
|
|
input,
|
|
},
|
|
],
|
|
model: 'mock-model',
|
|
stop_reason: 'tool_use',
|
|
usage: { input_tokens: 15, output_tokens: 25 },
|
|
}
|
|
}
|
|
|
|
function buildMockAgent(
|
|
config: AgentConfig,
|
|
responses: LLMResponse[],
|
|
registry?: ToolRegistry,
|
|
executor?: ToolExecutor,
|
|
): Agent {
|
|
const reg = registry ?? new ToolRegistry()
|
|
const exec = executor ?? new ToolExecutor(reg)
|
|
const adapter = mockAdapter(responses)
|
|
const agent = new Agent(config, reg, exec)
|
|
|
|
const runner = new AgentRunner(adapter, reg, exec, {
|
|
model: config.model,
|
|
systemPrompt: config.systemPrompt,
|
|
maxTurns: config.maxTurns,
|
|
maxTokens: config.maxTokens,
|
|
temperature: config.temperature,
|
|
agentName: config.name,
|
|
})
|
|
;(agent as any).runner = runner
|
|
|
|
return agent
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// emitTrace helper
|
|
// ---------------------------------------------------------------------------
|
|
|
|
describe('emitTrace', () => {
|
|
it('does nothing when fn is undefined', () => {
|
|
// Should not throw
|
|
emitTrace(undefined, {
|
|
type: 'agent',
|
|
runId: 'r1',
|
|
agent: 'a',
|
|
turns: 1,
|
|
tokens: { input_tokens: 0, output_tokens: 0 },
|
|
toolCalls: 0,
|
|
startMs: 0,
|
|
endMs: 0,
|
|
durationMs: 0,
|
|
})
|
|
})
|
|
|
|
it('calls fn with the event', () => {
|
|
const fn = vi.fn()
|
|
const event: TraceEvent = {
|
|
type: 'agent',
|
|
runId: 'r1',
|
|
agent: 'a',
|
|
turns: 1,
|
|
tokens: { input_tokens: 0, output_tokens: 0 },
|
|
toolCalls: 0,
|
|
startMs: 0,
|
|
endMs: 0,
|
|
durationMs: 0,
|
|
}
|
|
emitTrace(fn, event)
|
|
expect(fn).toHaveBeenCalledWith(event)
|
|
})
|
|
|
|
it('swallows errors thrown by callback', () => {
|
|
const fn = () => { throw new Error('boom') }
|
|
expect(() =>
|
|
emitTrace(fn, {
|
|
type: 'agent',
|
|
runId: 'r1',
|
|
agent: 'a',
|
|
turns: 1,
|
|
tokens: { input_tokens: 0, output_tokens: 0 },
|
|
toolCalls: 0,
|
|
startMs: 0,
|
|
endMs: 0,
|
|
durationMs: 0,
|
|
}),
|
|
).not.toThrow()
|
|
})
|
|
})
|
|
|
|
describe('generateRunId', () => {
|
|
it('returns a UUID string', () => {
|
|
const id = generateRunId()
|
|
expect(id).toMatch(/^[0-9a-f-]{36}$/)
|
|
})
|
|
|
|
it('returns unique IDs', () => {
|
|
const ids = new Set(Array.from({ length: 100 }, generateRunId))
|
|
expect(ids.size).toBe(100)
|
|
})
|
|
})
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// AgentRunner trace events
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Tests for AgentRunner's per-turn trace emission: llm_call traces for each
// LLM round-trip, tool_call traces (success and failure), and the no-trace
// path when onTrace is omitted.
describe('AgentRunner trace events', () => {
  it('emits llm_call trace for each LLM turn', async () => {
    const traces: TraceEvent[] = []
    const registry = new ToolRegistry()
    const executor = new ToolExecutor(registry)
    // Single scripted text response => exactly one LLM turn expected.
    const adapter = mockAdapter([textResponse('Hello!')])

    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'test-model',
      agentName: 'test-agent',
    })

    const runOptions: RunOptions = {
      onTrace: (e) => traces.push(e),
      runId: 'run-1',
      traceAgent: 'test-agent',
    }

    await runner.run(
      [{ role: 'user', content: [{ type: 'text', text: 'hi' }] }],
      runOptions,
    )

    const llmTraces = traces.filter(t => t.type === 'llm_call')
    expect(llmTraces).toHaveLength(1)

    // Field-by-field check: run/agent identity, per-turn metadata, and the
    // token usage from textResponse's fixed usage (10 in / 20 out).
    const llm = llmTraces[0]!
    expect(llm.type).toBe('llm_call')
    expect(llm.runId).toBe('run-1')
    expect(llm.agent).toBe('test-agent')
    expect(llm.model).toBe('test-model')
    expect(llm.turn).toBe(1)
    expect(llm.tokens).toEqual({ input_tokens: 10, output_tokens: 20 })
    expect(llm.durationMs).toBeGreaterThanOrEqual(0)
    expect(llm.startMs).toBeLessThanOrEqual(llm.endMs)
  })

  it('emits tool_call trace with correct fields', async () => {
    const traces: TraceEvent[] = []
    const registry = new ToolRegistry()
    registry.register(
      defineTool({
        name: 'echo',
        description: 'echoes',
        inputSchema: z.object({ msg: z.string() }),
        execute: async ({ msg }) => ({ data: msg }),
      }),
    )
    const executor = new ToolExecutor(registry)
    // Turn 1 invokes the tool; turn 2 is the final text response.
    const adapter = mockAdapter([
      toolUseResponse('echo', { msg: 'hello' }),
      textResponse('Done'),
    ])

    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'test-model',
      agentName: 'tooler',
    })

    await runner.run(
      [{ role: 'user', content: [{ type: 'text', text: 'test' }] }],
      { onTrace: (e) => traces.push(e), runId: 'run-2', traceAgent: 'tooler' },
    )

    const toolTraces = traces.filter(t => t.type === 'tool_call')
    expect(toolTraces).toHaveLength(1)

    const tool = toolTraces[0]!
    expect(tool.type).toBe('tool_call')
    expect(tool.runId).toBe('run-2')
    expect(tool.agent).toBe('tooler')
    expect(tool.tool).toBe('echo')
    // Successful execution => isError must be false.
    expect(tool.isError).toBe(false)
    expect(tool.durationMs).toBeGreaterThanOrEqual(0)
  })

  it('tool_call trace has isError: true on tool failure', async () => {
    const traces: TraceEvent[] = []
    const registry = new ToolRegistry()
    registry.register(
      defineTool({
        name: 'boom',
        description: 'fails',
        inputSchema: z.object({}),
        // Tool always throws; the runner should record it, not crash.
        execute: async () => { throw new Error('fail') },
      }),
    )
    const executor = new ToolExecutor(registry)
    const adapter = mockAdapter([
      toolUseResponse('boom', {}),
      textResponse('Handled'),
    ])

    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'test-model',
      agentName: 'err-agent',
    })

    await runner.run(
      [{ role: 'user', content: [{ type: 'text', text: 'test' }] }],
      { onTrace: (e) => traces.push(e), runId: 'run-3', traceAgent: 'err-agent' },
    )

    const toolTraces = traces.filter(t => t.type === 'tool_call')
    expect(toolTraces).toHaveLength(1)
    // The failed tool call is still traced, flagged as an error.
    expect(toolTraces[0]!.isError).toBe(true)
  })

  it('does not call Date.now for LLM timing when onTrace is absent', async () => {
    // This test just verifies no errors occur when onTrace is not provided
    const registry = new ToolRegistry()
    const executor = new ToolExecutor(registry)
    const adapter = mockAdapter([textResponse('hi')])

    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'test-model',
    })

    // Empty options object: the run must complete normally without tracing.
    const result = await runner.run(
      [{ role: 'user', content: [{ type: 'text', text: 'test' }] }],
      {},
    )

    expect(result.output).toBe('hi')
  })
})
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Agent-level trace events
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Tests for Agent-level trace emission: the aggregate 'agent' event, runId
// propagation across all event types, resilience to broken callbacks, and
// per-turn (non-aggregated) token usage in llm_call events.
describe('Agent trace events', () => {
  it('emits agent trace with turns, tokens, and toolCalls', async () => {
    const traces: TraceEvent[] = []
    const config: AgentConfig = {
      name: 'my-agent',
      model: 'mock-model',
      systemPrompt: 'You are a test.',
    }

    // One scripted text response => single-turn run with no tool calls.
    const agent = buildMockAgent(config, [textResponse('Hello world')])

    const runOptions: Partial<RunOptions> = {
      onTrace: (e) => traces.push(e),
      runId: 'run-agent-1',
      traceAgent: 'my-agent',
    }

    const result = await agent.run('Say hello', runOptions)
    expect(result.success).toBe(true)

    const agentTraces = traces.filter(t => t.type === 'agent')
    expect(agentTraces).toHaveLength(1)

    // Aggregate event: one turn, textResponse's fixed usage, zero tools.
    const at = agentTraces[0]!
    expect(at.type).toBe('agent')
    expect(at.runId).toBe('run-agent-1')
    expect(at.agent).toBe('my-agent')
    expect(at.turns).toBe(1) // one assistant message
    expect(at.tokens).toEqual({ input_tokens: 10, output_tokens: 20 })
    expect(at.toolCalls).toBe(0)
    expect(at.durationMs).toBeGreaterThanOrEqual(0)
  })

  it('all traces share the same runId', async () => {
    const traces: TraceEvent[] = []
    const registry = new ToolRegistry()
    registry.register(
      defineTool({
        name: 'greet',
        description: 'greets',
        inputSchema: z.object({ name: z.string() }),
        execute: async ({ name }) => ({ data: `Hi ${name}` }),
      }),
    )
    const executor = new ToolExecutor(registry)
    const config: AgentConfig = {
      name: 'multi-trace-agent',
      model: 'mock-model',
      tools: ['greet'],
    }

    // Two-turn script (tool call then text) so multiple event types fire.
    const agent = buildMockAgent(
      config,
      [
        toolUseResponse('greet', { name: 'world' }),
        textResponse('Done'),
      ],
      registry,
      executor,
    )

    const runId = 'shared-run-id'
    await agent.run('test', {
      onTrace: (e) => traces.push(e),
      runId,
      traceAgent: 'multi-trace-agent',
    })

    // Should have: 2 llm_call, 1 tool_call, 1 agent
    expect(traces.length).toBeGreaterThanOrEqual(4)

    // Every event, regardless of type, carries the caller-supplied runId.
    for (const trace of traces) {
      expect(trace.runId).toBe(runId)
    }
  })

  it('onTrace error does not break agent execution', async () => {
    const config: AgentConfig = {
      name: 'resilient-agent',
      model: 'mock-model',
    }

    const agent = buildMockAgent(config, [textResponse('OK')])

    // Callback always throws; emitTrace is expected to swallow it.
    const result = await agent.run('test', {
      onTrace: () => { throw new Error('callback exploded') },
      runId: 'run-err',
      traceAgent: 'resilient-agent',
    })

    // The run should still succeed despite the broken callback
    expect(result.success).toBe(true)
    expect(result.output).toBe('OK')
  })

  it('per-turn token usage in llm_call traces', async () => {
    const traces: TraceEvent[] = []
    const registry = new ToolRegistry()
    registry.register(
      defineTool({
        name: 'noop',
        description: 'noop',
        inputSchema: z.object({}),
        execute: async () => ({ data: 'ok' }),
      }),
    )
    const executor = new ToolExecutor(registry)

    // Two LLM calls: first triggers a tool, second is the final response
    const resp1: LLMResponse = {
      id: 'r1',
      content: [{ type: 'tool_use', id: 'tu1', name: 'noop', input: {} }],
      model: 'mock-model',
      stop_reason: 'tool_use',
      usage: { input_tokens: 100, output_tokens: 50 },
    }
    const resp2: LLMResponse = {
      id: 'r2',
      content: [{ type: 'text', text: 'Final answer' }],
      model: 'mock-model',
      stop_reason: 'end_turn',
      usage: { input_tokens: 200, output_tokens: 100 },
    }

    const adapter = mockAdapter([resp1, resp2])
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      agentName: 'token-agent',
    })

    await runner.run(
      [{ role: 'user', content: [{ type: 'text', text: 'go' }] }],
      { onTrace: (e) => traces.push(e), runId: 'run-tok', traceAgent: 'token-agent' },
    )

    const llmTraces = traces.filter(t => t.type === 'llm_call')
    expect(llmTraces).toHaveLength(2)

    // Each trace carries its own turn's token usage, not the aggregate
    expect(llmTraces[0]!.tokens).toEqual({ input_tokens: 100, output_tokens: 50 })
    expect(llmTraces[1]!.tokens).toEqual({ input_tokens: 200, output_tokens: 100 })

    // Turn numbers should be sequential
    expect(llmTraces[0]!.turn).toBe(1)
    expect(llmTraces[1]!.turn).toBe(2)
  })
})
|