feat: add onTrace observability callback (#18)
Add a lightweight onTrace callback to OrchestratorConfig that emits structured span events for LLM calls, tool executions, task lifecycle, and agent lifecycle. Zero overhead when not subscribed, zero new dependencies.
This commit is contained in:
parent
17546fd93e
commit
f4d3bb4e8d
|
|
@ -32,6 +32,7 @@ import type {
|
|||
TokenUsage,
|
||||
ToolUseContext,
|
||||
} from '../types.js'
|
||||
import { emitTrace } from '../utils/trace.js'
|
||||
import type { ToolDefinition as FrameworkToolDefinition, ToolRegistry } from '../tool/framework.js'
|
||||
import type { ToolExecutor } from '../tool/executor.js'
|
||||
import { createAdapter } from '../llm/adapter.js'
|
||||
|
|
@ -158,12 +159,12 @@ export class Agent {
|
|||
*
|
||||
* Use this for one-shot queries where past context is irrelevant.
|
||||
*/
|
||||
async run(prompt: string): Promise<AgentRunResult> {
|
||||
async run(prompt: string, runOptions?: Partial<RunOptions>): Promise<AgentRunResult> {
|
||||
const messages: LLMMessage[] = [
|
||||
{ role: 'user', content: [{ type: 'text', text: prompt }] },
|
||||
]
|
||||
|
||||
return this.executeRun(messages)
|
||||
return this.executeRun(messages, runOptions)
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -266,15 +267,23 @@ export class Agent {
|
|||
* Shared execution path used by both `run` and `prompt`.
|
||||
* Handles state transitions and error wrapping.
|
||||
*/
|
||||
private async executeRun(messages: LLMMessage[]): Promise<AgentRunResult> {
|
||||
private async executeRun(
|
||||
messages: LLMMessage[],
|
||||
callerOptions?: Partial<RunOptions>,
|
||||
): Promise<AgentRunResult> {
|
||||
this.transitionTo('running')
|
||||
|
||||
const agentStartMs = callerOptions?.onTrace ? Date.now() : 0
|
||||
|
||||
try {
|
||||
const runner = await this.getRunner()
|
||||
const internalOnMessage = (msg: LLMMessage) => {
|
||||
this.state.messages.push(msg)
|
||||
callerOptions?.onMessage?.(msg)
|
||||
}
|
||||
const runOptions: RunOptions = {
|
||||
onMessage: msg => {
|
||||
this.state.messages.push(msg)
|
||||
},
|
||||
...callerOptions,
|
||||
onMessage: internalOnMessage,
|
||||
}
|
||||
|
||||
const result = await runner.run(messages, runOptions)
|
||||
|
|
@ -282,21 +291,25 @@ export class Agent {
|
|||
|
||||
// --- Structured output validation ---
|
||||
if (this.config.outputSchema) {
|
||||
return this.validateStructuredOutput(
|
||||
const validated = await this.validateStructuredOutput(
|
||||
messages,
|
||||
result,
|
||||
runner,
|
||||
runOptions,
|
||||
)
|
||||
this.emitAgentTrace(callerOptions, agentStartMs, validated)
|
||||
return validated
|
||||
}
|
||||
|
||||
this.transitionTo('completed')
|
||||
return this.toAgentRunResult(result, true)
|
||||
const agentResult = this.toAgentRunResult(result, true)
|
||||
this.emitAgentTrace(callerOptions, agentStartMs, agentResult)
|
||||
return agentResult
|
||||
} catch (err) {
|
||||
const error = err instanceof Error ? err : new Error(String(err))
|
||||
this.transitionToError(error)
|
||||
|
||||
return {
|
||||
const errorResult: AgentRunResult = {
|
||||
success: false,
|
||||
output: error.message,
|
||||
messages: [],
|
||||
|
|
@ -304,9 +317,33 @@ export class Agent {
|
|||
toolCalls: [],
|
||||
structured: undefined,
|
||||
}
|
||||
this.emitAgentTrace(callerOptions, agentStartMs, errorResult)
|
||||
return errorResult
|
||||
}
|
||||
}
|
||||
|
||||
/** Emit an `agent` trace event if `onTrace` is provided. */
|
||||
private emitAgentTrace(
|
||||
options: Partial<RunOptions> | undefined,
|
||||
startMs: number,
|
||||
result: AgentRunResult,
|
||||
): void {
|
||||
if (!options?.onTrace) return
|
||||
const endMs = Date.now()
|
||||
emitTrace(options.onTrace, {
|
||||
type: 'agent',
|
||||
runId: options.runId ?? '',
|
||||
taskId: options.taskId,
|
||||
agent: options.traceAgent ?? this.name,
|
||||
turns: result.messages.filter(m => m.role === 'assistant').length,
|
||||
tokens: result.tokenUsage,
|
||||
toolCalls: result.toolCalls.length,
|
||||
startMs,
|
||||
endMs,
|
||||
durationMs: endMs - startMs,
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate agent output against the configured `outputSchema`.
|
||||
* On first validation failure, retry once with error feedback.
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
*/
|
||||
|
||||
import type { AgentRunResult } from '../types.js'
|
||||
import type { RunOptions } from './runner.js'
|
||||
import type { Agent } from './agent.js'
|
||||
import { Semaphore } from '../utils/semaphore.js'
|
||||
|
||||
|
|
@ -123,12 +124,16 @@ export class AgentPool {
|
|||
*
|
||||
* @throws {Error} If the agent name is not found.
|
||||
*/
|
||||
async run(agentName: string, prompt: string): Promise<AgentRunResult> {
|
||||
async run(
|
||||
agentName: string,
|
||||
prompt: string,
|
||||
runOptions?: Partial<RunOptions>,
|
||||
): Promise<AgentRunResult> {
|
||||
const agent = this.requireAgent(agentName)
|
||||
|
||||
await this.semaphore.acquire()
|
||||
try {
|
||||
return await agent.run(prompt)
|
||||
return await agent.run(prompt, runOptions)
|
||||
} finally {
|
||||
this.semaphore.release()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,9 @@ import type {
|
|||
ToolUseContext,
|
||||
LLMAdapter,
|
||||
LLMChatOptions,
|
||||
TraceEvent,
|
||||
} from '../types.js'
|
||||
import { emitTrace } from '../utils/trace.js'
|
||||
import type { ToolRegistry } from '../tool/framework.js'
|
||||
import type { ToolExecutor } from '../tool/executor.js'
|
||||
|
||||
|
|
@ -76,6 +78,14 @@ export interface RunOptions {
|
|||
readonly onToolResult?: (name: string, result: ToolResult) => void
|
||||
/** Fired after each complete {@link LLMMessage} is appended. */
|
||||
readonly onMessage?: (message: LLMMessage) => void
|
||||
/** Trace callback for observability spans. */
|
||||
readonly onTrace?: (event: TraceEvent) => void
|
||||
/** Run ID for trace correlation. */
|
||||
readonly runId?: string
|
||||
/** Task ID for trace correlation. */
|
||||
readonly taskId?: string
|
||||
/** Agent name for trace correlation (overrides RunnerOptions.agentName). */
|
||||
readonly traceAgent?: string
|
||||
}
|
||||
|
||||
/** The aggregated result returned when a full run completes. */
|
||||
|
|
@ -254,7 +264,24 @@ export class AgentRunner {
|
|||
// ------------------------------------------------------------------
|
||||
// Step 1: Call the LLM and collect the full response for this turn.
|
||||
// ------------------------------------------------------------------
|
||||
const llmStartMs = options.onTrace ? Date.now() : 0
|
||||
const response = await this.adapter.chat(conversationMessages, baseChatOptions)
|
||||
if (options.onTrace) {
|
||||
const llmEndMs = Date.now()
|
||||
const agentName = options.traceAgent ?? this.options.agentName ?? 'unknown'
|
||||
emitTrace(options.onTrace, {
|
||||
type: 'llm_call',
|
||||
runId: options.runId ?? '',
|
||||
taskId: options.taskId,
|
||||
agent: agentName,
|
||||
model: this.options.model,
|
||||
turn: turns,
|
||||
tokens: response.usage,
|
||||
startMs: llmStartMs,
|
||||
endMs: llmEndMs,
|
||||
durationMs: llmEndMs - llmStartMs,
|
||||
})
|
||||
}
|
||||
|
||||
totalUsage = addTokenUsage(totalUsage, response.usage)
|
||||
|
||||
|
|
@ -319,10 +346,26 @@ export class AgentRunner {
|
|||
result = { data: message, isError: true }
|
||||
}
|
||||
|
||||
const duration = Date.now() - startTime
|
||||
const endTime = Date.now()
|
||||
const duration = endTime - startTime
|
||||
|
||||
options.onToolResult?.(block.name, result)
|
||||
|
||||
if (options.onTrace) {
|
||||
const agentName = options.traceAgent ?? this.options.agentName ?? 'unknown'
|
||||
emitTrace(options.onTrace, {
|
||||
type: 'tool_call',
|
||||
runId: options.runId ?? '',
|
||||
taskId: options.taskId,
|
||||
agent: agentName,
|
||||
tool: block.name,
|
||||
isError: result.isError ?? false,
|
||||
startMs: startTime,
|
||||
endMs: endTime,
|
||||
durationMs: duration,
|
||||
})
|
||||
}
|
||||
|
||||
const record: ToolCallRecord = {
|
||||
toolName: block.name,
|
||||
input: block.input,
|
||||
|
|
|
|||
|
|
@ -161,7 +161,16 @@ export type {
|
|||
OrchestratorConfig,
|
||||
OrchestratorEvent,
|
||||
|
||||
// Trace
|
||||
TraceEvent,
|
||||
LLMCallTrace,
|
||||
ToolCallTrace,
|
||||
TaskTrace,
|
||||
AgentTrace,
|
||||
|
||||
// Memory
|
||||
MemoryEntry,
|
||||
MemoryStore,
|
||||
} from './types.js'
|
||||
|
||||
export { emitTrace, generateRunId } from './utils/trace.js'
|
||||
|
|
|
|||
|
|
@ -52,8 +52,10 @@ import type {
|
|||
TeamRunResult,
|
||||
TokenUsage,
|
||||
} from '../types.js'
|
||||
import type { RunOptions } from '../agent/runner.js'
|
||||
import { Agent } from '../agent/agent.js'
|
||||
import { AgentPool } from '../agent/pool.js'
|
||||
import { emitTrace, generateRunId } from '../utils/trace.js'
|
||||
import { ToolRegistry } from '../tool/framework.js'
|
||||
import { ToolExecutor } from '../tool/executor.js'
|
||||
import { registerBuiltInTools } from '../tool/built-in/index.js'
|
||||
|
|
@ -260,6 +262,8 @@ interface RunContext {
|
|||
readonly scheduler: Scheduler
|
||||
readonly agentResults: Map<string, AgentRunResult>
|
||||
readonly config: OrchestratorConfig
|
||||
/** Trace run ID, present when `onTrace` is configured. */
|
||||
readonly runId?: string
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -338,10 +342,19 @@ async function executeQueue(
|
|||
// Build the prompt: inject shared memory context + task description
|
||||
const prompt = await buildTaskPrompt(task, team)
|
||||
|
||||
// Build trace context for this task's agent run
|
||||
const traceOptions: Partial<RunOptions> | undefined = config.onTrace
|
||||
? { onTrace: config.onTrace, runId: ctx.runId ?? '', taskId: task.id, traceAgent: assignee }
|
||||
: undefined
|
||||
|
||||
const taskStartMs = config.onTrace ? Date.now() : 0
|
||||
let retryCount = 0
|
||||
|
||||
const result = await executeWithRetry(
|
||||
() => pool.run(assignee, prompt),
|
||||
() => pool.run(assignee, prompt, traceOptions),
|
||||
task,
|
||||
(retryData) => {
|
||||
retryCount = retryData.attempt
|
||||
config.onProgress?.({
|
||||
type: 'task_retry',
|
||||
task: task.id,
|
||||
|
|
@ -351,6 +364,23 @@ async function executeQueue(
|
|||
},
|
||||
)
|
||||
|
||||
// Emit task trace
|
||||
if (config.onTrace) {
|
||||
const taskEndMs = Date.now()
|
||||
emitTrace(config.onTrace, {
|
||||
type: 'task',
|
||||
runId: ctx.runId ?? '',
|
||||
taskId: task.id,
|
||||
taskTitle: task.title,
|
||||
agent: assignee,
|
||||
success: result.success,
|
||||
retries: retryCount,
|
||||
startMs: taskStartMs,
|
||||
endMs: taskEndMs,
|
||||
durationMs: taskEndMs - taskStartMs,
|
||||
})
|
||||
}
|
||||
|
||||
ctx.agentResults.set(`${assignee}:${task.id}`, result)
|
||||
|
||||
if (result.success) {
|
||||
|
|
@ -441,8 +471,8 @@ async function buildTaskPrompt(task: Task, team: Team): Promise<string> {
|
|||
*/
|
||||
export class OpenMultiAgent {
|
||||
private readonly config: Required<
|
||||
Omit<OrchestratorConfig, 'onProgress' | 'defaultBaseURL' | 'defaultApiKey'>
|
||||
> & Pick<OrchestratorConfig, 'onProgress' | 'defaultBaseURL' | 'defaultApiKey'>
|
||||
Omit<OrchestratorConfig, 'onProgress' | 'onTrace' | 'defaultBaseURL' | 'defaultApiKey'>
|
||||
> & Pick<OrchestratorConfig, 'onProgress' | 'onTrace' | 'defaultBaseURL' | 'defaultApiKey'>
|
||||
|
||||
private readonly teams: Map<string, Team> = new Map()
|
||||
private completedTaskCount = 0
|
||||
|
|
@ -463,6 +493,7 @@ export class OpenMultiAgent {
|
|||
defaultBaseURL: config.defaultBaseURL,
|
||||
defaultApiKey: config.defaultApiKey,
|
||||
onProgress: config.onProgress,
|
||||
onTrace: config.onTrace,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -520,7 +551,11 @@ export class OpenMultiAgent {
|
|||
data: { prompt },
|
||||
})
|
||||
|
||||
const result = await agent.run(prompt)
|
||||
const traceOptions: Partial<RunOptions> | undefined = this.config.onTrace
|
||||
? { onTrace: this.config.onTrace, runId: generateRunId(), traceAgent: config.name }
|
||||
: undefined
|
||||
|
||||
const result = await agent.run(prompt, traceOptions)
|
||||
|
||||
this.config.onProgress?.({
|
||||
type: 'agent_complete',
|
||||
|
|
@ -578,6 +613,7 @@ export class OpenMultiAgent {
|
|||
|
||||
const decompositionPrompt = this.buildDecompositionPrompt(goal, agentConfigs)
|
||||
const coordinatorAgent = buildAgent(coordinatorConfig)
|
||||
const runId = this.config.onTrace ? generateRunId() : undefined
|
||||
|
||||
this.config.onProgress?.({
|
||||
type: 'agent_start',
|
||||
|
|
@ -585,7 +621,10 @@ export class OpenMultiAgent {
|
|||
data: { phase: 'decomposition', goal },
|
||||
})
|
||||
|
||||
const decompositionResult = await coordinatorAgent.run(decompositionPrompt)
|
||||
const decompTraceOptions: Partial<RunOptions> | undefined = this.config.onTrace
|
||||
? { onTrace: this.config.onTrace, runId: runId ?? '', traceAgent: 'coordinator' }
|
||||
: undefined
|
||||
const decompositionResult = await coordinatorAgent.run(decompositionPrompt, decompTraceOptions)
|
||||
const agentResults = new Map<string, AgentRunResult>()
|
||||
agentResults.set('coordinator:decompose', decompositionResult)
|
||||
|
||||
|
|
@ -629,6 +668,7 @@ export class OpenMultiAgent {
|
|||
scheduler,
|
||||
agentResults,
|
||||
config: this.config,
|
||||
runId,
|
||||
}
|
||||
|
||||
await executeQueue(queue, ctx)
|
||||
|
|
@ -637,7 +677,10 @@ export class OpenMultiAgent {
|
|||
// Step 5: Coordinator synthesises final result
|
||||
// ------------------------------------------------------------------
|
||||
const synthesisPrompt = await this.buildSynthesisPrompt(goal, queue.list(), team)
|
||||
const synthesisResult = await coordinatorAgent.run(synthesisPrompt)
|
||||
const synthTraceOptions: Partial<RunOptions> | undefined = this.config.onTrace
|
||||
? { onTrace: this.config.onTrace, runId: runId ?? '', traceAgent: 'coordinator' }
|
||||
: undefined
|
||||
const synthesisResult = await coordinatorAgent.run(synthesisPrompt, synthTraceOptions)
|
||||
agentResults.set('coordinator', synthesisResult)
|
||||
|
||||
this.config.onProgress?.({
|
||||
|
|
@ -707,6 +750,7 @@ export class OpenMultiAgent {
|
|||
scheduler,
|
||||
agentResults,
|
||||
config: this.config,
|
||||
runId: this.config.onTrace ? generateRunId() : undefined,
|
||||
}
|
||||
|
||||
await executeQueue(queue, ctx)
|
||||
|
|
|
|||
61
src/types.ts
61
src/types.ts
|
|
@ -316,8 +316,69 @@ export interface OrchestratorConfig {
|
|||
readonly defaultBaseURL?: string
|
||||
readonly defaultApiKey?: string
|
||||
onProgress?: (event: OrchestratorEvent) => void
|
||||
onTrace?: (event: TraceEvent) => void
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Trace events — lightweight observability spans
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Shared fields present on every trace event. */
interface TraceEventBase {
  /** Unique identifier for the entire run (runTeam / runTasks / runAgent call). */
  readonly runId: string
  /** Discriminant; narrowed to a string literal by each concrete event type. */
  readonly type: string
  /** Unix epoch ms when the span started. */
  readonly startMs: number
  /** Unix epoch ms when the span ended. */
  readonly endMs: number
  /** Wall-clock duration in milliseconds (`endMs - startMs`). */
  readonly durationMs: number
  /** Agent name associated with this span. */
  readonly agent?: string
  /** Task ID associated with this span. */
  readonly taskId?: string
}

/** Emitted for each LLM API call (one per agent turn). */
export interface LLMCallTrace extends TraceEventBase {
  readonly type: 'llm_call'
  /** Agent that issued the call (required for this event kind). */
  readonly agent: string
  /** Model identifier, when the runner was configured with one. */
  readonly model?: string
  /** 1-based turn index within the conversation loop. */
  readonly turn: number
  /** Token usage for this single call, not the run aggregate. */
  readonly tokens: TokenUsage
}

/** Emitted for each tool execution. */
export interface ToolCallTrace extends TraceEventBase {
  readonly type: 'tool_call'
  readonly agent: string
  /** Name of the executed tool. */
  readonly tool: string
  /** True when the tool returned an error result (including thrown errors). */
  readonly isError: boolean
}

/** Emitted when a task completes (wraps the full retry sequence). */
export interface TaskTrace extends TraceEventBase {
  readonly type: 'task'
  /** Task ID (required for this event kind). */
  readonly taskId: string
  readonly taskTitle: string
  readonly agent: string
  /** Outcome of the final attempt. */
  readonly success: boolean
  /** Number of retries performed before the final attempt. */
  readonly retries: number
}

/** Emitted when an agent run completes (wraps the full conversation loop). */
export interface AgentTrace extends TraceEventBase {
  readonly type: 'agent'
  readonly agent: string
  /** Number of assistant messages produced during the run. */
  readonly turns: number
  /** Aggregate token usage across all turns. */
  readonly tokens: TokenUsage
  /** Total number of tool invocations during the run. */
  readonly toolCalls: number
}

/** Discriminated union of all trace event types. */
export type TraceEvent = LLMCallTrace | ToolCallTrace | TaskTrace | AgentTrace
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Memory
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -0,0 +1,27 @@
|
|||
/**
|
||||
* @fileoverview Trace emission utilities for the observability layer.
|
||||
*/
|
||||
|
||||
import { randomUUID } from 'node:crypto'
|
||||
import type { TraceEvent } from '../types.js'
|
||||
|
||||
/**
|
||||
* Safely emit a trace event. Swallows callback errors so a broken
|
||||
* subscriber never crashes agent execution.
|
||||
*/
|
||||
export function emitTrace(
|
||||
fn: ((event: TraceEvent) => void) | undefined,
|
||||
event: TraceEvent,
|
||||
): void {
|
||||
if (!fn) return
|
||||
try {
|
||||
fn(event)
|
||||
} catch {
|
||||
// Intentionally swallowed — observability must never break execution.
|
||||
}
|
||||
}
|
||||
|
||||
/** Generate a unique run ID for trace correlation. */
|
||||
export function generateRunId(): string {
|
||||
return randomUUID()
|
||||
}
|
||||
|
|
@ -0,0 +1,434 @@
|
|||
import { describe, it, expect, vi } from 'vitest'
|
||||
import { z } from 'zod'
|
||||
import { Agent } from '../src/agent/agent.js'
|
||||
import { AgentRunner, type RunOptions } from '../src/agent/runner.js'
|
||||
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
|
||||
import { ToolExecutor } from '../src/tool/executor.js'
|
||||
import { executeWithRetry } from '../src/orchestrator/orchestrator.js'
|
||||
import { emitTrace, generateRunId } from '../src/utils/trace.js'
|
||||
import { createTask } from '../src/task/task.js'
|
||||
import type {
|
||||
AgentConfig,
|
||||
AgentRunResult,
|
||||
LLMAdapter,
|
||||
LLMResponse,
|
||||
TraceEvent,
|
||||
} from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock adapters
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function mockAdapter(responses: LLMResponse[]): LLMAdapter {
|
||||
let callIndex = 0
|
||||
return {
|
||||
name: 'mock',
|
||||
async chat() {
|
||||
return responses[callIndex++]!
|
||||
},
|
||||
async *stream() {
|
||||
/* unused */
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
function textResponse(text: string): LLMResponse {
|
||||
return {
|
||||
id: `resp-${Math.random().toString(36).slice(2)}`,
|
||||
content: [{ type: 'text' as const, text }],
|
||||
model: 'mock-model',
|
||||
stop_reason: 'end_turn',
|
||||
usage: { input_tokens: 10, output_tokens: 20 },
|
||||
}
|
||||
}
|
||||
|
||||
function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse {
|
||||
return {
|
||||
id: `resp-${Math.random().toString(36).slice(2)}`,
|
||||
content: [
|
||||
{
|
||||
type: 'tool_use' as const,
|
||||
id: `tu-${Math.random().toString(36).slice(2)}`,
|
||||
name: toolName,
|
||||
input,
|
||||
},
|
||||
],
|
||||
model: 'mock-model',
|
||||
stop_reason: 'tool_use',
|
||||
usage: { input_tokens: 15, output_tokens: 25 },
|
||||
}
|
||||
}
|
||||
|
||||
function buildMockAgent(
|
||||
config: AgentConfig,
|
||||
responses: LLMResponse[],
|
||||
registry?: ToolRegistry,
|
||||
executor?: ToolExecutor,
|
||||
): Agent {
|
||||
const reg = registry ?? new ToolRegistry()
|
||||
const exec = executor ?? new ToolExecutor(reg)
|
||||
const adapter = mockAdapter(responses)
|
||||
const agent = new Agent(config, reg, exec)
|
||||
|
||||
const runner = new AgentRunner(adapter, reg, exec, {
|
||||
model: config.model,
|
||||
systemPrompt: config.systemPrompt,
|
||||
maxTurns: config.maxTurns,
|
||||
maxTokens: config.maxTokens,
|
||||
temperature: config.temperature,
|
||||
agentName: config.name,
|
||||
})
|
||||
;(agent as any).runner = runner
|
||||
|
||||
return agent
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// emitTrace helper
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('emitTrace', () => {
|
||||
it('does nothing when fn is undefined', () => {
|
||||
// Should not throw
|
||||
emitTrace(undefined, {
|
||||
type: 'agent',
|
||||
runId: 'r1',
|
||||
agent: 'a',
|
||||
turns: 1,
|
||||
tokens: { input_tokens: 0, output_tokens: 0 },
|
||||
toolCalls: 0,
|
||||
startMs: 0,
|
||||
endMs: 0,
|
||||
durationMs: 0,
|
||||
})
|
||||
})
|
||||
|
||||
it('calls fn with the event', () => {
|
||||
const fn = vi.fn()
|
||||
const event: TraceEvent = {
|
||||
type: 'agent',
|
||||
runId: 'r1',
|
||||
agent: 'a',
|
||||
turns: 1,
|
||||
tokens: { input_tokens: 0, output_tokens: 0 },
|
||||
toolCalls: 0,
|
||||
startMs: 0,
|
||||
endMs: 0,
|
||||
durationMs: 0,
|
||||
}
|
||||
emitTrace(fn, event)
|
||||
expect(fn).toHaveBeenCalledWith(event)
|
||||
})
|
||||
|
||||
it('swallows errors thrown by callback', () => {
|
||||
const fn = () => { throw new Error('boom') }
|
||||
expect(() =>
|
||||
emitTrace(fn, {
|
||||
type: 'agent',
|
||||
runId: 'r1',
|
||||
agent: 'a',
|
||||
turns: 1,
|
||||
tokens: { input_tokens: 0, output_tokens: 0 },
|
||||
toolCalls: 0,
|
||||
startMs: 0,
|
||||
endMs: 0,
|
||||
durationMs: 0,
|
||||
}),
|
||||
).not.toThrow()
|
||||
})
|
||||
})
|
||||
|
||||
describe('generateRunId', () => {
|
||||
it('returns a UUID string', () => {
|
||||
const id = generateRunId()
|
||||
expect(id).toMatch(/^[0-9a-f-]{36}$/)
|
||||
})
|
||||
|
||||
it('returns unique IDs', () => {
|
||||
const ids = new Set(Array.from({ length: 100 }, generateRunId))
|
||||
expect(ids.size).toBe(100)
|
||||
})
|
||||
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// AgentRunner trace events
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Integration tests: drive AgentRunner with scripted adapters and assert the
// trace spans it emits (llm_call / tool_call) carry correct correlation fields.
describe('AgentRunner trace events', () => {
  it('emits llm_call trace for each LLM turn', async () => {
    const traces: TraceEvent[] = []
    const registry = new ToolRegistry()
    const executor = new ToolExecutor(registry)
    const adapter = mockAdapter([textResponse('Hello!')])

    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'test-model',
      agentName: 'test-agent',
    })

    const runOptions: RunOptions = {
      onTrace: (e) => traces.push(e),
      runId: 'run-1',
      traceAgent: 'test-agent',
    }

    await runner.run(
      [{ role: 'user', content: [{ type: 'text', text: 'hi' }] }],
      runOptions,
    )

    // One scripted response => exactly one llm_call span.
    const llmTraces = traces.filter(t => t.type === 'llm_call')
    expect(llmTraces).toHaveLength(1)

    const llm = llmTraces[0]!
    expect(llm.type).toBe('llm_call')
    expect(llm.runId).toBe('run-1')
    expect(llm.agent).toBe('test-agent')
    expect(llm.model).toBe('test-model')
    expect(llm.turn).toBe(1)
    // Per-call usage from textResponse's fixed 10/20 token counts.
    expect(llm.tokens).toEqual({ input_tokens: 10, output_tokens: 20 })
    expect(llm.durationMs).toBeGreaterThanOrEqual(0)
    expect(llm.startMs).toBeLessThanOrEqual(llm.endMs)
  })

  it('emits tool_call trace with correct fields', async () => {
    const traces: TraceEvent[] = []
    const registry = new ToolRegistry()
    registry.register(
      defineTool({
        name: 'echo',
        description: 'echoes',
        inputSchema: z.object({ msg: z.string() }),
        execute: async ({ msg }) => ({ data: msg }),
      }),
    )
    const executor = new ToolExecutor(registry)
    // Turn 1 invokes the tool; turn 2 ends the conversation.
    const adapter = mockAdapter([
      toolUseResponse('echo', { msg: 'hello' }),
      textResponse('Done'),
    ])

    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'test-model',
      agentName: 'tooler',
    })

    await runner.run(
      [{ role: 'user', content: [{ type: 'text', text: 'test' }] }],
      { onTrace: (e) => traces.push(e), runId: 'run-2', traceAgent: 'tooler' },
    )

    const toolTraces = traces.filter(t => t.type === 'tool_call')
    expect(toolTraces).toHaveLength(1)

    const tool = toolTraces[0]!
    expect(tool.type).toBe('tool_call')
    expect(tool.runId).toBe('run-2')
    expect(tool.agent).toBe('tooler')
    expect(tool.tool).toBe('echo')
    expect(tool.isError).toBe(false)
    expect(tool.durationMs).toBeGreaterThanOrEqual(0)
  })

  it('tool_call trace has isError: true on tool failure', async () => {
    const traces: TraceEvent[] = []
    const registry = new ToolRegistry()
    registry.register(
      defineTool({
        name: 'boom',
        description: 'fails',
        inputSchema: z.object({}),
        // Throwing tool: the runner should record the failure, not crash.
        execute: async () => { throw new Error('fail') },
      }),
    )
    const executor = new ToolExecutor(registry)
    const adapter = mockAdapter([
      toolUseResponse('boom', {}),
      textResponse('Handled'),
    ])

    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'test-model',
      agentName: 'err-agent',
    })

    await runner.run(
      [{ role: 'user', content: [{ type: 'text', text: 'test' }] }],
      { onTrace: (e) => traces.push(e), runId: 'run-3', traceAgent: 'err-agent' },
    )

    const toolTraces = traces.filter(t => t.type === 'tool_call')
    expect(toolTraces).toHaveLength(1)
    expect(toolTraces[0]!.isError).toBe(true)
  })

  it('does not call Date.now for LLM timing when onTrace is absent', async () => {
    // This test just verifies no errors occur when onTrace is not provided
    const registry = new ToolRegistry()
    const executor = new ToolExecutor(registry)
    const adapter = mockAdapter([textResponse('hi')])

    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'test-model',
    })

    const result = await runner.run(
      [{ role: 'user', content: [{ type: 'text', text: 'test' }] }],
      {},
    )

    expect(result.output).toBe('hi')
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Agent-level trace events
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Integration tests for Agent-level spans: the aggregate `agent` event,
// runId propagation across all span kinds, and subscriber fault isolation.
describe('Agent trace events', () => {
  it('emits agent trace with turns, tokens, and toolCalls', async () => {
    const traces: TraceEvent[] = []
    const config: AgentConfig = {
      name: 'my-agent',
      model: 'mock-model',
      systemPrompt: 'You are a test.',
    }

    const agent = buildMockAgent(config, [textResponse('Hello world')])

    const runOptions: Partial<RunOptions> = {
      onTrace: (e) => traces.push(e),
      runId: 'run-agent-1',
      traceAgent: 'my-agent',
    }

    const result = await agent.run('Say hello', runOptions)
    expect(result.success).toBe(true)

    const agentTraces = traces.filter(t => t.type === 'agent')
    expect(agentTraces).toHaveLength(1)

    const at = agentTraces[0]!
    expect(at.type).toBe('agent')
    expect(at.runId).toBe('run-agent-1')
    expect(at.agent).toBe('my-agent')
    expect(at.turns).toBe(1) // one assistant message
    expect(at.tokens).toEqual({ input_tokens: 10, output_tokens: 20 })
    expect(at.toolCalls).toBe(0)
    expect(at.durationMs).toBeGreaterThanOrEqual(0)
  })

  it('all traces share the same runId', async () => {
    const traces: TraceEvent[] = []
    const registry = new ToolRegistry()
    registry.register(
      defineTool({
        name: 'greet',
        description: 'greets',
        inputSchema: z.object({ name: z.string() }),
        execute: async ({ name }) => ({ data: `Hi ${name}` }),
      }),
    )
    const executor = new ToolExecutor(registry)
    const config: AgentConfig = {
      name: 'multi-trace-agent',
      model: 'mock-model',
      tools: ['greet'],
    }

    // Two-turn script (tool call then final text) so multiple span kinds fire.
    const agent = buildMockAgent(
      config,
      [
        toolUseResponse('greet', { name: 'world' }),
        textResponse('Done'),
      ],
      registry,
      executor,
    )

    const runId = 'shared-run-id'
    await agent.run('test', {
      onTrace: (e) => traces.push(e),
      runId,
      traceAgent: 'multi-trace-agent',
    })

    // Should have: 2 llm_call, 1 tool_call, 1 agent
    expect(traces.length).toBeGreaterThanOrEqual(4)

    for (const trace of traces) {
      expect(trace.runId).toBe(runId)
    }
  })

  it('onTrace error does not break agent execution', async () => {
    const config: AgentConfig = {
      name: 'resilient-agent',
      model: 'mock-model',
    }

    const agent = buildMockAgent(config, [textResponse('OK')])

    // Subscriber always throws; emitTrace must contain the failure.
    const result = await agent.run('test', {
      onTrace: () => { throw new Error('callback exploded') },
      runId: 'run-err',
      traceAgent: 'resilient-agent',
    })

    // The run should still succeed despite the broken callback
    expect(result.success).toBe(true)
    expect(result.output).toBe('OK')
  })

  it('per-turn token usage in llm_call traces', async () => {
    const traces: TraceEvent[] = []
    const registry = new ToolRegistry()
    registry.register(
      defineTool({
        name: 'noop',
        description: 'noop',
        inputSchema: z.object({}),
        execute: async () => ({ data: 'ok' }),
      }),
    )
    const executor = new ToolExecutor(registry)

    // Two LLM calls: first triggers a tool, second is the final response
    const resp1: LLMResponse = {
      id: 'r1',
      content: [{ type: 'tool_use', id: 'tu1', name: 'noop', input: {} }],
      model: 'mock-model',
      stop_reason: 'tool_use',
      usage: { input_tokens: 100, output_tokens: 50 },
    }
    const resp2: LLMResponse = {
      id: 'r2',
      content: [{ type: 'text', text: 'Final answer' }],
      model: 'mock-model',
      stop_reason: 'end_turn',
      usage: { input_tokens: 200, output_tokens: 100 },
    }

    const adapter = mockAdapter([resp1, resp2])
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      agentName: 'token-agent',
    })

    await runner.run(
      [{ role: 'user', content: [{ type: 'text', text: 'go' }] }],
      { onTrace: (e) => traces.push(e), runId: 'run-tok', traceAgent: 'token-agent' },
    )

    const llmTraces = traces.filter(t => t.type === 'llm_call')
    expect(llmTraces).toHaveLength(2)

    // Each trace carries its own turn's token usage, not the aggregate
    expect(llmTraces[0]!.tokens).toEqual({ input_tokens: 100, output_tokens: 50 })
    expect(llmTraces[1]!.tokens).toEqual({ input_tokens: 200, output_tokens: 100 })

    // Turn numbers should be sequential
    expect(llmTraces[0]!.turn).toBe(1)
    expect(llmTraces[1]!.turn).toBe(2)
  })
})
|
||||
Loading…
Reference in New Issue