diff --git a/src/agent/agent.ts b/src/agent/agent.ts index df6b7df..7b38df5 100644 --- a/src/agent/agent.ts +++ b/src/agent/agent.ts @@ -32,6 +32,7 @@ import type { TokenUsage, ToolUseContext, } from '../types.js' +import { emitTrace } from '../utils/trace.js' import type { ToolDefinition as FrameworkToolDefinition, ToolRegistry } from '../tool/framework.js' import type { ToolExecutor } from '../tool/executor.js' import { createAdapter } from '../llm/adapter.js' @@ -158,12 +159,12 @@ export class Agent { * * Use this for one-shot queries where past context is irrelevant. */ - async run(prompt: string): Promise { + async run(prompt: string, runOptions?: Partial): Promise { const messages: LLMMessage[] = [ { role: 'user', content: [{ type: 'text', text: prompt }] }, ] - return this.executeRun(messages) + return this.executeRun(messages, runOptions) } /** @@ -266,15 +267,23 @@ export class Agent { * Shared execution path used by both `run` and `prompt`. * Handles state transitions and error wrapping. */ - private async executeRun(messages: LLMMessage[]): Promise { + private async executeRun( + messages: LLMMessage[], + callerOptions?: Partial, + ): Promise { this.transitionTo('running') + const agentStartMs = callerOptions?.onTrace ? Date.now() : 0 + try { const runner = await this.getRunner() + const internalOnMessage = (msg: LLMMessage) => { + this.state.messages.push(msg) + callerOptions?.onMessage?.(msg) + } const runOptions: RunOptions = { - onMessage: msg => { - this.state.messages.push(msg) - }, + ...callerOptions, + onMessage: internalOnMessage, } const result = await runner.run(messages, runOptions) @@ -282,21 +291,25 @@ export class Agent { // --- Structured output validation --- if (this.config.outputSchema) { - return this.validateStructuredOutput( + const validated = await this.validateStructuredOutput( messages, result, runner, runOptions, ) + this.emitAgentTrace(callerOptions, agentStartMs, validated) + return validated } this.transitionTo('completed') - return this.toAgentRunResult(result, true) + const agentResult = this.toAgentRunResult(result, true) + this.emitAgentTrace(callerOptions, agentStartMs, agentResult) + return agentResult } catch (err) { const error = err instanceof Error ? err : new Error(String(err)) this.transitionToError(error) - return { + const errorResult: AgentRunResult = { success: false, output: error.message, messages: [], @@ -304,9 +317,33 @@ export class Agent { toolCalls: [], structured: undefined, } + this.emitAgentTrace(callerOptions, agentStartMs, errorResult) + return errorResult } } + /** Emit an `agent` trace event if `onTrace` is provided. */ + private emitAgentTrace( + options: Partial | undefined, + startMs: number, + result: AgentRunResult, + ): void { + if (!options?.onTrace) return + const endMs = Date.now() + emitTrace(options.onTrace, { + type: 'agent', + runId: options.runId ?? '', + taskId: options.taskId, + agent: options.traceAgent ?? this.name, + turns: result.messages.filter(m => m.role === 'assistant').length, + tokens: result.tokenUsage, + toolCalls: result.toolCalls.length, + startMs, + endMs, + durationMs: endMs - startMs, + }) + } + /** * Validate agent output against the configured `outputSchema`. * On first validation failure, retry once with error feedback. diff --git a/src/agent/pool.ts b/src/agent/pool.ts index 915f361..aaf1fe3 100644 --- a/src/agent/pool.ts +++ b/src/agent/pool.ts @@ -21,6 +21,7 @@ */ import type { AgentRunResult } from '../types.js' +import type { RunOptions } from './runner.js' import type { Agent } from './agent.js' import { Semaphore } from '../utils/semaphore.js' @@ -123,12 +124,16 @@ export class AgentPool { * * @throws {Error} If the agent name is not found. */ - async run(agentName: string, prompt: string): Promise { + async run( + agentName: string, + prompt: string, + runOptions?: Partial, + ): Promise { const agent = this.requireAgent(agentName) await this.semaphore.acquire() try { - return await agent.run(prompt) + return await agent.run(prompt, runOptions) } finally { this.semaphore.release() } diff --git a/src/agent/runner.ts b/src/agent/runner.ts index 13667db..f2b0600 100644 --- a/src/agent/runner.ts +++ b/src/agent/runner.ts @@ -25,7 +25,9 @@ import type { ToolUseContext, LLMAdapter, LLMChatOptions, + TraceEvent, } from '../types.js' +import { emitTrace } from '../utils/trace.js' import type { ToolRegistry } from '../tool/framework.js' import type { ToolExecutor } from '../tool/executor.js' @@ -76,6 +78,14 @@ export interface RunOptions { readonly onToolResult?: (name: string, result: ToolResult) => void /** Fired after each complete {@link LLMMessage} is appended. */ readonly onMessage?: (message: LLMMessage) => void + /** Trace callback for observability spans. */ + readonly onTrace?: (event: TraceEvent) => void + /** Run ID for trace correlation. */ + readonly runId?: string + /** Task ID for trace correlation. */ + readonly taskId?: string + /** Agent name for trace correlation (overrides RunnerOptions.agentName). */ + readonly traceAgent?: string } /** The aggregated result returned when a full run completes. */ @@ -254,7 +264,24 @@ export class AgentRunner { // ------------------------------------------------------------------ // Step 1: Call the LLM and collect the full response for this turn. // ------------------------------------------------------------------ + const llmStartMs = options.onTrace ? Date.now() : 0 const response = await this.adapter.chat(conversationMessages, baseChatOptions) + if (options.onTrace) { + const llmEndMs = Date.now() + const agentName = options.traceAgent ?? this.options.agentName ?? 'unknown' + emitTrace(options.onTrace, { + type: 'llm_call', + runId: options.runId ?? '', + taskId: options.taskId, + agent: agentName, + model: this.options.model, + turn: turns, + tokens: response.usage, + startMs: llmStartMs, + endMs: llmEndMs, + durationMs: llmEndMs - llmStartMs, + }) + } totalUsage = addTokenUsage(totalUsage, response.usage) @@ -319,10 +346,26 @@ export class AgentRunner { result = { data: message, isError: true } } - const duration = Date.now() - startTime + const endTime = Date.now() + const duration = endTime - startTime options.onToolResult?.(block.name, result) + if (options.onTrace) { + const agentName = options.traceAgent ?? this.options.agentName ?? 'unknown' + emitTrace(options.onTrace, { + type: 'tool_call', + runId: options.runId ?? '', + taskId: options.taskId, + agent: agentName, + tool: block.name, + isError: result.isError ?? false, + startMs: startTime, + endMs: endTime, + durationMs: duration, + }) + } + const record: ToolCallRecord = { toolName: block.name, input: block.input, diff --git a/src/index.ts b/src/index.ts index f624707..7518ec3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -161,7 +161,16 @@ export type { OrchestratorConfig, OrchestratorEvent, + // Trace + TraceEvent, + LLMCallTrace, + ToolCallTrace, + TaskTrace, + AgentTrace, + // Memory MemoryEntry, MemoryStore, } from './types.js' + +export { emitTrace, generateRunId } from './utils/trace.js' diff --git a/src/orchestrator/orchestrator.ts b/src/orchestrator/orchestrator.ts index 848bfde..f03dc75 100644 --- a/src/orchestrator/orchestrator.ts +++ b/src/orchestrator/orchestrator.ts @@ -52,8 +52,10 @@ import type { TeamRunResult, TokenUsage, } from '../types.js' +import type { RunOptions } from '../agent/runner.js' import { Agent } from '../agent/agent.js' import { AgentPool } from '../agent/pool.js' +import { emitTrace, generateRunId } from '../utils/trace.js' import { ToolRegistry } from '../tool/framework.js' import { ToolExecutor } from '../tool/executor.js' import { registerBuiltInTools } from '../tool/built-in/index.js' @@ -260,6 +262,8 @@ interface RunContext { readonly scheduler: Scheduler readonly agentResults: Map readonly config: OrchestratorConfig + /** Trace run ID, present when `onTrace` is configured. */ + readonly runId?: string } /** @@ -338,10 +342,19 @@ async function executeQueue( // Build the prompt: inject shared memory context + task description const prompt = await buildTaskPrompt(task, team) + // Build trace context for this task's agent run + const traceOptions: Partial | undefined = config.onTrace + ? { onTrace: config.onTrace, runId: ctx.runId ?? '', taskId: task.id, traceAgent: assignee } + : undefined + + const taskStartMs = config.onTrace ? Date.now() : 0 + let retryCount = 0 + const result = await executeWithRetry( - () => pool.run(assignee, prompt), + () => pool.run(assignee, prompt, traceOptions), task, (retryData) => { + retryCount = retryData.attempt config.onProgress?.({ type: 'task_retry', task: task.id, @@ -351,6 +364,23 @@ async function executeQueue( }, ) + // Emit task trace + if (config.onTrace) { + const taskEndMs = Date.now() + emitTrace(config.onTrace, { + type: 'task', + runId: ctx.runId ?? '', + taskId: task.id, + taskTitle: task.title, + agent: assignee, + success: result.success, + retries: retryCount, + startMs: taskStartMs, + endMs: taskEndMs, + durationMs: taskEndMs - taskStartMs, + }) + } + ctx.agentResults.set(`${assignee}:${task.id}`, result) if (result.success) { @@ -441,8 +471,8 @@ async function buildTaskPrompt(task: Task, team: Team): Promise { */ export class OpenMultiAgent { private readonly config: Required< - Omit - > & Pick + Omit + > & Pick private readonly teams: Map = new Map() private completedTaskCount = 0 @@ -463,6 +493,7 @@ export class OpenMultiAgent { defaultBaseURL: config.defaultBaseURL, defaultApiKey: config.defaultApiKey, onProgress: config.onProgress, + onTrace: config.onTrace, } } @@ -520,7 +551,11 @@ export class OpenMultiAgent { data: { prompt }, }) - const result = await agent.run(prompt) + const traceOptions: Partial | undefined = this.config.onTrace + ? { onTrace: this.config.onTrace, runId: generateRunId(), traceAgent: config.name } + : undefined + + const result = await agent.run(prompt, traceOptions) this.config.onProgress?.({ type: 'agent_complete', @@ -578,6 +613,7 @@ export class OpenMultiAgent { const decompositionPrompt = this.buildDecompositionPrompt(goal, agentConfigs) const coordinatorAgent = buildAgent(coordinatorConfig) + const runId = this.config.onTrace ? generateRunId() : undefined this.config.onProgress?.({ type: 'agent_start', @@ -585,7 +621,10 @@ export class OpenMultiAgent { data: { phase: 'decomposition', goal }, }) - const decompositionResult = await coordinatorAgent.run(decompositionPrompt) + const decompTraceOptions: Partial | undefined = this.config.onTrace + ? { onTrace: this.config.onTrace, runId: runId ?? '', traceAgent: 'coordinator' } + : undefined + const decompositionResult = await coordinatorAgent.run(decompositionPrompt, decompTraceOptions) const agentResults = new Map() agentResults.set('coordinator:decompose', decompositionResult) @@ -629,6 +668,7 @@ export class OpenMultiAgent { scheduler, agentResults, config: this.config, + runId, } await executeQueue(queue, ctx) @@ -637,7 +677,10 @@ export class OpenMultiAgent { // Step 5: Coordinator synthesises final result // ------------------------------------------------------------------ const synthesisPrompt = await this.buildSynthesisPrompt(goal, queue.list(), team) - const synthesisResult = await coordinatorAgent.run(synthesisPrompt) + const synthTraceOptions: Partial | undefined = this.config.onTrace + ? { onTrace: this.config.onTrace, runId: runId ?? '', traceAgent: 'coordinator' } + : undefined + const synthesisResult = await coordinatorAgent.run(synthesisPrompt, synthTraceOptions) agentResults.set('coordinator', synthesisResult) this.config.onProgress?.({ @@ -707,6 +750,7 @@ export class OpenMultiAgent { scheduler, agentResults, config: this.config, + runId: this.config.onTrace ? generateRunId() : undefined, } await executeQueue(queue, ctx) diff --git a/src/types.ts b/src/types.ts index bd2ce64..6d69270 100644 --- a/src/types.ts +++ b/src/types.ts @@ -316,8 +316,69 @@ export interface OrchestratorConfig { readonly defaultBaseURL?: string readonly defaultApiKey?: string onProgress?: (event: OrchestratorEvent) => void + onTrace?: (event: TraceEvent) => void } +// --------------------------------------------------------------------------- +// Trace events — lightweight observability spans +// --------------------------------------------------------------------------- + +/** Shared fields present on every trace event. */ +interface TraceEventBase { + /** Unique identifier for the entire run (runTeam / runTasks / runAgent call). */ + readonly runId: string + readonly type: string + /** Unix epoch ms when the span started. */ + readonly startMs: number + /** Unix epoch ms when the span ended. */ + readonly endMs: number + /** Wall-clock duration in milliseconds (`endMs - startMs`). */ + readonly durationMs: number + /** Agent name associated with this span. */ + readonly agent?: string + /** Task ID associated with this span. */ + readonly taskId?: string +} + +/** Emitted for each LLM API call (one per agent turn). */ +export interface LLMCallTrace extends TraceEventBase { + readonly type: 'llm_call' + readonly agent: string + readonly model?: string + readonly turn: number + readonly tokens: TokenUsage +} + +/** Emitted for each tool execution. */ +export interface ToolCallTrace extends TraceEventBase { + readonly type: 'tool_call' + readonly agent: string + readonly tool: string + readonly isError: boolean +} + +/** Emitted when a task completes (wraps the full retry sequence). */ +export interface TaskTrace extends TraceEventBase { + readonly type: 'task' + readonly taskId: string + readonly taskTitle: string + readonly agent: string + readonly success: boolean + readonly retries: number +} + +/** Emitted when an agent run completes (wraps the full conversation loop). */ +export interface AgentTrace extends TraceEventBase { + readonly type: 'agent' + readonly agent: string + readonly turns: number + readonly tokens: TokenUsage + readonly toolCalls: number +} + +/** Discriminated union of all trace event types. */ +export type TraceEvent = LLMCallTrace | ToolCallTrace | TaskTrace | AgentTrace + // --------------------------------------------------------------------------- // Memory // --------------------------------------------------------------------------- diff --git a/src/utils/trace.ts b/src/utils/trace.ts new file mode 100644 index 0000000..f5c2345 --- /dev/null +++ b/src/utils/trace.ts @@ -0,0 +1,27 @@ +/** + * @fileoverview Trace emission utilities for the observability layer. + */ + +import { randomUUID } from 'node:crypto' +import type { TraceEvent } from '../types.js' + +/** + * Safely emit a trace event. Swallows callback errors so a broken + * subscriber never crashes agent execution. + */ +export function emitTrace( + fn: ((event: TraceEvent) => void) | undefined, + event: TraceEvent, +): void { + if (!fn) return + try { + fn(event) + } catch { + // Intentionally swallowed — observability must never break execution. + } +} + +/** Generate a unique run ID for trace correlation. */ +export function generateRunId(): string { + return randomUUID() +} diff --git a/tests/trace.test.ts b/tests/trace.test.ts new file mode 100644 index 0000000..7e4884b --- /dev/null +++ b/tests/trace.test.ts @@ -0,0 +1,434 @@ +import { describe, it, expect, vi } from 'vitest' +import { z } from 'zod' +import { Agent } from '../src/agent/agent.js' +import { AgentRunner, type RunOptions } from '../src/agent/runner.js' +import { ToolRegistry, defineTool } from '../src/tool/framework.js' +import { ToolExecutor } from '../src/tool/executor.js' +import { executeWithRetry } from '../src/orchestrator/orchestrator.js' +import { emitTrace, generateRunId } from '../src/utils/trace.js' +import { createTask } from '../src/task/task.js' +import type { + AgentConfig, + AgentRunResult, + LLMAdapter, + LLMResponse, + TraceEvent, +} from '../src/types.js' + +// --------------------------------------------------------------------------- +// Mock adapters +// --------------------------------------------------------------------------- + +function mockAdapter(responses: LLMResponse[]): LLMAdapter { + let callIndex = 0 + return { + name: 'mock', + async chat() { + return responses[callIndex++]! + }, + async *stream() { + /* unused */ + }, + } +} + +function textResponse(text: string): LLMResponse { + return { + id: `resp-${Math.random().toString(36).slice(2)}`, + content: [{ type: 'text' as const, text }], + model: 'mock-model', + stop_reason: 'end_turn', + usage: { input_tokens: 10, output_tokens: 20 }, + } +} + +function toolUseResponse(toolName: string, input: Record): LLMResponse { + return { + id: `resp-${Math.random().toString(36).slice(2)}`, + content: [ + { + type: 'tool_use' as const, + id: `tu-${Math.random().toString(36).slice(2)}`, + name: toolName, + input, + }, + ], + model: 'mock-model', + stop_reason: 'tool_use', + usage: { input_tokens: 15, output_tokens: 25 }, + } +} + +function buildMockAgent( + config: AgentConfig, + responses: LLMResponse[], + registry?: ToolRegistry, + executor?: ToolExecutor, +): Agent { + const reg = registry ?? new ToolRegistry() + const exec = executor ?? new ToolExecutor(reg) + const adapter = mockAdapter(responses) + const agent = new Agent(config, reg, exec) + + const runner = new AgentRunner(adapter, reg, exec, { + model: config.model, + systemPrompt: config.systemPrompt, + maxTurns: config.maxTurns, + maxTokens: config.maxTokens, + temperature: config.temperature, + agentName: config.name, + }) + ;(agent as any).runner = runner + + return agent +} + +// --------------------------------------------------------------------------- +// emitTrace helper +// --------------------------------------------------------------------------- + +describe('emitTrace', () => { + it('does nothing when fn is undefined', () => { + // Should not throw + emitTrace(undefined, { + type: 'agent', + runId: 'r1', + agent: 'a', + turns: 1, + tokens: { input_tokens: 0, output_tokens: 0 }, + toolCalls: 0, + startMs: 0, + endMs: 0, + durationMs: 0, + }) + }) + + it('calls fn with the event', () => { + const fn = vi.fn() + const event: TraceEvent = { + type: 'agent', + runId: 'r1', + agent: 'a', + turns: 1, + tokens: { input_tokens: 0, output_tokens: 0 }, + toolCalls: 0, + startMs: 0, + endMs: 0, + durationMs: 0, + } + emitTrace(fn, event) + expect(fn).toHaveBeenCalledWith(event) + }) + + it('swallows errors thrown by callback', () => { + const fn = () => { throw new Error('boom') } + expect(() => + emitTrace(fn, { + type: 'agent', + runId: 'r1', + agent: 'a', + turns: 1, + tokens: { input_tokens: 0, output_tokens: 0 }, + toolCalls: 0, + startMs: 0, + endMs: 0, + durationMs: 0, + }), + ).not.toThrow() + }) +}) + +describe('generateRunId', () => { + it('returns a UUID string', () => { + const id = generateRunId() + expect(id).toMatch(/^[0-9a-f-]{36}$/) + }) + + it('returns unique IDs', () => { + const ids = new Set(Array.from({ length: 100 }, generateRunId)) + expect(ids.size).toBe(100) + }) +}) + +// --------------------------------------------------------------------------- +// AgentRunner trace events +// --------------------------------------------------------------------------- + +describe('AgentRunner trace events', () => { + it('emits llm_call trace for each LLM turn', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + const executor = new ToolExecutor(registry) + const adapter = mockAdapter([textResponse('Hello!')]) + + const runner = new AgentRunner(adapter, registry, executor, { + model: 'test-model', + agentName: 'test-agent', + }) + + const runOptions: RunOptions = { + onTrace: (e) => traces.push(e), + runId: 'run-1', + traceAgent: 'test-agent', + } + + await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'hi' }] }], + runOptions, + ) + + const llmTraces = traces.filter(t => t.type === 'llm_call') + expect(llmTraces).toHaveLength(1) + + const llm = llmTraces[0]! + expect(llm.type).toBe('llm_call') + expect(llm.runId).toBe('run-1') + expect(llm.agent).toBe('test-agent') + expect(llm.model).toBe('test-model') + expect(llm.turn).toBe(1) + expect(llm.tokens).toEqual({ input_tokens: 10, output_tokens: 20 }) + expect(llm.durationMs).toBeGreaterThanOrEqual(0) + expect(llm.startMs).toBeLessThanOrEqual(llm.endMs) + }) + + it('emits tool_call trace with correct fields', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + registry.register( + defineTool({ + name: 'echo', + description: 'echoes', + inputSchema: z.object({ msg: z.string() }), + execute: async ({ msg }) => ({ data: msg }), + }), + ) + const executor = new ToolExecutor(registry) + const adapter = mockAdapter([ + toolUseResponse('echo', { msg: 'hello' }), + textResponse('Done'), + ]) + + const runner = new AgentRunner(adapter, registry, executor, { + model: 'test-model', + agentName: 'tooler', + }) + + await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'test' }] }], + { onTrace: (e) => traces.push(e), runId: 'run-2', traceAgent: 'tooler' }, + ) + + const toolTraces = traces.filter(t => t.type === 'tool_call') + expect(toolTraces).toHaveLength(1) + + const tool = toolTraces[0]! + expect(tool.type).toBe('tool_call') + expect(tool.runId).toBe('run-2') + expect(tool.agent).toBe('tooler') + expect(tool.tool).toBe('echo') + expect(tool.isError).toBe(false) + expect(tool.durationMs).toBeGreaterThanOrEqual(0) + }) + + it('tool_call trace has isError: true on tool failure', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + registry.register( + defineTool({ + name: 'boom', + description: 'fails', + inputSchema: z.object({}), + execute: async () => { throw new Error('fail') }, + }), + ) + const executor = new ToolExecutor(registry) + const adapter = mockAdapter([ + toolUseResponse('boom', {}), + textResponse('Handled'), + ]) + + const runner = new AgentRunner(adapter, registry, executor, { + model: 'test-model', + agentName: 'err-agent', + }) + + await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'test' }] }], + { onTrace: (e) => traces.push(e), runId: 'run-3', traceAgent: 'err-agent' }, + ) + + const toolTraces = traces.filter(t => t.type === 'tool_call') + expect(toolTraces).toHaveLength(1) + expect(toolTraces[0]!.isError).toBe(true) + }) + + it('does not call Date.now for LLM timing when onTrace is absent', async () => { + // This test just verifies no errors occur when onTrace is not provided + const registry = new ToolRegistry() + const executor = new ToolExecutor(registry) + const adapter = mockAdapter([textResponse('hi')]) + + const runner = new AgentRunner(adapter, registry, executor, { + model: 'test-model', + }) + + const result = await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'test' }] }], + {}, + ) + + expect(result.output).toBe('hi') + }) +}) + +// --------------------------------------------------------------------------- +// Agent-level trace events +// --------------------------------------------------------------------------- + +describe('Agent trace events', () => { + it('emits agent trace with turns, tokens, and toolCalls', async () => { + const traces: TraceEvent[] = [] + const config: AgentConfig = { + name: 'my-agent', + model: 'mock-model', + systemPrompt: 'You are a test.', + } + + const agent = buildMockAgent(config, [textResponse('Hello world')]) + + const runOptions: Partial = { + onTrace: (e) => traces.push(e), + runId: 'run-agent-1', + traceAgent: 'my-agent', + } + + const result = await agent.run('Say hello', runOptions) + expect(result.success).toBe(true) + + const agentTraces = traces.filter(t => t.type === 'agent') + expect(agentTraces).toHaveLength(1) + + const at = agentTraces[0]! + expect(at.type).toBe('agent') + expect(at.runId).toBe('run-agent-1') + expect(at.agent).toBe('my-agent') + expect(at.turns).toBe(1) // one assistant message + expect(at.tokens).toEqual({ input_tokens: 10, output_tokens: 20 }) + expect(at.toolCalls).toBe(0) + expect(at.durationMs).toBeGreaterThanOrEqual(0) + }) + + it('all traces share the same runId', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + registry.register( + defineTool({ + name: 'greet', + description: 'greets', + inputSchema: z.object({ name: z.string() }), + execute: async ({ name }) => ({ data: `Hi ${name}` }), + }), + ) + const executor = new ToolExecutor(registry) + const config: AgentConfig = { + name: 'multi-trace-agent', + model: 'mock-model', + tools: ['greet'], + } + + const agent = buildMockAgent( + config, + [ + toolUseResponse('greet', { name: 'world' }), + textResponse('Done'), + ], + registry, + executor, + ) + + const runId = 'shared-run-id' + await agent.run('test', { + onTrace: (e) => traces.push(e), + runId, + traceAgent: 'multi-trace-agent', + }) + + // Should have: 2 llm_call, 1 tool_call, 1 agent + expect(traces.length).toBeGreaterThanOrEqual(4) + + for (const trace of traces) { + expect(trace.runId).toBe(runId) + } + }) + + it('onTrace error does not break agent execution', async () => { + const config: AgentConfig = { + name: 'resilient-agent', + model: 'mock-model', + } + + const agent = buildMockAgent(config, [textResponse('OK')]) + + const result = await agent.run('test', { + onTrace: () => { throw new Error('callback exploded') }, + runId: 'run-err', + traceAgent: 'resilient-agent', + }) + + // The run should still succeed despite the broken callback + expect(result.success).toBe(true) + expect(result.output).toBe('OK') + }) + + it('per-turn token usage in llm_call traces', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + registry.register( + defineTool({ + name: 'noop', + description: 'noop', + inputSchema: z.object({}), + execute: async () => ({ data: 'ok' }), + }), + ) + const executor = new ToolExecutor(registry) + + // Two LLM calls: first triggers a tool, second is the final response + const resp1: LLMResponse = { + id: 'r1', + content: [{ type: 'tool_use', id: 'tu1', name: 'noop', input: {} }], + model: 'mock-model', + stop_reason: 'tool_use', + usage: { input_tokens: 100, output_tokens: 50 }, + } + const resp2: LLMResponse = { + id: 'r2', + content: [{ type: 'text', text: 'Final answer' }], + model: 'mock-model', + stop_reason: 'end_turn', + usage: { input_tokens: 200, output_tokens: 100 }, + } + + const adapter = mockAdapter([resp1, resp2]) + const runner = new AgentRunner(adapter, registry, executor, { + model: 'mock-model', + agentName: 'token-agent', + }) + + await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'go' }] }], + { onTrace: (e) => traces.push(e), runId: 'run-tok', traceAgent: 'token-agent' }, + ) + + const llmTraces = traces.filter(t => t.type === 'llm_call') + expect(llmTraces).toHaveLength(2) + + // Each trace carries its own turn's token usage, not the aggregate + expect(llmTraces[0]!.tokens).toEqual({ input_tokens: 100, output_tokens: 50 }) + expect(llmTraces[1]!.tokens).toEqual({ input_tokens: 200, output_tokens: 100 }) + + // Turn numbers should be sequential + expect(llmTraces[0]!.turn).toBe(1) + expect(llmTraces[1]!.turn).toBe(2) + }) +})