feat: add onTrace observability callback (#18)
Add a lightweight onTrace callback to OrchestratorConfig that emits structured span events for LLM calls, tool executions, task lifecycle, and agent lifecycle. Zero overhead when not subscribed, zero new dependencies.
This commit is contained in:
parent
17546fd93e
commit
f4d3bb4e8d
|
|
@ -32,6 +32,7 @@ import type {
|
|||
TokenUsage,
|
||||
ToolUseContext,
|
||||
} from '../types.js'
|
||||
import { emitTrace } from '../utils/trace.js'
|
||||
import type { ToolDefinition as FrameworkToolDefinition, ToolRegistry } from '../tool/framework.js'
|
||||
import type { ToolExecutor } from '../tool/executor.js'
|
||||
import { createAdapter } from '../llm/adapter.js'
|
||||
|
|
@ -158,12 +159,12 @@ export class Agent {
|
|||
*
|
||||
* Use this for one-shot queries where past context is irrelevant.
|
||||
*/
|
||||
async run(prompt: string): Promise<AgentRunResult> {
|
||||
async run(prompt: string, runOptions?: Partial<RunOptions>): Promise<AgentRunResult> {
|
||||
const messages: LLMMessage[] = [
|
||||
{ role: 'user', content: [{ type: 'text', text: prompt }] },
|
||||
]
|
||||
|
||||
return this.executeRun(messages)
|
||||
return this.executeRun(messages, runOptions)
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -266,15 +267,23 @@ export class Agent {
|
|||
* Shared execution path used by both `run` and `prompt`.
|
||||
* Handles state transitions and error wrapping.
|
||||
*/
|
||||
private async executeRun(messages: LLMMessage[]): Promise<AgentRunResult> {
|
||||
private async executeRun(
|
||||
messages: LLMMessage[],
|
||||
callerOptions?: Partial<RunOptions>,
|
||||
): Promise<AgentRunResult> {
|
||||
this.transitionTo('running')
|
||||
|
||||
const agentStartMs = callerOptions?.onTrace ? Date.now() : 0
|
||||
|
||||
try {
|
||||
const runner = await this.getRunner()
|
||||
const internalOnMessage = (msg: LLMMessage) => {
|
||||
this.state.messages.push(msg)
|
||||
callerOptions?.onMessage?.(msg)
|
||||
}
|
||||
const runOptions: RunOptions = {
|
||||
onMessage: msg => {
|
||||
this.state.messages.push(msg)
|
||||
},
|
||||
...callerOptions,
|
||||
onMessage: internalOnMessage,
|
||||
}
|
||||
|
||||
const result = await runner.run(messages, runOptions)
|
||||
|
|
@ -282,21 +291,25 @@ export class Agent {
|
|||
|
||||
// --- Structured output validation ---
|
||||
if (this.config.outputSchema) {
|
||||
return this.validateStructuredOutput(
|
||||
const validated = await this.validateStructuredOutput(
|
||||
messages,
|
||||
result,
|
||||
runner,
|
||||
runOptions,
|
||||
)
|
||||
this.emitAgentTrace(callerOptions, agentStartMs, validated)
|
||||
return validated
|
||||
}
|
||||
|
||||
this.transitionTo('completed')
|
||||
return this.toAgentRunResult(result, true)
|
||||
const agentResult = this.toAgentRunResult(result, true)
|
||||
this.emitAgentTrace(callerOptions, agentStartMs, agentResult)
|
||||
return agentResult
|
||||
} catch (err) {
|
||||
const error = err instanceof Error ? err : new Error(String(err))
|
||||
this.transitionToError(error)
|
||||
|
||||
return {
|
||||
const errorResult: AgentRunResult = {
|
||||
success: false,
|
||||
output: error.message,
|
||||
messages: [],
|
||||
|
|
@ -304,9 +317,33 @@ export class Agent {
|
|||
toolCalls: [],
|
||||
structured: undefined,
|
||||
}
|
||||
this.emitAgentTrace(callerOptions, agentStartMs, errorResult)
|
||||
return errorResult
|
||||
}
|
||||
}
|
||||
|
||||
/** Emit an `agent` trace event if `onTrace` is provided. */
|
||||
private emitAgentTrace(
|
||||
options: Partial<RunOptions> | undefined,
|
||||
startMs: number,
|
||||
result: AgentRunResult,
|
||||
): void {
|
||||
if (!options?.onTrace) return
|
||||
const endMs = Date.now()
|
||||
emitTrace(options.onTrace, {
|
||||
type: 'agent',
|
||||
runId: options.runId ?? '',
|
||||
taskId: options.taskId,
|
||||
agent: options.traceAgent ?? this.name,
|
||||
turns: result.messages.filter(m => m.role === 'assistant').length,
|
||||
tokens: result.tokenUsage,
|
||||
toolCalls: result.toolCalls.length,
|
||||
startMs,
|
||||
endMs,
|
||||
durationMs: endMs - startMs,
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate agent output against the configured `outputSchema`.
|
||||
* On first validation failure, retry once with error feedback.
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
*/
|
||||
|
||||
import type { AgentRunResult } from '../types.js'
|
||||
import type { RunOptions } from './runner.js'
|
||||
import type { Agent } from './agent.js'
|
||||
import { Semaphore } from '../utils/semaphore.js'
|
||||
|
||||
|
|
@ -123,12 +124,16 @@ export class AgentPool {
|
|||
*
|
||||
* @throws {Error} If the agent name is not found.
|
||||
*/
|
||||
async run(agentName: string, prompt: string): Promise<AgentRunResult> {
|
||||
async run(
|
||||
agentName: string,
|
||||
prompt: string,
|
||||
runOptions?: Partial<RunOptions>,
|
||||
): Promise<AgentRunResult> {
|
||||
const agent = this.requireAgent(agentName)
|
||||
|
||||
await this.semaphore.acquire()
|
||||
try {
|
||||
return await agent.run(prompt)
|
||||
return await agent.run(prompt, runOptions)
|
||||
} finally {
|
||||
this.semaphore.release()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,9 @@ import type {
|
|||
ToolUseContext,
|
||||
LLMAdapter,
|
||||
LLMChatOptions,
|
||||
TraceEvent,
|
||||
} from '../types.js'
|
||||
import { emitTrace } from '../utils/trace.js'
|
||||
import type { ToolRegistry } from '../tool/framework.js'
|
||||
import type { ToolExecutor } from '../tool/executor.js'
|
||||
|
||||
|
|
@ -76,6 +78,14 @@ export interface RunOptions {
|
|||
readonly onToolResult?: (name: string, result: ToolResult) => void
|
||||
/** Fired after each complete {@link LLMMessage} is appended. */
|
||||
readonly onMessage?: (message: LLMMessage) => void
|
||||
/** Trace callback for observability spans. */
|
||||
readonly onTrace?: (event: TraceEvent) => void
|
||||
/** Run ID for trace correlation. */
|
||||
readonly runId?: string
|
||||
/** Task ID for trace correlation. */
|
||||
readonly taskId?: string
|
||||
/** Agent name for trace correlation (overrides RunnerOptions.agentName). */
|
||||
readonly traceAgent?: string
|
||||
}
|
||||
|
||||
/** The aggregated result returned when a full run completes. */
|
||||
|
|
@ -254,7 +264,24 @@ export class AgentRunner {
|
|||
// ------------------------------------------------------------------
|
||||
// Step 1: Call the LLM and collect the full response for this turn.
|
||||
// ------------------------------------------------------------------
|
||||
const llmStartMs = options.onTrace ? Date.now() : 0
|
||||
const response = await this.adapter.chat(conversationMessages, baseChatOptions)
|
||||
if (options.onTrace) {
|
||||
const llmEndMs = Date.now()
|
||||
const agentName = options.traceAgent ?? this.options.agentName ?? 'unknown'
|
||||
emitTrace(options.onTrace, {
|
||||
type: 'llm_call',
|
||||
runId: options.runId ?? '',
|
||||
taskId: options.taskId,
|
||||
agent: agentName,
|
||||
model: this.options.model,
|
||||
turn: turns,
|
||||
tokens: response.usage,
|
||||
startMs: llmStartMs,
|
||||
endMs: llmEndMs,
|
||||
durationMs: llmEndMs - llmStartMs,
|
||||
})
|
||||
}
|
||||
|
||||
totalUsage = addTokenUsage(totalUsage, response.usage)
|
||||
|
||||
|
|
@ -319,10 +346,26 @@ export class AgentRunner {
|
|||
result = { data: message, isError: true }
|
||||
}
|
||||
|
||||
const duration = Date.now() - startTime
|
||||
const endTime = Date.now()
|
||||
const duration = endTime - startTime
|
||||
|
||||
options.onToolResult?.(block.name, result)
|
||||
|
||||
if (options.onTrace) {
|
||||
const agentName = options.traceAgent ?? this.options.agentName ?? 'unknown'
|
||||
emitTrace(options.onTrace, {
|
||||
type: 'tool_call',
|
||||
runId: options.runId ?? '',
|
||||
taskId: options.taskId,
|
||||
agent: agentName,
|
||||
tool: block.name,
|
||||
isError: result.isError ?? false,
|
||||
startMs: startTime,
|
||||
endMs: endTime,
|
||||
durationMs: duration,
|
||||
})
|
||||
}
|
||||
|
||||
const record: ToolCallRecord = {
|
||||
toolName: block.name,
|
||||
input: block.input,
|
||||
|
|
|
|||
|
|
@ -161,7 +161,16 @@ export type {
|
|||
OrchestratorConfig,
|
||||
OrchestratorEvent,
|
||||
|
||||
// Trace
|
||||
TraceEvent,
|
||||
LLMCallTrace,
|
||||
ToolCallTrace,
|
||||
TaskTrace,
|
||||
AgentTrace,
|
||||
|
||||
// Memory
|
||||
MemoryEntry,
|
||||
MemoryStore,
|
||||
} from './types.js'
|
||||
|
||||
export { emitTrace, generateRunId } from './utils/trace.js'
|
||||
|
|
|
|||
|
|
@ -52,8 +52,10 @@ import type {
|
|||
TeamRunResult,
|
||||
TokenUsage,
|
||||
} from '../types.js'
|
||||
import type { RunOptions } from '../agent/runner.js'
|
||||
import { Agent } from '../agent/agent.js'
|
||||
import { AgentPool } from '../agent/pool.js'
|
||||
import { emitTrace, generateRunId } from '../utils/trace.js'
|
||||
import { ToolRegistry } from '../tool/framework.js'
|
||||
import { ToolExecutor } from '../tool/executor.js'
|
||||
import { registerBuiltInTools } from '../tool/built-in/index.js'
|
||||
|
|
@ -260,6 +262,8 @@ interface RunContext {
|
|||
readonly scheduler: Scheduler
|
||||
readonly agentResults: Map<string, AgentRunResult>
|
||||
readonly config: OrchestratorConfig
|
||||
/** Trace run ID, present when `onTrace` is configured. */
|
||||
readonly runId?: string
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -338,10 +342,19 @@ async function executeQueue(
|
|||
// Build the prompt: inject shared memory context + task description
|
||||
const prompt = await buildTaskPrompt(task, team)
|
||||
|
||||
// Build trace context for this task's agent run
|
||||
const traceOptions: Partial<RunOptions> | undefined = config.onTrace
|
||||
? { onTrace: config.onTrace, runId: ctx.runId ?? '', taskId: task.id, traceAgent: assignee }
|
||||
: undefined
|
||||
|
||||
const taskStartMs = config.onTrace ? Date.now() : 0
|
||||
let retryCount = 0
|
||||
|
||||
const result = await executeWithRetry(
|
||||
() => pool.run(assignee, prompt),
|
||||
() => pool.run(assignee, prompt, traceOptions),
|
||||
task,
|
||||
(retryData) => {
|
||||
retryCount = retryData.attempt
|
||||
config.onProgress?.({
|
||||
type: 'task_retry',
|
||||
task: task.id,
|
||||
|
|
@ -351,6 +364,23 @@ async function executeQueue(
|
|||
},
|
||||
)
|
||||
|
||||
// Emit task trace
|
||||
if (config.onTrace) {
|
||||
const taskEndMs = Date.now()
|
||||
emitTrace(config.onTrace, {
|
||||
type: 'task',
|
||||
runId: ctx.runId ?? '',
|
||||
taskId: task.id,
|
||||
taskTitle: task.title,
|
||||
agent: assignee,
|
||||
success: result.success,
|
||||
retries: retryCount,
|
||||
startMs: taskStartMs,
|
||||
endMs: taskEndMs,
|
||||
durationMs: taskEndMs - taskStartMs,
|
||||
})
|
||||
}
|
||||
|
||||
ctx.agentResults.set(`${assignee}:${task.id}`, result)
|
||||
|
||||
if (result.success) {
|
||||
|
|
@ -441,8 +471,8 @@ async function buildTaskPrompt(task: Task, team: Team): Promise<string> {
|
|||
*/
|
||||
export class OpenMultiAgent {
|
||||
private readonly config: Required<
|
||||
Omit<OrchestratorConfig, 'onProgress' | 'defaultBaseURL' | 'defaultApiKey'>
|
||||
> & Pick<OrchestratorConfig, 'onProgress' | 'defaultBaseURL' | 'defaultApiKey'>
|
||||
Omit<OrchestratorConfig, 'onProgress' | 'onTrace' | 'defaultBaseURL' | 'defaultApiKey'>
|
||||
> & Pick<OrchestratorConfig, 'onProgress' | 'onTrace' | 'defaultBaseURL' | 'defaultApiKey'>
|
||||
|
||||
private readonly teams: Map<string, Team> = new Map()
|
||||
private completedTaskCount = 0
|
||||
|
|
@ -463,6 +493,7 @@ export class OpenMultiAgent {
|
|||
defaultBaseURL: config.defaultBaseURL,
|
||||
defaultApiKey: config.defaultApiKey,
|
||||
onProgress: config.onProgress,
|
||||
onTrace: config.onTrace,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -520,7 +551,11 @@ export class OpenMultiAgent {
|
|||
data: { prompt },
|
||||
})
|
||||
|
||||
const result = await agent.run(prompt)
|
||||
const traceOptions: Partial<RunOptions> | undefined = this.config.onTrace
|
||||
? { onTrace: this.config.onTrace, runId: generateRunId(), traceAgent: config.name }
|
||||
: undefined
|
||||
|
||||
const result = await agent.run(prompt, traceOptions)
|
||||
|
||||
this.config.onProgress?.({
|
||||
type: 'agent_complete',
|
||||
|
|
@ -578,6 +613,7 @@ export class OpenMultiAgent {
|
|||
|
||||
const decompositionPrompt = this.buildDecompositionPrompt(goal, agentConfigs)
|
||||
const coordinatorAgent = buildAgent(coordinatorConfig)
|
||||
const runId = this.config.onTrace ? generateRunId() : undefined
|
||||
|
||||
this.config.onProgress?.({
|
||||
type: 'agent_start',
|
||||
|
|
@ -585,7 +621,10 @@ export class OpenMultiAgent {
|
|||
data: { phase: 'decomposition', goal },
|
||||
})
|
||||
|
||||
const decompositionResult = await coordinatorAgent.run(decompositionPrompt)
|
||||
const decompTraceOptions: Partial<RunOptions> | undefined = this.config.onTrace
|
||||
? { onTrace: this.config.onTrace, runId: runId ?? '', traceAgent: 'coordinator' }
|
||||
: undefined
|
||||
const decompositionResult = await coordinatorAgent.run(decompositionPrompt, decompTraceOptions)
|
||||
const agentResults = new Map<string, AgentRunResult>()
|
||||
agentResults.set('coordinator:decompose', decompositionResult)
|
||||
|
||||
|
|
@ -629,6 +668,7 @@ export class OpenMultiAgent {
|
|||
scheduler,
|
||||
agentResults,
|
||||
config: this.config,
|
||||
runId,
|
||||
}
|
||||
|
||||
await executeQueue(queue, ctx)
|
||||
|
|
@ -637,7 +677,10 @@ export class OpenMultiAgent {
|
|||
// Step 5: Coordinator synthesises final result
|
||||
// ------------------------------------------------------------------
|
||||
const synthesisPrompt = await this.buildSynthesisPrompt(goal, queue.list(), team)
|
||||
const synthesisResult = await coordinatorAgent.run(synthesisPrompt)
|
||||
const synthTraceOptions: Partial<RunOptions> | undefined = this.config.onTrace
|
||||
? { onTrace: this.config.onTrace, runId: runId ?? '', traceAgent: 'coordinator' }
|
||||
: undefined
|
||||
const synthesisResult = await coordinatorAgent.run(synthesisPrompt, synthTraceOptions)
|
||||
agentResults.set('coordinator', synthesisResult)
|
||||
|
||||
this.config.onProgress?.({
|
||||
|
|
@ -707,6 +750,7 @@ export class OpenMultiAgent {
|
|||
scheduler,
|
||||
agentResults,
|
||||
config: this.config,
|
||||
runId: this.config.onTrace ? generateRunId() : undefined,
|
||||
}
|
||||
|
||||
await executeQueue(queue, ctx)
|
||||
|
|
|
|||
61
src/types.ts
61
src/types.ts
|
|
@ -316,8 +316,69 @@ export interface OrchestratorConfig {
|
|||
readonly defaultBaseURL?: string
|
||||
readonly defaultApiKey?: string
|
||||
onProgress?: (event: OrchestratorEvent) => void
|
||||
onTrace?: (event: TraceEvent) => void
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Trace events — lightweight observability spans
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Shared fields present on every trace event. */
interface TraceEventBase {
  /** Unique identifier for the entire run (runTeam / runTasks / runAgent call). */
  readonly runId: string
  /** Discriminant; narrowed to a string literal by each concrete event type. */
  readonly type: string
  /** Unix epoch ms when the span started. */
  readonly startMs: number
  /** Unix epoch ms when the span ended. */
  readonly endMs: number
  /** Wall-clock duration in milliseconds (`endMs - startMs`). */
  readonly durationMs: number
  /** Agent name associated with this span. */
  readonly agent?: string
  /** Task ID associated with this span. */
  readonly taskId?: string
}

/** Emitted for each LLM API call (one per agent turn). */
export interface LLMCallTrace extends TraceEventBase {
  readonly type: 'llm_call'
  /** Agent that issued the call (required for this event kind). */
  readonly agent: string
  /** Model identifier, when the runner was configured with one. */
  readonly model?: string
  /** 1-based turn index within the conversation loop. */
  readonly turn: number
  /** Token usage for this single call, not the run aggregate. */
  readonly tokens: TokenUsage
}

/** Emitted for each tool execution. */
export interface ToolCallTrace extends TraceEventBase {
  readonly type: 'tool_call'
  readonly agent: string
  /** Name of the executed tool. */
  readonly tool: string
  /** True when the tool returned an error result (including thrown errors). */
  readonly isError: boolean
}

/** Emitted when a task completes (wraps the full retry sequence). */
export interface TaskTrace extends TraceEventBase {
  readonly type: 'task'
  /** Task ID (required for this event kind). */
  readonly taskId: string
  readonly taskTitle: string
  readonly agent: string
  /** Outcome of the final attempt. */
  readonly success: boolean
  /** Number of retries performed before the final attempt. */
  readonly retries: number
}

/** Emitted when an agent run completes (wraps the full conversation loop). */
export interface AgentTrace extends TraceEventBase {
  readonly type: 'agent'
  readonly agent: string
  /** Number of assistant messages produced during the run. */
  readonly turns: number
  /** Aggregate token usage across all turns. */
  readonly tokens: TokenUsage
  /** Total number of tool invocations during the run. */
  readonly toolCalls: number
}

/** Discriminated union of all trace event types. */
export type TraceEvent = LLMCallTrace | ToolCallTrace | TaskTrace | AgentTrace
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Memory
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -0,0 +1,27 @@
|
|||
/**
|
||||
* @fileoverview Trace emission utilities for the observability layer.
|
||||
*/
|
||||
|
||||
import { randomUUID } from 'node:crypto'
|
||||
import type { TraceEvent } from '../types.js'
|
||||
|
||||
/**
|
||||
* Safely emit a trace event. Swallows callback errors so a broken
|
||||
* subscriber never crashes agent execution.
|
||||
*/
|
||||
export function emitTrace(
|
||||
fn: ((event: TraceEvent) => void) | undefined,
|
||||
event: TraceEvent,
|
||||
): void {
|
||||
if (!fn) return
|
||||
try {
|
||||
fn(event)
|
||||
} catch {
|
||||
// Intentionally swallowed — observability must never break execution.
|
||||
}
|
||||
}
|
||||
|
||||
/** Generate a unique run ID for trace correlation. */
|
||||
export function generateRunId(): string {
|
||||
return randomUUID()
|
||||
}
|
||||
|
|
@ -0,0 +1,434 @@
|
|||
import { describe, it, expect, vi } from 'vitest'
|
||||
import { z } from 'zod'
|
||||
import { Agent } from '../src/agent/agent.js'
|
||||
import { AgentRunner, type RunOptions } from '../src/agent/runner.js'
|
||||
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
|
||||
import { ToolExecutor } from '../src/tool/executor.js'
|
||||
import { executeWithRetry } from '../src/orchestrator/orchestrator.js'
|
||||
import { emitTrace, generateRunId } from '../src/utils/trace.js'
|
||||
import { createTask } from '../src/task/task.js'
|
||||
import type {
|
||||
AgentConfig,
|
||||
AgentRunResult,
|
||||
LLMAdapter,
|
||||
LLMResponse,
|
||||
TraceEvent,
|
||||
} from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock adapters
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function mockAdapter(responses: LLMResponse[]): LLMAdapter {
|
||||
let callIndex = 0
|
||||
return {
|
||||
name: 'mock',
|
||||
async chat() {
|
||||
return responses[callIndex++]!
|
||||
},
|
||||
async *stream() {
|
||||
/* unused */
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
function textResponse(text: string): LLMResponse {
|
||||
return {
|
||||
id: `resp-${Math.random().toString(36).slice(2)}`,
|
||||
content: [{ type: 'text' as const, text }],
|
||||
model: 'mock-model',
|
||||
stop_reason: 'end_turn',
|
||||
usage: { input_tokens: 10, output_tokens: 20 },
|
||||
}
|
||||
}
|
||||
|
||||
function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse {
|
||||
return {
|
||||
id: `resp-${Math.random().toString(36).slice(2)}`,
|
||||
content: [
|
||||
{
|
||||
type: 'tool_use' as const,
|
||||
id: `tu-${Math.random().toString(36).slice(2)}`,
|
||||
name: toolName,
|
||||
input,
|
||||
},
|
||||
],
|
||||
model: 'mock-model',
|
||||
stop_reason: 'tool_use',
|
||||
usage: { input_tokens: 15, output_tokens: 25 },
|
||||
}
|
||||
}
|
||||
|
||||
function buildMockAgent(
|
||||
config: AgentConfig,
|
||||
responses: LLMResponse[],
|
||||
registry?: ToolRegistry,
|
||||
executor?: ToolExecutor,
|
||||
): Agent {
|
||||
const reg = registry ?? new ToolRegistry()
|
||||
const exec = executor ?? new ToolExecutor(reg)
|
||||
const adapter = mockAdapter(responses)
|
||||
const agent = new Agent(config, reg, exec)
|
||||
|
||||
const runner = new AgentRunner(adapter, reg, exec, {
|
||||
model: config.model,
|
||||
systemPrompt: config.systemPrompt,
|
||||
maxTurns: config.maxTurns,
|
||||
maxTokens: config.maxTokens,
|
||||
temperature: config.temperature,
|
||||
agentName: config.name,
|
||||
})
|
||||
;(agent as any).runner = runner
|
||||
|
||||
return agent
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// emitTrace helper
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('emitTrace', () => {
|
||||
it('does nothing when fn is undefined', () => {
|
||||
// Should not throw
|
||||
emitTrace(undefined, {
|
||||
type: 'agent',
|
||||
runId: 'r1',
|
||||
agent: 'a',
|
||||
turns: 1,
|
||||
tokens: { input_tokens: 0, output_tokens: 0 },
|
||||
toolCalls: 0,
|
||||
startMs: 0,
|
||||
endMs: 0,
|
||||
durationMs: 0,
|
||||
})
|
||||
})
|
||||
|
||||
it('calls fn with the event', () => {
|
||||
const fn = vi.fn()
|
||||
const event: TraceEvent = {
|
||||
type: 'agent',
|
||||
runId: 'r1',
|
||||
agent: 'a',
|
||||
turns: 1,
|
||||
tokens: { input_tokens: 0, output_tokens: 0 },
|
||||
toolCalls: 0,
|
||||
startMs: 0,
|
||||
endMs: 0,
|
||||
durationMs: 0,
|
||||
}
|
||||
emitTrace(fn, event)
|
||||
expect(fn).toHaveBeenCalledWith(event)
|
||||
})
|
||||
|
||||
it('swallows errors thrown by callback', () => {
|
||||
const fn = () => { throw new Error('boom') }
|
||||
expect(() =>
|
||||
emitTrace(fn, {
|
||||
type: 'agent',
|
||||
runId: 'r1',
|
||||
agent: 'a',
|
||||
turns: 1,
|
||||
tokens: { input_tokens: 0, output_tokens: 0 },
|
||||
toolCalls: 0,
|
||||
startMs: 0,
|
||||
endMs: 0,
|
||||
durationMs: 0,
|
||||
}),
|
||||
).not.toThrow()
|
||||
})
|
||||
})
|
||||
|
||||
describe('generateRunId', () => {
|
||||
it('returns a UUID string', () => {
|
||||
const id = generateRunId()
|
||||
expect(id).toMatch(/^[0-9a-f-]{36}$/)
|
||||
})
|
||||
|
||||
it('returns unique IDs', () => {
|
||||
const ids = new Set(Array.from({ length: 100 }, generateRunId))
|
||||
expect(ids.size).toBe(100)
|
||||
})
|
||||
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// AgentRunner trace events
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Integration tests: drive AgentRunner with scripted adapters and assert the
// trace spans it emits (llm_call / tool_call) carry correct correlation fields.
describe('AgentRunner trace events', () => {
  it('emits llm_call trace for each LLM turn', async () => {
    const traces: TraceEvent[] = []
    const registry = new ToolRegistry()
    const executor = new ToolExecutor(registry)
    const adapter = mockAdapter([textResponse('Hello!')])

    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'test-model',
      agentName: 'test-agent',
    })

    const runOptions: RunOptions = {
      onTrace: (e) => traces.push(e),
      runId: 'run-1',
      traceAgent: 'test-agent',
    }

    await runner.run(
      [{ role: 'user', content: [{ type: 'text', text: 'hi' }] }],
      runOptions,
    )

    // One scripted response => exactly one llm_call span.
    const llmTraces = traces.filter(t => t.type === 'llm_call')
    expect(llmTraces).toHaveLength(1)

    const llm = llmTraces[0]!
    expect(llm.type).toBe('llm_call')
    expect(llm.runId).toBe('run-1')
    expect(llm.agent).toBe('test-agent')
    expect(llm.model).toBe('test-model')
    expect(llm.turn).toBe(1)
    // Per-call usage from textResponse's fixed 10/20 token counts.
    expect(llm.tokens).toEqual({ input_tokens: 10, output_tokens: 20 })
    expect(llm.durationMs).toBeGreaterThanOrEqual(0)
    expect(llm.startMs).toBeLessThanOrEqual(llm.endMs)
  })

  it('emits tool_call trace with correct fields', async () => {
    const traces: TraceEvent[] = []
    const registry = new ToolRegistry()
    registry.register(
      defineTool({
        name: 'echo',
        description: 'echoes',
        inputSchema: z.object({ msg: z.string() }),
        execute: async ({ msg }) => ({ data: msg }),
      }),
    )
    const executor = new ToolExecutor(registry)
    // Turn 1 invokes the tool; turn 2 ends the conversation.
    const adapter = mockAdapter([
      toolUseResponse('echo', { msg: 'hello' }),
      textResponse('Done'),
    ])

    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'test-model',
      agentName: 'tooler',
    })

    await runner.run(
      [{ role: 'user', content: [{ type: 'text', text: 'test' }] }],
      { onTrace: (e) => traces.push(e), runId: 'run-2', traceAgent: 'tooler' },
    )

    const toolTraces = traces.filter(t => t.type === 'tool_call')
    expect(toolTraces).toHaveLength(1)

    const tool = toolTraces[0]!
    expect(tool.type).toBe('tool_call')
    expect(tool.runId).toBe('run-2')
    expect(tool.agent).toBe('tooler')
    expect(tool.tool).toBe('echo')
    expect(tool.isError).toBe(false)
    expect(tool.durationMs).toBeGreaterThanOrEqual(0)
  })

  it('tool_call trace has isError: true on tool failure', async () => {
    const traces: TraceEvent[] = []
    const registry = new ToolRegistry()
    registry.register(
      defineTool({
        name: 'boom',
        description: 'fails',
        inputSchema: z.object({}),
        // Throwing tool: the runner should record the failure, not crash.
        execute: async () => { throw new Error('fail') },
      }),
    )
    const executor = new ToolExecutor(registry)
    const adapter = mockAdapter([
      toolUseResponse('boom', {}),
      textResponse('Handled'),
    ])

    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'test-model',
      agentName: 'err-agent',
    })

    await runner.run(
      [{ role: 'user', content: [{ type: 'text', text: 'test' }] }],
      { onTrace: (e) => traces.push(e), runId: 'run-3', traceAgent: 'err-agent' },
    )

    const toolTraces = traces.filter(t => t.type === 'tool_call')
    expect(toolTraces).toHaveLength(1)
    expect(toolTraces[0]!.isError).toBe(true)
  })

  it('does not call Date.now for LLM timing when onTrace is absent', async () => {
    // This test just verifies no errors occur when onTrace is not provided
    const registry = new ToolRegistry()
    const executor = new ToolExecutor(registry)
    const adapter = mockAdapter([textResponse('hi')])

    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'test-model',
    })

    const result = await runner.run(
      [{ role: 'user', content: [{ type: 'text', text: 'test' }] }],
      {},
    )

    expect(result.output).toBe('hi')
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Agent-level trace events
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Integration tests for Agent-level spans: the aggregate `agent` event,
// runId propagation across all span kinds, and subscriber fault isolation.
describe('Agent trace events', () => {
  it('emits agent trace with turns, tokens, and toolCalls', async () => {
    const traces: TraceEvent[] = []
    const config: AgentConfig = {
      name: 'my-agent',
      model: 'mock-model',
      systemPrompt: 'You are a test.',
    }

    const agent = buildMockAgent(config, [textResponse('Hello world')])

    const runOptions: Partial<RunOptions> = {
      onTrace: (e) => traces.push(e),
      runId: 'run-agent-1',
      traceAgent: 'my-agent',
    }

    const result = await agent.run('Say hello', runOptions)
    expect(result.success).toBe(true)

    const agentTraces = traces.filter(t => t.type === 'agent')
    expect(agentTraces).toHaveLength(1)

    const at = agentTraces[0]!
    expect(at.type).toBe('agent')
    expect(at.runId).toBe('run-agent-1')
    expect(at.agent).toBe('my-agent')
    expect(at.turns).toBe(1) // one assistant message
    expect(at.tokens).toEqual({ input_tokens: 10, output_tokens: 20 })
    expect(at.toolCalls).toBe(0)
    expect(at.durationMs).toBeGreaterThanOrEqual(0)
  })

  it('all traces share the same runId', async () => {
    const traces: TraceEvent[] = []
    const registry = new ToolRegistry()
    registry.register(
      defineTool({
        name: 'greet',
        description: 'greets',
        inputSchema: z.object({ name: z.string() }),
        execute: async ({ name }) => ({ data: `Hi ${name}` }),
      }),
    )
    const executor = new ToolExecutor(registry)
    const config: AgentConfig = {
      name: 'multi-trace-agent',
      model: 'mock-model',
      tools: ['greet'],
    }

    // Two-turn script (tool call then final text) so multiple span kinds fire.
    const agent = buildMockAgent(
      config,
      [
        toolUseResponse('greet', { name: 'world' }),
        textResponse('Done'),
      ],
      registry,
      executor,
    )

    const runId = 'shared-run-id'
    await agent.run('test', {
      onTrace: (e) => traces.push(e),
      runId,
      traceAgent: 'multi-trace-agent',
    })

    // Should have: 2 llm_call, 1 tool_call, 1 agent
    expect(traces.length).toBeGreaterThanOrEqual(4)

    for (const trace of traces) {
      expect(trace.runId).toBe(runId)
    }
  })

  it('onTrace error does not break agent execution', async () => {
    const config: AgentConfig = {
      name: 'resilient-agent',
      model: 'mock-model',
    }

    const agent = buildMockAgent(config, [textResponse('OK')])

    // Subscriber always throws; emitTrace must contain the failure.
    const result = await agent.run('test', {
      onTrace: () => { throw new Error('callback exploded') },
      runId: 'run-err',
      traceAgent: 'resilient-agent',
    })

    // The run should still succeed despite the broken callback
    expect(result.success).toBe(true)
    expect(result.output).toBe('OK')
  })

  it('per-turn token usage in llm_call traces', async () => {
    const traces: TraceEvent[] = []
    const registry = new ToolRegistry()
    registry.register(
      defineTool({
        name: 'noop',
        description: 'noop',
        inputSchema: z.object({}),
        execute: async () => ({ data: 'ok' }),
      }),
    )
    const executor = new ToolExecutor(registry)

    // Two LLM calls: first triggers a tool, second is the final response
    const resp1: LLMResponse = {
      id: 'r1',
      content: [{ type: 'tool_use', id: 'tu1', name: 'noop', input: {} }],
      model: 'mock-model',
      stop_reason: 'tool_use',
      usage: { input_tokens: 100, output_tokens: 50 },
    }
    const resp2: LLMResponse = {
      id: 'r2',
      content: [{ type: 'text', text: 'Final answer' }],
      model: 'mock-model',
      stop_reason: 'end_turn',
      usage: { input_tokens: 200, output_tokens: 100 },
    }

    const adapter = mockAdapter([resp1, resp2])
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      agentName: 'token-agent',
    })

    await runner.run(
      [{ role: 'user', content: [{ type: 'text', text: 'go' }] }],
      { onTrace: (e) => traces.push(e), runId: 'run-tok', traceAgent: 'token-agent' },
    )

    const llmTraces = traces.filter(t => t.type === 'llm_call')
    expect(llmTraces).toHaveLength(2)

    // Each trace carries its own turn's token usage, not the aggregate
    expect(llmTraces[0]!.tokens).toEqual({ input_tokens: 100, output_tokens: 50 })
    expect(llmTraces[1]!.tokens).toEqual({ input_tokens: 200, output_tokens: 100 })

    // Turn numbers should be sequential
    expect(llmTraces[0]!.turn).toBe(1)
    expect(llmTraces[1]!.turn).toBe(2)
  })
})
|
||||
Loading…
Reference in New Issue