From eedfeb17a2bb82242cfdd96903af34923d06cdb9 Mon Sep 17 00:00:00 2001 From: Deathwing Date: Thu, 2 Apr 2026 01:45:43 +0200 Subject: [PATCH 01/38] feat: add GitHub Copilot as LLM provider - Add CopilotAdapter with OAuth2 device flow authentication - Token exchange via /copilot_internal/v2/token with caching - Premium request multiplier system (getCopilotMultiplier) - Full model metadata catalog (COPILOT_MODELS) - Add 'copilot' to SupportedProvider and provider union types - Add example: examples/05-copilot-test.ts --- examples/05-copilot-test.ts | 49 +++ src/llm/adapter.ts | 6 +- src/llm/copilot.ts | 701 ++++++++++++++++++++++++++++++++++++ src/types.ts | 4 +- 4 files changed, 757 insertions(+), 3 deletions(-) create mode 100644 examples/05-copilot-test.ts create mode 100644 src/llm/copilot.ts diff --git a/examples/05-copilot-test.ts b/examples/05-copilot-test.ts new file mode 100644 index 0000000..d027aea --- /dev/null +++ b/examples/05-copilot-test.ts @@ -0,0 +1,49 @@ +/** + * Quick smoke test for the Copilot adapter. + * + * Run: + * npx tsx examples/05-copilot-test.ts + * + * If GITHUB_COPILOT_TOKEN is not set, the adapter will start an interactive + * OAuth2 device flow — you'll be prompted to sign in via your browser. + */ + +import { OpenMultiAgent } from '../src/index.js' +import type { OrchestratorEvent } from '../src/types.js' + +const orchestrator = new OpenMultiAgent({ + defaultModel: 'gpt-4o', + defaultProvider: 'copilot', + onProgress: (event: OrchestratorEvent) => { + if (event.type === 'agent_start') { + console.log(`[start] agent=${event.agent}`) + } else if (event.type === 'agent_complete') { + console.log(`[complete] agent=${event.agent}`) + } + }, +}) + +console.log('Testing Copilot adapter with gpt-4o...\n') + +const result = await orchestrator.runAgent( + { + name: 'assistant', + model: 'gpt-4o', + provider: 'copilot', + systemPrompt: 'You are a helpful assistant. Keep answers brief.', + maxTurns: 1, + maxTokens: 256, + }, + 'What is 2 + 2? 
Reply in one sentence.', +) + +if (result.success) { + console.log('\nAgent output:') + console.log('─'.repeat(60)) + console.log(result.output) + console.log('─'.repeat(60)) + console.log(`\nTokens: input=${result.tokenUsage.input_tokens}, output=${result.tokenUsage.output_tokens}`) +} else { + console.error('Agent failed:', result.output) + process.exit(1) +} diff --git a/src/llm/adapter.ts b/src/llm/adapter.ts index 979f37c..69754e4 100644 --- a/src/llm/adapter.ts +++ b/src/llm/adapter.ts @@ -37,7 +37,7 @@ import type { LLMAdapter } from '../types.js' * Additional providers can be integrated by implementing {@link LLMAdapter} * directly and bypassing this factory. */ -export type SupportedProvider = 'anthropic' | 'openai' +export type SupportedProvider = 'anthropic' | 'copilot' | 'openai' /** * Instantiate the appropriate {@link LLMAdapter} for the given provider. @@ -61,6 +61,10 @@ export async function createAdapter( const { AnthropicAdapter } = await import('./anthropic.js') return new AnthropicAdapter(apiKey) } + case 'copilot': { + const { CopilotAdapter } = await import('./copilot.js') + return new CopilotAdapter(apiKey) + } case 'openai': { const { OpenAIAdapter } = await import('./openai.js') return new OpenAIAdapter(apiKey) diff --git a/src/llm/copilot.ts b/src/llm/copilot.ts new file mode 100644 index 0000000..07fdc45 --- /dev/null +++ b/src/llm/copilot.ts @@ -0,0 +1,701 @@ +/** + * @fileoverview GitHub Copilot adapter implementing {@link LLMAdapter}. + * + * Uses the OpenAI-compatible Copilot Chat Completions endpoint at + * `https://api.githubcopilot.com`. Authentication requires a GitHub token + * (e.g. from `gh auth token`) which is exchanged for a short-lived Copilot + * session token via the internal token endpoint. + * + * API key resolution order: + * 1. `apiKey` constructor argument + * 2. 
`GITHUB_TOKEN` environment variable + * + * @example + * ```ts + * import { CopilotAdapter } from './copilot.js' + * + * const adapter = new CopilotAdapter() // uses GITHUB_TOKEN env var + * const response = await adapter.chat(messages, { + * model: 'claude-sonnet-4', + * maxTokens: 4096, + * }) + * ``` + */ + +import OpenAI from 'openai' +import type { + ChatCompletion, + ChatCompletionAssistantMessageParam, + ChatCompletionChunk, + ChatCompletionMessageParam, + ChatCompletionMessageToolCall, + ChatCompletionTool, + ChatCompletionToolMessageParam, + ChatCompletionUserMessageParam, +} from 'openai/resources/chat/completions/index.js' + +import type { + ContentBlock, + LLMAdapter, + LLMChatOptions, + LLMMessage, + LLMResponse, + LLMStreamOptions, + LLMToolDef, + StreamEvent, + TextBlock, + ToolUseBlock, +} from '../types.js' + +// --------------------------------------------------------------------------- +// Copilot auth — OAuth2 device flow + token exchange +// --------------------------------------------------------------------------- + +const COPILOT_TOKEN_URL = 'https://api.github.com/copilot_internal/v2/token' +const DEVICE_CODE_URL = 'https://github.com/login/device/code' +const POLL_URL = 'https://github.com/login/oauth/access_token' +const COPILOT_CLIENT_ID = 'Iv1.b507a08c87ecfe98' + +const COPILOT_HEADERS: Record = { + 'Copilot-Integration-Id': 'vscode-chat', + 'Editor-Version': 'vscode/1.100.0', + 'Editor-Plugin-Version': 'copilot-chat/0.42.2', +} + +interface CopilotTokenResponse { + token: string + expires_at: number +} + +interface DeviceCodeResponse { + device_code: string + user_code: string + verification_uri: string + interval: number + expires_in: number +} + +interface PollResponse { + access_token?: string + error?: string + error_description?: string +} + +/** + * Start the GitHub OAuth2 device code flow with the Copilot client ID. 
+ * + * Prints a user code and URL to stdout, then polls until the user completes + * authorization in their browser. Returns a GitHub OAuth token scoped for + * Copilot access. + */ +async function deviceCodeLogin(): Promise { + // Step 1: Request a device code + const codeRes = await fetch(DEVICE_CODE_URL, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ client_id: COPILOT_CLIENT_ID, scope: 'copilot' }), + }) + + if (!codeRes.ok) { + const body = await codeRes.text().catch(() => '') + throw new Error(`Device code request failed (${codeRes.status}): ${body}`) + } + + const codeData = (await codeRes.json()) as DeviceCodeResponse + + // Step 2: Prompt the user + console.log(`\n┌─────────────────────────────────────────────┐`) + console.log(`│ GitHub Copilot — Sign in │`) + console.log(`│ │`) + console.log(`│ Open: ${codeData.verification_uri.padEnd(35)}│`) + console.log(`│ Code: ${codeData.user_code.padEnd(35)}│`) + console.log(`└─────────────────────────────────────────────┘\n`) + + // Step 3: Poll for the user to complete auth + const interval = (codeData.interval || 5) * 1000 + const deadline = Date.now() + codeData.expires_in * 1000 + + while (Date.now() < deadline) { + await new Promise((resolve) => setTimeout(resolve, interval)) + + const pollRes = await fetch(POLL_URL, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + client_id: COPILOT_CLIENT_ID, + device_code: codeData.device_code, + grant_type: 'urn:ietf:params:oauth:grant-type:device_code', + }), + }) + + const pollData = (await pollRes.json()) as PollResponse + + if (pollData.access_token) { + console.log('✓ Authenticated with GitHub Copilot\n') + return pollData.access_token + } + + if (pollData.error === 'authorization_pending') continue + if (pollData.error === 'slow_down') { + await new Promise((resolve) => setTimeout(resolve, 5000)) + 
continue + } + + throw new Error( + `OAuth device flow failed: ${pollData.error} — ${pollData.error_description ?? ''}`, + ) + } + + throw new Error('Device code expired. Please try again.') +} + +/** + * Exchange a GitHub OAuth token (from the Copilot device flow) for a + * short-lived Copilot session token. + * + * Note: the token exchange endpoint does NOT require the Copilot-specific + * headers (Editor-Version etc.) — only the chat completions endpoint does. + */ +async function fetchCopilotToken(githubToken: string): Promise { + const res = await fetch(COPILOT_TOKEN_URL, { + method: 'GET', + headers: { + Authorization: `token ${githubToken}`, + Accept: 'application/json', + 'User-Agent': 'GitHubCopilotChat/0.28.0', + }, + }) + + if (!res.ok) { + const body = await res.text().catch(() => '') + throw new Error( + `Copilot token exchange failed (${res.status}): ${body || res.statusText}`, + ) + } + + return (await res.json()) as CopilotTokenResponse +} + +// --------------------------------------------------------------------------- +// Internal helpers — framework → OpenAI (shared with openai.ts pattern) +// --------------------------------------------------------------------------- + +function toOpenAITool(tool: LLMToolDef): ChatCompletionTool { + return { + type: 'function', + function: { + name: tool.name, + description: tool.description, + parameters: tool.inputSchema as Record, + }, + } +} + +function hasToolResults(msg: LLMMessage): boolean { + return msg.content.some((b) => b.type === 'tool_result') +} + +function toOpenAIMessages(messages: LLMMessage[]): ChatCompletionMessageParam[] { + const result: ChatCompletionMessageParam[] = [] + + for (const msg of messages) { + if (msg.role === 'assistant') { + result.push(toOpenAIAssistantMessage(msg)) + } else { + if (!hasToolResults(msg)) { + result.push(toOpenAIUserMessage(msg)) + } else { + const nonToolBlocks = msg.content.filter((b) => b.type !== 'tool_result') + if (nonToolBlocks.length > 0) { + 
result.push(toOpenAIUserMessage({ role: 'user', content: nonToolBlocks })) + } + for (const block of msg.content) { + if (block.type === 'tool_result') { + const toolMsg: ChatCompletionToolMessageParam = { + role: 'tool', + tool_call_id: block.tool_use_id, + content: block.content, + } + result.push(toolMsg) + } + } + } + } + } + + return result +} + +function toOpenAIUserMessage(msg: LLMMessage): ChatCompletionUserMessageParam { + if (msg.content.length === 1 && msg.content[0]?.type === 'text') { + return { role: 'user', content: msg.content[0].text } + } + + type ContentPart = OpenAI.Chat.ChatCompletionContentPartText | OpenAI.Chat.ChatCompletionContentPartImage + const parts: ContentPart[] = [] + + for (const block of msg.content) { + if (block.type === 'text') { + parts.push({ type: 'text', text: block.text }) + } else if (block.type === 'image') { + parts.push({ + type: 'image_url', + image_url: { + url: `data:${block.source.media_type};base64,${block.source.data}`, + }, + }) + } + } + + return { role: 'user', content: parts } +} + +function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessageParam { + const toolCalls: ChatCompletionMessageToolCall[] = [] + const textParts: string[] = [] + + for (const block of msg.content) { + if (block.type === 'tool_use') { + toolCalls.push({ + id: block.id, + type: 'function', + function: { + name: block.name, + arguments: JSON.stringify(block.input), + }, + }) + } else if (block.type === 'text') { + textParts.push(block.text) + } + } + + const assistantMsg: ChatCompletionAssistantMessageParam = { + role: 'assistant', + content: textParts.length > 0 ? 
textParts.join('') : null, + } + + if (toolCalls.length > 0) { + assistantMsg.tool_calls = toolCalls + } + + return assistantMsg +} + +// --------------------------------------------------------------------------- +// Internal helpers — OpenAI → framework +// --------------------------------------------------------------------------- + +function fromOpenAICompletion(completion: ChatCompletion): LLMResponse { + const choice = completion.choices[0] + if (choice === undefined) { + throw new Error('Copilot returned a completion with no choices') + } + + const content: ContentBlock[] = [] + const message = choice.message + + if (message.content !== null && message.content !== undefined) { + const textBlock: TextBlock = { type: 'text', text: message.content } + content.push(textBlock) + } + + for (const toolCall of message.tool_calls ?? []) { + let parsedInput: Record = {} + try { + const parsed: unknown = JSON.parse(toolCall.function.arguments) + if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) { + parsedInput = parsed as Record + } + } catch { + // Malformed arguments — surface as empty object. + } + + const toolUseBlock: ToolUseBlock = { + type: 'tool_use', + id: toolCall.id, + name: toolCall.function.name, + input: parsedInput, + } + content.push(toolUseBlock) + } + + const stopReason = normalizeFinishReason(choice.finish_reason ?? 'stop') + + return { + id: completion.id, + content, + model: completion.model, + stop_reason: stopReason, + usage: { + input_tokens: completion.usage?.prompt_tokens ?? 0, + output_tokens: completion.usage?.completion_tokens ?? 
0, + }, + } +} + +function normalizeFinishReason(reason: string): string { + switch (reason) { + case 'stop': return 'end_turn' + case 'tool_calls': return 'tool_use' + case 'length': return 'max_tokens' + case 'content_filter': return 'content_filter' + default: return reason + } +} + +// --------------------------------------------------------------------------- +// Adapter implementation +// --------------------------------------------------------------------------- + +/** + * LLM adapter backed by the GitHub Copilot Chat Completions API. + * + * Authentication options (tried in order): + * 1. `apiKey` constructor arg — a GitHub OAuth token already scoped for Copilot + * 2. `GITHUB_COPILOT_TOKEN` env var — same as above + * 3. Interactive OAuth2 device flow — prompts the user to sign in via browser + * + * The GitHub token is exchanged for a short-lived Copilot session token, which + * is cached and auto-refreshed. + * + * Thread-safe — a single instance may be shared across concurrent agent runs. + */ +export class CopilotAdapter implements LLMAdapter { + readonly name = 'copilot' + + #githubToken: string | null + #cachedToken: string | null = null + #tokenExpiresAt = 0 + + constructor(apiKey?: string) { + this.#githubToken = apiKey + ?? process.env['GITHUB_COPILOT_TOKEN'] + ?? process.env['GITHUB_TOKEN'] + ?? null + } + + /** + * Return a valid Copilot session token, refreshing if necessary. + * If no GitHub token is available, triggers the interactive device flow. 
+ */ + async #getSessionToken(): Promise { + const now = Math.floor(Date.now() / 1000) + if (this.#cachedToken && this.#tokenExpiresAt - 60 > now) { + return this.#cachedToken + } + + // If we don't have a GitHub token yet, do the device flow + if (!this.#githubToken) { + this.#githubToken = await deviceCodeLogin() + } + + const resp = await fetchCopilotToken(this.#githubToken) + this.#cachedToken = resp.token + this.#tokenExpiresAt = resp.expires_at + return resp.token + } + + /** Build a short-lived OpenAI client pointed at the Copilot endpoint. */ + async #createClient(): Promise { + const sessionToken = await this.#getSessionToken() + return new OpenAI({ + apiKey: sessionToken, + baseURL: 'https://api.githubcopilot.com', + defaultHeaders: COPILOT_HEADERS, + }) + } + + // ------------------------------------------------------------------------- + // chat() + // ------------------------------------------------------------------------- + + async chat(messages: LLMMessage[], options: LLMChatOptions): Promise { + const client = await this.#createClient() + const openAIMessages = buildOpenAIMessageList(messages, options.systemPrompt) + + const completion = await client.chat.completions.create( + { + model: options.model, + messages: openAIMessages, + max_tokens: options.maxTokens, + temperature: options.temperature, + tools: options.tools ? 
options.tools.map(toOpenAITool) : undefined, + stream: false, + }, + { + signal: options.abortSignal, + }, + ) + + return fromOpenAICompletion(completion) + } + + // ------------------------------------------------------------------------- + // stream() + // ------------------------------------------------------------------------- + + async *stream( + messages: LLMMessage[], + options: LLMStreamOptions, + ): AsyncIterable { + const client = await this.#createClient() + const openAIMessages = buildOpenAIMessageList(messages, options.systemPrompt) + + const streamResponse = await client.chat.completions.create( + { + model: options.model, + messages: openAIMessages, + max_tokens: options.maxTokens, + temperature: options.temperature, + tools: options.tools ? options.tools.map(toOpenAITool) : undefined, + stream: true, + stream_options: { include_usage: true }, + }, + { + signal: options.abortSignal, + }, + ) + + let completionId = '' + let completionModel = '' + let finalFinishReason: string = 'stop' + let inputTokens = 0 + let outputTokens = 0 + const toolCallBuffers = new Map< + number, + { id: string; name: string; argsJson: string } + >() + let fullText = '' + + try { + for await (const chunk of streamResponse) { + completionId = chunk.id + completionModel = chunk.model + + if (chunk.usage !== null && chunk.usage !== undefined) { + inputTokens = chunk.usage.prompt_tokens + outputTokens = chunk.usage.completion_tokens + } + + const choice: ChatCompletionChunk.Choice | undefined = chunk.choices[0] + if (choice === undefined) continue + + const delta = choice.delta + + if (delta.content !== null && delta.content !== undefined) { + fullText += delta.content + const textEvent: StreamEvent = { type: 'text', data: delta.content } + yield textEvent + } + + for (const toolCallDelta of delta.tool_calls ?? []) { + const idx = toolCallDelta.index + + if (!toolCallBuffers.has(idx)) { + toolCallBuffers.set(idx, { + id: toolCallDelta.id ?? 
'', + name: toolCallDelta.function?.name ?? '', + argsJson: '', + }) + } + + const buf = toolCallBuffers.get(idx) + if (buf !== undefined) { + if (toolCallDelta.id) buf.id = toolCallDelta.id + if (toolCallDelta.function?.name) buf.name = toolCallDelta.function.name + if (toolCallDelta.function?.arguments) { + buf.argsJson += toolCallDelta.function.arguments + } + } + } + + if (choice.finish_reason !== null && choice.finish_reason !== undefined) { + finalFinishReason = choice.finish_reason + } + } + + const finalToolUseBlocks: ToolUseBlock[] = [] + for (const buf of toolCallBuffers.values()) { + let parsedInput: Record = {} + try { + const parsed: unknown = JSON.parse(buf.argsJson) + if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) { + parsedInput = parsed as Record + } + } catch { + // Malformed JSON — surface as empty object. + } + + const toolUseBlock: ToolUseBlock = { + type: 'tool_use', + id: buf.id, + name: buf.name, + input: parsedInput, + } + finalToolUseBlocks.push(toolUseBlock) + const toolUseEvent: StreamEvent = { type: 'tool_use', data: toolUseBlock } + yield toolUseEvent + } + + const doneContent: ContentBlock[] = [] + if (fullText.length > 0) { + const textBlock: TextBlock = { type: 'text', text: fullText } + doneContent.push(textBlock) + } + doneContent.push(...finalToolUseBlocks) + + const finalResponse: LLMResponse = { + id: completionId, + content: doneContent, + model: completionModel, + stop_reason: normalizeFinishReason(finalFinishReason), + usage: { input_tokens: inputTokens, output_tokens: outputTokens }, + } + + const doneEvent: StreamEvent = { type: 'done', data: finalResponse } + yield doneEvent + } catch (err) { + const error = err instanceof Error ? 
err : new Error(String(err)) + const errorEvent: StreamEvent = { type: 'error', data: error } + yield errorEvent + } + } +} + +// --------------------------------------------------------------------------- +// Private utility +// --------------------------------------------------------------------------- + +function buildOpenAIMessageList( + messages: LLMMessage[], + systemPrompt: string | undefined, +): ChatCompletionMessageParam[] { + const result: ChatCompletionMessageParam[] = [] + + if (systemPrompt !== undefined && systemPrompt.length > 0) { + result.push({ role: 'system', content: systemPrompt }) + } + + result.push(...toOpenAIMessages(messages)) + return result +} + +// Re-export types that consumers of this module commonly need alongside the adapter. +export type { + ContentBlock, + LLMAdapter, + LLMChatOptions, + LLMMessage, + LLMResponse, + LLMStreamOptions, + LLMToolDef, + StreamEvent, +} + +// --------------------------------------------------------------------------- +// Premium request multipliers +// --------------------------------------------------------------------------- + +/** + * Model metadata used for display names, context windows, and premium request + * multiplier lookup. + */ +export interface CopilotModelInfo { + readonly id: string + readonly name: string + readonly contextWindow: number +} + +/** + * Return the premium-request multiplier for a Copilot model. + * + * Copilot doesn't charge per-token — instead each request costs + * `multiplier × 1 premium request` from the user's monthly allowance. + * A multiplier of 0 means the model is included at no premium cost. 
+ * + * Based on https://docs.github.com/en/copilot/reference/ai-models/supported-models#model-multipliers + */ +export function getCopilotMultiplier(modelId: string): number { + const id = modelId.toLowerCase() + + // 0x — included models + if (id.includes('gpt-4.1')) return 0 + if (id.includes('gpt-4o')) return 0 + if (id.includes('gpt-5-mini') || id.includes('gpt-5 mini')) return 0 + if (id.includes('raptor')) return 0 + if (id.includes('goldeneye')) return 0 + + // 0.25x + if (id.includes('grok')) return 0.25 + + // 0.33x + if (id.includes('claude-haiku')) return 0.33 + if (id.includes('gemini-3-flash') || id.includes('gemini-3.0-flash')) return 0.33 + if (id.includes('gpt-5.1-codex-mini')) return 0.33 + if (id.includes('gpt-5.4-mini') || id.includes('gpt-5.4 mini')) return 0.33 + + // 1x — standard premium + if (id.includes('claude-sonnet')) return 1 + if (id.includes('gemini-2.5-pro')) return 1 + if (id.includes('gemini-3-pro') || id.includes('gemini-3.0-pro')) return 1 + if (id.includes('gemini-3.1-pro')) return 1 + if (id.includes('gpt-5.1')) return 1 + if (id.includes('gpt-5.2')) return 1 + if (id.includes('gpt-5.3')) return 1 + if (id.includes('gpt-5.4')) return 1 + + // 30x — fast opus + if (id.includes('claude-opus') && id.includes('fast')) return 30 + + // 3x — opus + if (id.includes('claude-opus')) return 3 + + return 1 +} + +/** + * Human-readable string describing the premium-request cost for a model. + * + * Examples: `"included (0×)"`, `"1× premium request"`, `"0.33× premium request"` + */ +export function formatCopilotMultiplier(multiplier: number): string { + if (multiplier === 0) return 'included (0×)' + if (Number.isInteger(multiplier)) return `${multiplier}× premium request` + return `${multiplier}× premium request` +} + +/** Known model metadata for Copilot-available models. 
*/ +export const COPILOT_MODELS: readonly CopilotModelInfo[] = [ + { id: 'gpt-4.1', name: 'GPT-4.1', contextWindow: 128_000 }, + { id: 'gpt-4o', name: 'GPT-4o', contextWindow: 128_000 }, + { id: 'gpt-5-mini', name: 'GPT-5 mini', contextWindow: 200_000 }, + { id: 'gpt-5.1', name: 'GPT-5.1', contextWindow: 200_000 }, + { id: 'gpt-5.1-codex', name: 'GPT-5.1-Codex', contextWindow: 200_000 }, + { id: 'gpt-5.1-codex-mini', name: 'GPT-5.1-Codex-Mini', contextWindow: 200_000 }, + { id: 'gpt-5.1-codex-max', name: 'GPT-5.1-Codex-Max', contextWindow: 200_000 }, + { id: 'gpt-5.2', name: 'GPT-5.2', contextWindow: 200_000 }, + { id: 'gpt-5.2-codex', name: 'GPT-5.2-Codex', contextWindow: 200_000 }, + { id: 'gpt-5.3-codex', name: 'GPT-5.3-Codex', contextWindow: 200_000 }, + { id: 'gpt-5.4', name: 'GPT-5.4', contextWindow: 200_000 }, + { id: 'gpt-5.4-mini', name: 'GPT-5.4 mini', contextWindow: 200_000 }, + { id: 'claude-haiku-4.5', name: 'Claude Haiku 4.5', contextWindow: 200_000 }, + { id: 'claude-opus-4.5', name: 'Claude Opus 4.5', contextWindow: 200_000 }, + { id: 'claude-opus-4.6', name: 'Claude Opus 4.6', contextWindow: 200_000 }, + { id: 'claude-opus-4.6-fast', name: 'Claude Opus 4.6 (fast)', contextWindow: 200_000 }, + { id: 'claude-sonnet-4', name: 'Claude Sonnet 4', contextWindow: 200_000 }, + { id: 'claude-sonnet-4.5', name: 'Claude Sonnet 4.5', contextWindow: 200_000 }, + { id: 'claude-sonnet-4.6', name: 'Claude Sonnet 4.6', contextWindow: 200_000 }, + { id: 'gemini-2.5-pro', name: 'Gemini 2.5 Pro', contextWindow: 1_000_000 }, + { id: 'gemini-3-flash', name: 'Gemini 3 Flash', contextWindow: 1_000_000 }, + { id: 'gemini-3-pro', name: 'Gemini 3 Pro', contextWindow: 1_000_000 }, + { id: 'gemini-3.1-pro', name: 'Gemini 3.1 Pro', contextWindow: 1_000_000 }, + { id: 'grok-code-fast-1', name: 'Grok Code Fast 1', contextWindow: 128_000 }, + { id: 'raptor-mini', name: 'Raptor mini', contextWindow: 128_000 }, + { id: 'goldeneye', name: 'Goldeneye', contextWindow: 128_000 }, +] as 
const diff --git a/src/types.ts b/src/types.ts index 2875a35..146fb41 100644 --- a/src/types.ts +++ b/src/types.ts @@ -186,7 +186,7 @@ export interface ToolDefinition> { export interface AgentConfig { readonly name: string readonly model: string - readonly provider?: 'anthropic' | 'openai' + readonly provider?: 'anthropic' | 'copilot' | 'openai' readonly systemPrompt?: string /** Names of tools (from the tool registry) available to this agent. */ readonly tools?: readonly string[] @@ -285,7 +285,7 @@ export interface OrchestratorEvent { export interface OrchestratorConfig { readonly maxConcurrency?: number readonly defaultModel?: string - readonly defaultProvider?: 'anthropic' | 'openai' + readonly defaultProvider?: 'anthropic' | 'copilot' | 'openai' onProgress?: (event: OrchestratorEvent) => void } From 8371cdb7c05afeef3d77055dde702378414e52bf Mon Sep 17 00:00:00 2001 From: Deathwing Date: Thu, 2 Apr 2026 02:19:06 +0200 Subject: [PATCH 02/38] refactor: address all 7 PR review comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Fix header comment — document correct env var precedence (apiKey → GITHUB_COPILOT_TOKEN → GITHUB_TOKEN → device flow) 2. Use application/x-www-form-urlencoded for device code endpoint 3. Use application/x-www-form-urlencoded for poll endpoint 4. Add mutex (promise-based) on #getSessionToken to prevent concurrent token refreshes and duplicate device flow prompts 5. Add DeviceCodeCallback + CopilotAdapterOptions so callers can control device flow output instead of hardcoded console.log 6. Extract shared OpenAI wire-format helpers into openai-common.ts, imported by both openai.ts and copilot.ts (-142 lines net) 7. 
Update createAdapter JSDoc to mention copilot env vars --- src/llm/adapter.ts | 8 +- src/llm/copilot.ts | 308 ++++++++++----------------------------- src/llm/openai-common.ts | 255 ++++++++++++++++++++++++++++++++ src/llm/openai.ts | 263 +-------------------------------- 4 files changed, 346 insertions(+), 488 deletions(-) create mode 100644 src/llm/openai-common.ts diff --git a/src/llm/adapter.ts b/src/llm/adapter.ts index 69754e4..f641edd 100644 --- a/src/llm/adapter.ts +++ b/src/llm/adapter.ts @@ -42,8 +42,12 @@ export type SupportedProvider = 'anthropic' | 'copilot' | 'openai' /** * Instantiate the appropriate {@link LLMAdapter} for the given provider. * - * API keys fall back to the standard environment variables - * (`ANTHROPIC_API_KEY` / `OPENAI_API_KEY`) when not supplied explicitly. + * API keys fall back to the standard environment variables when not supplied + * explicitly: + * - `anthropic` → `ANTHROPIC_API_KEY` + * - `openai` → `OPENAI_API_KEY` + * - `copilot` → `GITHUB_COPILOT_TOKEN` / `GITHUB_TOKEN`, or interactive + * OAuth2 device flow if neither is set * * Adapters are imported lazily so that projects using only one provider * are not forced to install the SDK for the other. diff --git a/src/llm/copilot.ts b/src/llm/copilot.ts index 07fdc45..7e829fe 100644 --- a/src/llm/copilot.ts +++ b/src/llm/copilot.ts @@ -3,18 +3,20 @@ * * Uses the OpenAI-compatible Copilot Chat Completions endpoint at * `https://api.githubcopilot.com`. Authentication requires a GitHub token - * (e.g. from `gh auth token`) which is exchanged for a short-lived Copilot - * session token via the internal token endpoint. + * which is exchanged for a short-lived Copilot session token via the + * internal token endpoint. * * API key resolution order: * 1. `apiKey` constructor argument - * 2. `GITHUB_TOKEN` environment variable + * 2. `GITHUB_COPILOT_TOKEN` environment variable + * 3. `GITHUB_TOKEN` environment variable + * 4. 
Interactive OAuth2 device flow (prompts the user to sign in) * * @example * ```ts * import { CopilotAdapter } from './copilot.js' * - * const adapter = new CopilotAdapter() // uses GITHUB_TOKEN env var + * const adapter = new CopilotAdapter() // uses GITHUB_COPILOT_TOKEN, falling back to GITHUB_TOKEN * const response = await adapter.chat(messages, { * model: 'claude-sonnet-4', * maxTokens: 4096, @@ -24,14 +26,7 @@ import OpenAI from 'openai' import type { - ChatCompletion, - ChatCompletionAssistantMessageParam, ChatCompletionChunk, - ChatCompletionMessageParam, - ChatCompletionMessageToolCall, - ChatCompletionTool, - ChatCompletionToolMessageParam, - ChatCompletionUserMessageParam, } from 'openai/resources/chat/completions/index.js' import type { @@ -47,6 +42,13 @@ import type { ToolUseBlock, } from '../types.js' +import { + toOpenAITool, + fromOpenAICompletion, + normalizeFinishReason, + buildOpenAIMessageList, +} from './openai-common.js' + // --------------------------------------------------------------------------- // Copilot auth — OAuth2 device flow + token exchange // --------------------------------------------------------------------------- @@ -81,22 +83,38 @@ interface PollResponse { error_description?: string } +/** + * Callback invoked when the OAuth2 device flow needs the user to authorize. + * Receives the verification URI and user code. If not provided, defaults to + * printing them to stdout. + */ +export type DeviceCodeCallback = (verificationUri: string, userCode: string) => void + +const defaultDeviceCodeCallback: DeviceCodeCallback = (uri, code) => { + console.log(`\n┌─────────────────────────────────────────────┐`) + console.log(`│ GitHub Copilot — Sign in │`) + console.log(`│ │`) + console.log(`│ Open: ${uri.padEnd(35)}│`) + console.log(`│ Code: ${code.padEnd(35)}│`) + console.log(`└─────────────────────────────────────────────┘\n`) +} + /** * Start the GitHub OAuth2 device code flow with the Copilot client ID. 
* - * Prints a user code and URL to stdout, then polls until the user completes - * authorization in their browser. Returns a GitHub OAuth token scoped for - * Copilot access. + * Calls `onDeviceCode` with the verification URI and user code, then polls + * until the user completes authorization. Returns a GitHub OAuth token + * scoped for Copilot access. */ -async function deviceCodeLogin(): Promise { +async function deviceCodeLogin(onDeviceCode: DeviceCodeCallback): Promise { // Step 1: Request a device code const codeRes = await fetch(DEVICE_CODE_URL, { method: 'POST', headers: { Accept: 'application/json', - 'Content-Type': 'application/json', + 'Content-Type': 'application/x-www-form-urlencoded', }, - body: JSON.stringify({ client_id: COPILOT_CLIENT_ID, scope: 'copilot' }), + body: new URLSearchParams({ client_id: COPILOT_CLIENT_ID, scope: 'copilot' }), }) if (!codeRes.ok) { @@ -106,13 +124,8 @@ async function deviceCodeLogin(): Promise { const codeData = (await codeRes.json()) as DeviceCodeResponse - // Step 2: Prompt the user - console.log(`\n┌─────────────────────────────────────────────┐`) - console.log(`│ GitHub Copilot — Sign in │`) - console.log(`│ │`) - console.log(`│ Open: ${codeData.verification_uri.padEnd(35)}│`) - console.log(`│ Code: ${codeData.user_code.padEnd(35)}│`) - console.log(`└─────────────────────────────────────────────┘\n`) + // Step 2: Prompt the user via callback + onDeviceCode(codeData.verification_uri, codeData.user_code) // Step 3: Poll for the user to complete auth const interval = (codeData.interval || 5) * 1000 @@ -125,9 +138,9 @@ async function deviceCodeLogin(): Promise { method: 'POST', headers: { Accept: 'application/json', - 'Content-Type': 'application/json', + 'Content-Type': 'application/x-www-form-urlencoded', }, - body: JSON.stringify({ + body: new URLSearchParams({ client_id: COPILOT_CLIENT_ID, device_code: codeData.device_code, grant_type: 'urn:ietf:params:oauth:grant-type:device_code', @@ -182,189 +195,35 @@ async 
function fetchCopilotToken(githubToken: string): Promise, - }, - } -} - -function hasToolResults(msg: LLMMessage): boolean { - return msg.content.some((b) => b.type === 'tool_result') -} - -function toOpenAIMessages(messages: LLMMessage[]): ChatCompletionMessageParam[] { - const result: ChatCompletionMessageParam[] = [] - - for (const msg of messages) { - if (msg.role === 'assistant') { - result.push(toOpenAIAssistantMessage(msg)) - } else { - if (!hasToolResults(msg)) { - result.push(toOpenAIUserMessage(msg)) - } else { - const nonToolBlocks = msg.content.filter((b) => b.type !== 'tool_result') - if (nonToolBlocks.length > 0) { - result.push(toOpenAIUserMessage({ role: 'user', content: nonToolBlocks })) - } - for (const block of msg.content) { - if (block.type === 'tool_result') { - const toolMsg: ChatCompletionToolMessageParam = { - role: 'tool', - tool_call_id: block.tool_use_id, - content: block.content, - } - result.push(toolMsg) - } - } - } - } - } - - return result -} - -function toOpenAIUserMessage(msg: LLMMessage): ChatCompletionUserMessageParam { - if (msg.content.length === 1 && msg.content[0]?.type === 'text') { - return { role: 'user', content: msg.content[0].text } - } - - type ContentPart = OpenAI.Chat.ChatCompletionContentPartText | OpenAI.Chat.ChatCompletionContentPartImage - const parts: ContentPart[] = [] - - for (const block of msg.content) { - if (block.type === 'text') { - parts.push({ type: 'text', text: block.text }) - } else if (block.type === 'image') { - parts.push({ - type: 'image_url', - image_url: { - url: `data:${block.source.media_type};base64,${block.source.data}`, - }, - }) - } - } - - return { role: 'user', content: parts } -} - -function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessageParam { - const toolCalls: ChatCompletionMessageToolCall[] = [] - const textParts: string[] = [] - - for (const block of msg.content) { - if (block.type === 'tool_use') { - toolCalls.push({ - id: block.id, - type: 
'function', - function: { - name: block.name, - arguments: JSON.stringify(block.input), - }, - }) - } else if (block.type === 'text') { - textParts.push(block.text) - } - } - - const assistantMsg: ChatCompletionAssistantMessageParam = { - role: 'assistant', - content: textParts.length > 0 ? textParts.join('') : null, - } - - if (toolCalls.length > 0) { - assistantMsg.tool_calls = toolCalls - } - - return assistantMsg -} - -// --------------------------------------------------------------------------- -// Internal helpers — OpenAI → framework -// --------------------------------------------------------------------------- - -function fromOpenAICompletion(completion: ChatCompletion): LLMResponse { - const choice = completion.choices[0] - if (choice === undefined) { - throw new Error('Copilot returned a completion with no choices') - } - - const content: ContentBlock[] = [] - const message = choice.message - - if (message.content !== null && message.content !== undefined) { - const textBlock: TextBlock = { type: 'text', text: message.content } - content.push(textBlock) - } - - for (const toolCall of message.tool_calls ?? []) { - let parsedInput: Record = {} - try { - const parsed: unknown = JSON.parse(toolCall.function.arguments) - if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) { - parsedInput = parsed as Record - } - } catch { - // Malformed arguments — surface as empty object. - } - - const toolUseBlock: ToolUseBlock = { - type: 'tool_use', - id: toolCall.id, - name: toolCall.function.name, - input: parsedInput, - } - content.push(toolUseBlock) - } - - const stopReason = normalizeFinishReason(choice.finish_reason ?? 'stop') - - return { - id: completion.id, - content, - model: completion.model, - stop_reason: stopReason, - usage: { - input_tokens: completion.usage?.prompt_tokens ?? 0, - output_tokens: completion.usage?.completion_tokens ?? 
0, - }, - } -} - -function normalizeFinishReason(reason: string): string { - switch (reason) { - case 'stop': return 'end_turn' - case 'tool_calls': return 'tool_use' - case 'length': return 'max_tokens' - case 'content_filter': return 'content_filter' - default: return reason - } -} - // --------------------------------------------------------------------------- // Adapter implementation // --------------------------------------------------------------------------- +/** Options for the {@link CopilotAdapter} constructor. */ +export interface CopilotAdapterOptions { + /** GitHub OAuth token already scoped for Copilot. Falls back to env vars. */ + apiKey?: string + /** + * Callback invoked when the OAuth2 device flow needs user action. + * Defaults to printing the verification URI and user code to stdout. + */ + onDeviceCode?: DeviceCodeCallback +} + /** * LLM adapter backed by the GitHub Copilot Chat Completions API. * * Authentication options (tried in order): * 1. `apiKey` constructor arg — a GitHub OAuth token already scoped for Copilot - * 2. `GITHUB_COPILOT_TOKEN` env var — same as above - * 3. Interactive OAuth2 device flow — prompts the user to sign in via browser + * 2. `GITHUB_COPILOT_TOKEN` env var + * 3. `GITHUB_TOKEN` env var + * 4. Interactive OAuth2 device flow * * The GitHub token is exchanged for a short-lived Copilot session token, which * is cached and auto-refreshed. * * Thread-safe — a single instance may be shared across concurrent agent runs. + * Concurrent token refreshes are serialised via an internal mutex. 
*/ export class CopilotAdapter implements LLMAdapter { readonly name = 'copilot' @@ -372,17 +231,25 @@ export class CopilotAdapter implements LLMAdapter { #githubToken: string | null #cachedToken: string | null = null #tokenExpiresAt = 0 + #refreshPromise: Promise | null = null + readonly #onDeviceCode: DeviceCodeCallback - constructor(apiKey?: string) { - this.#githubToken = apiKey + constructor(apiKeyOrOptions?: string | CopilotAdapterOptions) { + const opts = typeof apiKeyOrOptions === 'string' + ? { apiKey: apiKeyOrOptions } + : apiKeyOrOptions ?? {} + + this.#githubToken = opts.apiKey ?? process.env['GITHUB_COPILOT_TOKEN'] ?? process.env['GITHUB_TOKEN'] ?? null + this.#onDeviceCode = opts.onDeviceCode ?? defaultDeviceCodeCallback } /** * Return a valid Copilot session token, refreshing if necessary. * If no GitHub token is available, triggers the interactive device flow. + * Concurrent calls share a single in-flight refresh to avoid races. */ async #getSessionToken(): Promise { const now = Math.floor(Date.now() / 1000) @@ -390,9 +257,22 @@ export class CopilotAdapter implements LLMAdapter { return this.#cachedToken } - // If we don't have a GitHub token yet, do the device flow + // If another call is already refreshing, piggyback on that promise + if (this.#refreshPromise) { + return this.#refreshPromise + } + + this.#refreshPromise = this.#doRefresh() + try { + return await this.#refreshPromise + } finally { + this.#refreshPromise = null + } + } + + async #doRefresh(): Promise { if (!this.#githubToken) { - this.#githubToken = await deviceCodeLogin() + this.#githubToken = await deviceCodeLogin(this.#onDeviceCode) } const resp = await fetchCopilotToken(this.#githubToken) @@ -568,36 +448,6 @@ export class CopilotAdapter implements LLMAdapter { } } -// --------------------------------------------------------------------------- -// Private utility -// --------------------------------------------------------------------------- - -function buildOpenAIMessageList( - 
messages: LLMMessage[], - systemPrompt: string | undefined, -): ChatCompletionMessageParam[] { - const result: ChatCompletionMessageParam[] = [] - - if (systemPrompt !== undefined && systemPrompt.length > 0) { - result.push({ role: 'system', content: systemPrompt }) - } - - result.push(...toOpenAIMessages(messages)) - return result -} - -// Re-export types that consumers of this module commonly need alongside the adapter. -export type { - ContentBlock, - LLMAdapter, - LLMChatOptions, - LLMMessage, - LLMResponse, - LLMStreamOptions, - LLMToolDef, - StreamEvent, -} - // --------------------------------------------------------------------------- // Premium request multipliers // --------------------------------------------------------------------------- diff --git a/src/llm/openai-common.ts b/src/llm/openai-common.ts new file mode 100644 index 0000000..46fc67a --- /dev/null +++ b/src/llm/openai-common.ts @@ -0,0 +1,255 @@ +/** + * @fileoverview Shared OpenAI wire-format conversion helpers. + * + * Both the OpenAI and Copilot adapters use the OpenAI Chat Completions API + * format. This module contains the common conversion logic so it isn't + * duplicated across adapters. + */ + +import OpenAI from 'openai' +import type { + ChatCompletion, + ChatCompletionAssistantMessageParam, + ChatCompletionMessageParam, + ChatCompletionMessageToolCall, + ChatCompletionTool, + ChatCompletionToolMessageParam, + ChatCompletionUserMessageParam, +} from 'openai/resources/chat/completions/index.js' + +import type { + ContentBlock, + LLMMessage, + LLMResponse, + LLMToolDef, + TextBlock, + ToolUseBlock, +} from '../types.js' + +// --------------------------------------------------------------------------- +// Framework → OpenAI +// --------------------------------------------------------------------------- + +/** + * Convert a framework {@link LLMToolDef} to an OpenAI {@link ChatCompletionTool}. 
+ */ +export function toOpenAITool(tool: LLMToolDef): ChatCompletionTool { + return { + type: 'function', + function: { + name: tool.name, + description: tool.description, + parameters: tool.inputSchema as Record, + }, + } +} + +/** + * Determine whether a framework message contains any `tool_result` content + * blocks, which must be serialised as separate OpenAI `tool`-role messages. + */ +function hasToolResults(msg: LLMMessage): boolean { + return msg.content.some((b) => b.type === 'tool_result') +} + +/** + * Convert framework {@link LLMMessage}s into OpenAI + * {@link ChatCompletionMessageParam} entries. + * + * `tool_result` blocks are expanded into top-level `tool`-role messages + * because OpenAI uses a dedicated role for tool results rather than embedding + * them inside user-content arrays. + */ +export function toOpenAIMessages(messages: LLMMessage[]): ChatCompletionMessageParam[] { + const result: ChatCompletionMessageParam[] = [] + + for (const msg of messages) { + if (msg.role === 'assistant') { + result.push(toOpenAIAssistantMessage(msg)) + } else { + // user role + if (!hasToolResults(msg)) { + result.push(toOpenAIUserMessage(msg)) + } else { + const nonToolBlocks = msg.content.filter((b) => b.type !== 'tool_result') + if (nonToolBlocks.length > 0) { + result.push(toOpenAIUserMessage({ role: 'user', content: nonToolBlocks })) + } + + for (const block of msg.content) { + if (block.type === 'tool_result') { + const toolMsg: ChatCompletionToolMessageParam = { + role: 'tool', + tool_call_id: block.tool_use_id, + content: block.content, + } + result.push(toolMsg) + } + } + } + } + } + + return result +} + +/** + * Convert a `user`-role framework message into an OpenAI user message. + * Image blocks are converted to the OpenAI image_url content part format. 
+ */ +function toOpenAIUserMessage(msg: LLMMessage): ChatCompletionUserMessageParam { + if (msg.content.length === 1 && msg.content[0]?.type === 'text') { + return { role: 'user', content: msg.content[0].text } + } + + type ContentPart = OpenAI.Chat.ChatCompletionContentPartText | OpenAI.Chat.ChatCompletionContentPartImage + const parts: ContentPart[] = [] + + for (const block of msg.content) { + if (block.type === 'text') { + parts.push({ type: 'text', text: block.text }) + } else if (block.type === 'image') { + parts.push({ + type: 'image_url', + image_url: { + url: `data:${block.source.media_type};base64,${block.source.data}`, + }, + }) + } + // tool_result blocks are handled by the caller (toOpenAIMessages); skip here. + } + + return { role: 'user', content: parts } +} + +/** + * Convert an `assistant`-role framework message into an OpenAI assistant message. + * `tool_use` blocks become `tool_calls`; `text` blocks become message content. + */ +function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessageParam { + const toolCalls: ChatCompletionMessageToolCall[] = [] + const textParts: string[] = [] + + for (const block of msg.content) { + if (block.type === 'tool_use') { + toolCalls.push({ + id: block.id, + type: 'function', + function: { + name: block.name, + arguments: JSON.stringify(block.input), + }, + }) + } else if (block.type === 'text') { + textParts.push(block.text) + } + } + + const assistantMsg: ChatCompletionAssistantMessageParam = { + role: 'assistant', + content: textParts.length > 0 ? textParts.join('') : null, + } + + if (toolCalls.length > 0) { + assistantMsg.tool_calls = toolCalls + } + + return assistantMsg +} + +// --------------------------------------------------------------------------- +// OpenAI → Framework +// --------------------------------------------------------------------------- + +/** + * Convert an OpenAI {@link ChatCompletion} into a framework {@link LLMResponse}. 
+ * + * Takes only the first choice (index 0), consistent with how the framework + * is designed for single-output agents. + */ +export function fromOpenAICompletion(completion: ChatCompletion): LLMResponse { + const choice = completion.choices[0] + if (choice === undefined) { + throw new Error('OpenAI returned a completion with no choices') + } + + const content: ContentBlock[] = [] + const message = choice.message + + if (message.content !== null && message.content !== undefined) { + const textBlock: TextBlock = { type: 'text', text: message.content } + content.push(textBlock) + } + + for (const toolCall of message.tool_calls ?? []) { + let parsedInput: Record = {} + try { + const parsed: unknown = JSON.parse(toolCall.function.arguments) + if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) { + parsedInput = parsed as Record + } + } catch { + // Malformed arguments from the model — surface as empty object. + } + + const toolUseBlock: ToolUseBlock = { + type: 'tool_use', + id: toolCall.id, + name: toolCall.function.name, + input: parsedInput, + } + content.push(toolUseBlock) + } + + const stopReason = normalizeFinishReason(choice.finish_reason ?? 'stop') + + return { + id: completion.id, + content, + model: completion.model, + stop_reason: stopReason, + usage: { + input_tokens: completion.usage?.prompt_tokens ?? 0, + output_tokens: completion.usage?.completion_tokens ?? 0, + }, + } +} + +/** + * Normalize an OpenAI `finish_reason` string to the framework's canonical + * stop-reason vocabulary. 
+ * + * Mapping: + * - `'stop'` → `'end_turn'` + * - `'tool_calls'` → `'tool_use'` + * - `'length'` → `'max_tokens'` + * - `'content_filter'` → `'content_filter'` + * - anything else → passed through unchanged + */ +export function normalizeFinishReason(reason: string): string { + switch (reason) { + case 'stop': return 'end_turn' + case 'tool_calls': return 'tool_use' + case 'length': return 'max_tokens' + case 'content_filter': return 'content_filter' + default: return reason + } +} + +/** + * Prepend a system message when `systemPrompt` is provided, then append the + * converted conversation messages. + */ +export function buildOpenAIMessageList( + messages: LLMMessage[], + systemPrompt: string | undefined, +): ChatCompletionMessageParam[] { + const result: ChatCompletionMessageParam[] = [] + + if (systemPrompt !== undefined && systemPrompt.length > 0) { + result.push({ role: 'system', content: systemPrompt }) + } + + result.push(...toOpenAIMessages(messages)) + return result +} diff --git a/src/llm/openai.ts b/src/llm/openai.ts index b99ddfd..a3c2ab9 100644 --- a/src/llm/openai.ts +++ b/src/llm/openai.ts @@ -32,14 +32,7 @@ import OpenAI from 'openai' import type { - ChatCompletion, - ChatCompletionAssistantMessageParam, ChatCompletionChunk, - ChatCompletionMessageParam, - ChatCompletionMessageToolCall, - ChatCompletionTool, - ChatCompletionToolMessageParam, - ChatCompletionUserMessageParam, } from 'openai/resources/chat/completions/index.js' import type { @@ -55,231 +48,12 @@ import type { ToolUseBlock, } from '../types.js' -// --------------------------------------------------------------------------- -// Internal helpers — framework → OpenAI -// --------------------------------------------------------------------------- - -/** - * Convert a framework {@link LLMToolDef} to an OpenAI {@link ChatCompletionTool}. - * - * OpenAI wraps the function definition inside a `function` key and a `type` - * discriminant. The `inputSchema` is already a JSON Schema object. 
- */ -function toOpenAITool(tool: LLMToolDef): ChatCompletionTool { - return { - type: 'function', - function: { - name: tool.name, - description: tool.description, - parameters: tool.inputSchema as Record, - }, - } -} - -/** - * Determine whether a framework message contains any `tool_result` content - * blocks, which must be serialised as separate OpenAI `tool`-role messages. - */ -function hasToolResults(msg: LLMMessage): boolean { - return msg.content.some((b) => b.type === 'tool_result') -} - -/** - * Convert a single framework {@link LLMMessage} into one or more OpenAI - * {@link ChatCompletionMessageParam} entries. - * - * The expansion is necessary because OpenAI represents tool results as - * top-level messages with role `tool`, whereas in our model they are content - * blocks inside a `user` message. - * - * Expansion rules: - * - A `user` message containing only text/image blocks → single user message - * - A `user` message containing `tool_result` blocks → one `tool` message per - * tool_result block; any remaining text/image blocks are folded into an - * additional user message prepended to the group - * - An `assistant` message → single assistant message with optional tool_calls - */ -function toOpenAIMessages(messages: LLMMessage[]): ChatCompletionMessageParam[] { - const result: ChatCompletionMessageParam[] = [] - - for (const msg of messages) { - if (msg.role === 'assistant') { - result.push(toOpenAIAssistantMessage(msg)) - } else { - // user role - if (!hasToolResults(msg)) { - result.push(toOpenAIUserMessage(msg)) - } else { - // Split: text/image blocks become a user message (if any exist), then - // each tool_result block becomes an independent tool message. 
- const nonToolBlocks = msg.content.filter((b) => b.type !== 'tool_result') - if (nonToolBlocks.length > 0) { - result.push(toOpenAIUserMessage({ role: 'user', content: nonToolBlocks })) - } - - for (const block of msg.content) { - if (block.type === 'tool_result') { - const toolMsg: ChatCompletionToolMessageParam = { - role: 'tool', - tool_call_id: block.tool_use_id, - content: block.content, - } - result.push(toolMsg) - } - } - } - } - } - - return result -} - -/** - * Convert a `user`-role framework message into an OpenAI user message. - * Image blocks are converted to the OpenAI image_url content part format. - */ -function toOpenAIUserMessage(msg: LLMMessage): ChatCompletionUserMessageParam { - // If the entire content is a single text block, use the compact string form - // to keep the request payload smaller. - if (msg.content.length === 1 && msg.content[0]?.type === 'text') { - return { role: 'user', content: msg.content[0].text } - } - - type ContentPart = OpenAI.Chat.ChatCompletionContentPartText | OpenAI.Chat.ChatCompletionContentPartImage - const parts: ContentPart[] = [] - - for (const block of msg.content) { - if (block.type === 'text') { - parts.push({ type: 'text', text: block.text }) - } else if (block.type === 'image') { - parts.push({ - type: 'image_url', - image_url: { - url: `data:${block.source.media_type};base64,${block.source.data}`, - }, - }) - } - // tool_result blocks are handled by the caller (toOpenAIMessages); skip here. - } - - return { role: 'user', content: parts } -} - -/** - * Convert an `assistant`-role framework message into an OpenAI assistant message. - * - * Any `tool_use` blocks become `tool_calls`; `text` blocks become the message content. 
- */ -function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessageParam { - const toolCalls: ChatCompletionMessageToolCall[] = [] - const textParts: string[] = [] - - for (const block of msg.content) { - if (block.type === 'tool_use') { - toolCalls.push({ - id: block.id, - type: 'function', - function: { - name: block.name, - arguments: JSON.stringify(block.input), - }, - }) - } else if (block.type === 'text') { - textParts.push(block.text) - } - } - - const assistantMsg: ChatCompletionAssistantMessageParam = { - role: 'assistant', - content: textParts.length > 0 ? textParts.join('') : null, - } - - if (toolCalls.length > 0) { - assistantMsg.tool_calls = toolCalls - } - - return assistantMsg -} - -// --------------------------------------------------------------------------- -// Internal helpers — OpenAI → framework -// --------------------------------------------------------------------------- - -/** - * Convert an OpenAI {@link ChatCompletion} into a framework {@link LLMResponse}. - * - * We take only the first choice (index 0), consistent with how the framework - * is designed for single-output agents. - */ -function fromOpenAICompletion(completion: ChatCompletion): LLMResponse { - const choice = completion.choices[0] - if (choice === undefined) { - throw new Error('OpenAI returned a completion with no choices') - } - - const content: ContentBlock[] = [] - const message = choice.message - - if (message.content !== null && message.content !== undefined) { - const textBlock: TextBlock = { type: 'text', text: message.content } - content.push(textBlock) - } - - for (const toolCall of message.tool_calls ?? []) { - let parsedInput: Record = {} - try { - const parsed: unknown = JSON.parse(toolCall.function.arguments) - if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) { - parsedInput = parsed as Record - } - } catch { - // Malformed arguments from the model — surface as empty object. 
- } - - const toolUseBlock: ToolUseBlock = { - type: 'tool_use', - id: toolCall.id, - name: toolCall.function.name, - input: parsedInput, - } - content.push(toolUseBlock) - } - - const stopReason = normalizeFinishReason(choice.finish_reason ?? 'stop') - - return { - id: completion.id, - content, - model: completion.model, - stop_reason: stopReason, - usage: { - input_tokens: completion.usage?.prompt_tokens ?? 0, - output_tokens: completion.usage?.completion_tokens ?? 0, - }, - } -} - -/** - * Normalize an OpenAI `finish_reason` string to the framework's canonical - * stop-reason vocabulary so consumers never need to branch on provider-specific - * strings. - * - * Mapping: - * - `'stop'` → `'end_turn'` - * - `'tool_calls'` → `'tool_use'` - * - `'length'` → `'max_tokens'` - * - `'content_filter'` → `'content_filter'` - * - anything else → passed through unchanged - */ -function normalizeFinishReason(reason: string): string { - switch (reason) { - case 'stop': return 'end_turn' - case 'tool_calls': return 'tool_use' - case 'length': return 'max_tokens' - case 'content_filter': return 'content_filter' - default: return reason - } -} +import { + toOpenAITool, + fromOpenAICompletion, + normalizeFinishReason, + buildOpenAIMessageList, +} from './openai-common.js' // --------------------------------------------------------------------------- // Adapter implementation @@ -484,31 +258,6 @@ export class OpenAIAdapter implements LLMAdapter { } } -// --------------------------------------------------------------------------- -// Private utility -// --------------------------------------------------------------------------- - -/** - * Prepend a system message when `systemPrompt` is provided, then append the - * converted conversation messages. - * - * OpenAI represents system instructions as a message with `role: 'system'` - * at the top of the array, not as a separate API parameter. 
- */ -function buildOpenAIMessageList( - messages: LLMMessage[], - systemPrompt: string | undefined, -): ChatCompletionMessageParam[] { - const result: ChatCompletionMessageParam[] = [] - - if (systemPrompt !== undefined && systemPrompt.length > 0) { - result.push({ role: 'system', content: systemPrompt }) - } - - result.push(...toOpenAIMessages(messages)) - return result -} - // Re-export types that consumers of this module commonly need alongside the adapter. export type { ContentBlock, From 7acd450707a0642417bd9fea3f1bec7b9c2f360c Mon Sep 17 00:00:00 2001 From: JackChen Date: Thu, 2 Apr 2026 12:20:21 +0800 Subject: [PATCH 03/38] Update READMEs: add Copilot adapter, contributors section --- README.md | 13 ++++++++++--- README_zh.md | 13 ++++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 31d3509..fdcea40 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ Build AI agent teams that work together. One agent plans, another implements, a npm install @jackchen_me/open-multi-agent ``` -Set `ANTHROPIC_API_KEY` (and optionally `OPENAI_API_KEY`) in your environment. +Set `ANTHROPIC_API_KEY` (and optionally `OPENAI_API_KEY` or `GITHUB_TOKEN` for Copilot) in your environment. ```typescript import { OpenMultiAgent } from '@jackchen_me/open-multi-agent' @@ -160,7 +160,7 @@ const result = await agent.run('Find the three most recent TypeScript releases.'
-Multi-Model Teams — mix Claude and GPT in one workflow +Multi-Model Teams — mix Claude, GPT, and Copilot in one workflow ```typescript const claudeAgent: AgentConfig = { @@ -246,6 +246,7 @@ for await (const event of agent.stream('Explain monads in two sentences.')) { │ - prompt() │───►│ LLMAdapter │ │ - stream() │ │ - AnthropicAdapter │ └────────┬──────────┘ │ - OpenAIAdapter │ + │ │ - CopilotAdapter │ │ └──────────────────────┘ ┌────────▼──────────┐ │ AgentRunner │ ┌──────────────────────┐ @@ -269,7 +270,7 @@ for await (const event of agent.stream('Explain monads in two sentences.')) { Issues, feature requests, and PRs are welcome. Some areas where contributions would be especially valuable: -- **LLM Adapters** — Ollama, llama.cpp, vLLM, Gemini. The `LLMAdapter` interface requires just two methods: `chat()` and `stream()`. +- **LLM Adapters** — Copilot is now supported out of the box. Additional adapters for Ollama, llama.cpp, vLLM, and Gemini are welcome. The `LLMAdapter` interface requires just two methods: `chat()` and `stream()`. - **Examples** — Real-world workflows and use cases. - **Documentation** — Guides, tutorials, and API docs. @@ -277,6 +278,12 @@ Issues, feature requests, and PRs are welcome. Some areas where contributions wo [![Star History Chart](https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260402)](https://star-history.com/#JackChen-me/open-multi-agent&Date) +## Contributors + + + + + ## License MIT diff --git a/README_zh.md b/README_zh.md index e9a3f00..f78016a 100644 --- a/README_zh.md +++ b/README_zh.md @@ -21,7 +21,7 @@ npm install @jackchen_me/open-multi-agent ``` -在环境变量中设置 `ANTHROPIC_API_KEY`(以及可选的 `OPENAI_API_KEY`)。 +在环境变量中设置 `ANTHROPIC_API_KEY`(以及可选的 `OPENAI_API_KEY` 或用于 Copilot 的 `GITHUB_TOKEN`)。 ```typescript import { OpenMultiAgent } from '@jackchen_me/open-multi-agent' @@ -160,7 +160,7 @@ const result = await agent.run('Find the three most recent TypeScript releases.'
-多模型团队 — 在一个工作流中混合使用 Claude 和 GPT +多模型团队 — 在一个工作流中混合使用 Claude、GPT 和 Copilot ```typescript const claudeAgent: AgentConfig = { @@ -246,6 +246,7 @@ for await (const event of agent.stream('Explain monads in two sentences.')) { │ - prompt() │───►│ LLMAdapter │ │ - stream() │ │ - AnthropicAdapter │ └────────┬──────────┘ │ - OpenAIAdapter │ + │ │ - CopilotAdapter │ │ └──────────────────────┘ ┌────────▼──────────┐ │ AgentRunner │ ┌──────────────────────┐ @@ -269,7 +270,7 @@ for await (const event of agent.stream('Explain monads in two sentences.')) { 欢迎提 Issue、功能需求和 PR。以下方向的贡献尤其有价值: -- **LLM 适配器** — Ollama、llama.cpp、vLLM、Gemini。`LLMAdapter` 接口只需实现两个方法:`chat()` 和 `stream()`。 +- **LLM 适配器** — Copilot 已原生支持。欢迎继续贡献 Ollama、llama.cpp、vLLM、Gemini 等适配器。`LLMAdapter` 接口只需实现两个方法:`chat()` 和 `stream()`。 - **示例** — 真实场景的工作流和用例。 - **文档** — 指南、教程和 API 文档。 @@ -277,6 +278,12 @@ for await (const event of agent.stream('Explain monads in two sentences.')) { [![Star History Chart](https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260402)](https://star-history.com/#JackChen-me/open-multi-agent&Date) +## 贡献者 + + + + + ## 许可证 MIT From 62d6fa9e26223f325dd13170e1559586f390e02f Mon Sep 17 00:00:00 2001 From: JackChen Date: Thu, 2 Apr 2026 19:33:10 +0800 Subject: [PATCH 04/38] feat: add baseURL and apiKey support for OpenAI-compatible APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable connecting to any OpenAI-compatible API (Ollama, vLLM, LM Studio, etc.) by adding baseURL and apiKey fields to AgentConfig and OrchestratorConfig, threaded through to adapter constructors. 
- OpenAIAdapter and AnthropicAdapter accept optional baseURL - createAdapter() forwards baseURL to both adapters, warns if used with copilot - All execution paths (runAgent, runTeam coordinator, buildPool) merge defaults - Fully backward compatible — omitting new fields preserves existing behavior --- src/agent/agent.ts | 2 +- src/llm/adapter.ts | 9 +++++++-- src/llm/anthropic.ts | 3 ++- src/llm/openai.ts | 3 ++- src/orchestrator/orchestrator.ts | 18 +++++++++++++++--- src/types.ts | 10 ++++++++++ 6 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/agent/agent.ts b/src/agent/agent.ts index 1dc530d..4ef392e 100644 --- a/src/agent/agent.ts +++ b/src/agent/agent.ts @@ -109,7 +109,7 @@ export class Agent { } const provider = this.config.provider ?? 'anthropic' - const adapter = await createAdapter(provider) + const adapter = await createAdapter(provider, this.config.apiKey, this.config.baseURL) const runnerOptions: RunnerOptions = { model: this.config.model, diff --git a/src/llm/adapter.ts b/src/llm/adapter.ts index f641edd..cbe5b4f 100644 --- a/src/llm/adapter.ts +++ b/src/llm/adapter.ts @@ -54,24 +54,29 @@ export type SupportedProvider = 'anthropic' | 'copilot' | 'openai' * * @param provider - Which LLM provider to target. * @param apiKey - Optional API key override; falls back to env var. + * @param baseURL - Optional base URL for OpenAI-compatible APIs (Ollama, vLLM, etc.). * @throws {Error} When the provider string is not recognised. 
*/ export async function createAdapter( provider: SupportedProvider, apiKey?: string, + baseURL?: string, ): Promise { switch (provider) { case 'anthropic': { const { AnthropicAdapter } = await import('./anthropic.js') - return new AnthropicAdapter(apiKey) + return new AnthropicAdapter(apiKey, baseURL) } case 'copilot': { + if (baseURL) { + console.warn('[open-multi-agent] baseURL is not supported for the copilot provider and will be ignored.') + } const { CopilotAdapter } = await import('./copilot.js') return new CopilotAdapter(apiKey) } case 'openai': { const { OpenAIAdapter } = await import('./openai.js') - return new OpenAIAdapter(apiKey) + return new OpenAIAdapter(apiKey, baseURL) } default: { // The `never` cast here makes TypeScript enforce exhaustiveness. diff --git a/src/llm/anthropic.ts b/src/llm/anthropic.ts index 6b91fd4..fd912d5 100644 --- a/src/llm/anthropic.ts +++ b/src/llm/anthropic.ts @@ -189,9 +189,10 @@ export class AnthropicAdapter implements LLMAdapter { readonly #client: Anthropic - constructor(apiKey?: string) { + constructor(apiKey?: string, baseURL?: string) { this.#client = new Anthropic({ apiKey: apiKey ?? process.env['ANTHROPIC_API_KEY'], + baseURL, }) } diff --git a/src/llm/openai.ts b/src/llm/openai.ts index a3c2ab9..568f94e 100644 --- a/src/llm/openai.ts +++ b/src/llm/openai.ts @@ -69,9 +69,10 @@ export class OpenAIAdapter implements LLMAdapter { readonly #client: OpenAI - constructor(apiKey?: string) { + constructor(apiKey?: string, baseURL?: string) { this.#client = new OpenAI({ apiKey: apiKey ?? 
process.env['OPENAI_API_KEY'], + baseURL, }) } diff --git a/src/orchestrator/orchestrator.ts b/src/orchestrator/orchestrator.ts index 0332969..1da8fb5 100644 --- a/src/orchestrator/orchestrator.ts +++ b/src/orchestrator/orchestrator.ts @@ -341,8 +341,8 @@ async function buildTaskPrompt(task: Task, team: Team): Promise { */ export class OpenMultiAgent { private readonly config: Required< - Omit - > & Pick + Omit + > & Pick private readonly teams: Map = new Map() private completedTaskCount = 0 @@ -360,6 +360,8 @@ export class OpenMultiAgent { maxConcurrency: config.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY, defaultModel: config.defaultModel ?? DEFAULT_MODEL, defaultProvider: config.defaultProvider ?? 'anthropic', + defaultBaseURL: config.defaultBaseURL, + defaultApiKey: config.defaultApiKey, onProgress: config.onProgress, } } @@ -405,7 +407,13 @@ export class OpenMultiAgent { * @param prompt - The user prompt to send. */ async runAgent(config: AgentConfig, prompt: string): Promise { - const agent = buildAgent(config) + const effective: AgentConfig = { + ...config, + provider: config.provider ?? this.config.defaultProvider, + baseURL: config.baseURL ?? this.config.defaultBaseURL, + apiKey: config.apiKey ?? this.config.defaultApiKey, + } + const agent = buildAgent(effective) this.config.onProgress?.({ type: 'agent_start', agent: config.name, @@ -462,6 +470,8 @@ export class OpenMultiAgent { name: 'coordinator', model: this.config.defaultModel, provider: this.config.defaultProvider, + baseURL: this.config.defaultBaseURL, + apiKey: this.config.defaultApiKey, systemPrompt: this.buildCoordinatorSystemPrompt(agentConfigs), maxTurns: 3, } @@ -792,6 +802,8 @@ export class OpenMultiAgent { ...config, model: config.model, provider: config.provider ?? this.config.defaultProvider, + baseURL: config.baseURL ?? this.config.defaultBaseURL, + apiKey: config.apiKey ?? 
this.config.defaultApiKey, } pool.add(buildAgent(effective)) } diff --git a/src/types.ts b/src/types.ts index 146fb41..bd44065 100644 --- a/src/types.ts +++ b/src/types.ts @@ -187,6 +187,14 @@ export interface AgentConfig { readonly name: string readonly model: string readonly provider?: 'anthropic' | 'copilot' | 'openai' + /** + * Custom base URL for OpenAI-compatible APIs (Ollama, vLLM, LM Studio, etc.). + * Note: local servers that don't require auth still need `apiKey` set to a + * non-empty placeholder (e.g. `'ollama'`) because the OpenAI SDK validates it. + */ + readonly baseURL?: string + /** API key override; falls back to the provider's standard env var. */ + readonly apiKey?: string readonly systemPrompt?: string /** Names of tools (from the tool registry) available to this agent. */ readonly tools?: readonly string[] @@ -286,6 +294,8 @@ export interface OrchestratorConfig { readonly maxConcurrency?: number readonly defaultModel?: string readonly defaultProvider?: 'anthropic' | 'copilot' | 'openai' + readonly defaultBaseURL?: string + readonly defaultApiKey?: string onProgress?: (event: OrchestratorEvent) => void } From a32440728fab4d60515e4e441684d4c68260dd85 Mon Sep 17 00:00:00 2001 From: JackChen Date: Thu, 2 Apr 2026 19:42:59 +0800 Subject: [PATCH 05/38] docs: update READMEs for baseURL support and local model examples - Add Ollama/local model agent example in multi-model teams section - Update "Model Agnostic" description to mention local models and baseURL - Update contributing section to reflect built-in OpenAI-compatible support - Add author block with Xiaohongshu link in Chinese README --- README.md | 19 +++++++++++++++---- README_zh.md | 23 +++++++++++++++++++---- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index fdcea40..fc9ce2e 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Build AI agent teams that work together. 
One agent plans, another implements, a - **Multi-Agent Teams** — Define agents with different roles, tools, and even different models. They collaborate through a message bus and shared memory. - **Task DAG Scheduling** — Tasks have dependencies. The framework resolves them topologically — dependent tasks wait, independent tasks run in parallel. -- **Model Agnostic** — Claude and GPT in the same team. Swap models per agent. Bring your own adapter for any LLM. +- **Model Agnostic** — Claude, GPT, and local models (Ollama, vLLM, LM Studio) in the same team. Swap models per agent via `baseURL`. - **In-Process Execution** — No subprocess overhead. Everything runs in one Node.js process. Deploy to serverless, Docker, CI/CD. ## Quick Start @@ -160,7 +160,7 @@ const result = await agent.run('Find the three most recent TypeScript releases.'
-Multi-Model Teams — mix Claude, GPT, and Copilot in one workflow +Multi-Model Teams — mix Claude, GPT, and local models in one workflow ```typescript const claudeAgent: AgentConfig = { @@ -179,9 +179,20 @@ const gptAgent: AgentConfig = { tools: ['bash', 'file_read', 'file_write'], } +// Any OpenAI-compatible API — Ollama, vLLM, LM Studio, etc. +const localAgent: AgentConfig = { + name: 'reviewer', + model: 'llama3.1', + provider: 'openai', + baseURL: 'http://localhost:11434/v1', + apiKey: 'ollama', + systemPrompt: 'You review code for correctness and clarity.', + tools: ['file_read', 'grep'], +} + const team = orchestrator.createTeam('mixed-team', { name: 'mixed-team', - agents: [claudeAgent, gptAgent], + agents: [claudeAgent, gptAgent, localAgent], sharedMemory: true, }) @@ -270,7 +281,7 @@ for await (const event of agent.stream('Explain monads in two sentences.')) { Issues, feature requests, and PRs are welcome. Some areas where contributions would be especially valuable: -- **LLM Adapters** — Copilot is now supported out of the box. Additional adapters for Ollama, llama.cpp, vLLM, and Gemini are welcome. The `LLMAdapter` interface requires just two methods: `chat()` and `stream()`. +- **LLM Adapters** — Anthropic, OpenAI, and Copilot are supported out of the box. Any OpenAI-compatible API (Ollama, vLLM, LM Studio, etc.) works via `baseURL`. Additional adapters for Gemini and other providers are welcome. The `LLMAdapter` interface requires just two methods: `chat()` and `stream()`. - **Examples** — Real-world workflows and use cases. - **Documentation** — Guides, tutorials, and API docs. 
diff --git a/README_zh.md b/README_zh.md index f78016a..7109564 100644 --- a/README_zh.md +++ b/README_zh.md @@ -12,7 +12,7 @@ - **多智能体团队** — 定义不同角色、工具甚至不同模型的智能体。它们通过消息总线和共享内存协作。 - **任务 DAG 调度** — 任务之间存在依赖关系。框架进行拓扑排序——有依赖的任务等待,无依赖的任务并行执行。 -- **模型无关** — Claude 和 GPT 可以在同一个团队中使用。每个智能体可以单独配置模型。你也可以为任何 LLM 编写自己的适配器。 +- **模型无关** — Claude、GPT 和本地模型(Ollama、vLLM、LM Studio)可以在同一个团队中使用。通过 `baseURL` 即可接入任何 OpenAI 兼容服务。 - **进程内执行** — 没有子进程开销。所有内容在一个 Node.js 进程中运行。可部署到 Serverless、Docker、CI/CD。 ## 快速开始 @@ -41,6 +41,10 @@ const result = await orchestrator.runAgent( console.log(result.output) ``` +## 作者 + +> JackChen — 前 WPS 产品经理,现独立创业者。关注小红书[「杰克西|硅基杠杆」](https://www.xiaohongshu.com/user/profile/5a1bdc1e4eacab4aa39ea6d6),持续获取我的 AI Agent 观点和思考。 + ## 多智能体团队 这才是有意思的地方。三个智能体,一个目标: @@ -160,7 +164,7 @@ const result = await agent.run('Find the three most recent TypeScript releases.'
-多模型团队 — 在一个工作流中混合使用 Claude、GPT 和 Copilot +多模型团队 — 在一个工作流中混合使用 Claude、GPT 和本地模型 ```typescript const claudeAgent: AgentConfig = { @@ -179,9 +183,20 @@ const gptAgent: AgentConfig = { tools: ['bash', 'file_read', 'file_write'], } +// 任何 OpenAI 兼容 API — Ollama、vLLM、LM Studio 等 +const localAgent: AgentConfig = { + name: 'reviewer', + model: 'llama3.1', + provider: 'openai', + baseURL: 'http://localhost:11434/v1', + apiKey: 'ollama', + systemPrompt: 'You review code for correctness and clarity.', + tools: ['file_read', 'grep'], +} + const team = orchestrator.createTeam('mixed-team', { name: 'mixed-team', - agents: [claudeAgent, gptAgent], + agents: [claudeAgent, gptAgent, localAgent], sharedMemory: true, }) @@ -270,7 +285,7 @@ for await (const event of agent.stream('Explain monads in two sentences.')) { 欢迎提 Issue、功能需求和 PR。以下方向的贡献尤其有价值: -- **LLM 适配器** — Copilot 已原生支持。欢迎继续贡献 Ollama、llama.cpp、vLLM、Gemini 等适配器。`LLMAdapter` 接口只需实现两个方法:`chat()` 和 `stream()`。 +- **LLM 适配器** — Anthropic、OpenAI、Copilot 已原生支持。任何 OpenAI 兼容 API(Ollama、vLLM、LM Studio 等)可通过 `baseURL` 直接使用。欢迎贡献 Gemini 等其他适配器。`LLMAdapter` 接口只需实现两个方法:`chat()` 和 `stream()`。 - **示例** — 真实场景的工作流和用例。 - **文档** — 指南、教程和 API 文档。 From 9f9f4e95d4617e55ebaf49d785d7e664d8a1918d Mon Sep 17 00:00:00 2001 From: JackChen Date: Thu, 2 Apr 2026 21:55:04 +0800 Subject: [PATCH 06/38] docs: highlight auto task decomposition, add run modes table, move contributors up - Rewrite headline to emphasize automatic goal-to-task decomposition - Add "Auto Task Decomposition" as first item in Why section - Add "Three Ways to Run" table (runAgent / runTeam / runTasks) - Move Contributors section higher for visibility - Bust star-history cache to refresh chart --- README.md | 25 +++++++++++++++++-------- README_zh.md | 25 +++++++++++++++++-------- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index fc9ce2e..143c3df 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Open Multi-Agent -Build AI agent teams 
that work together. One agent plans, another implements, a third reviews — the framework handles task scheduling, dependencies, and communication automatically. +Build AI agent teams that decompose goals into tasks automatically. Define agents with roles and tools, describe a goal — the framework plans the task graph, schedules dependencies, and runs everything in parallel. [![GitHub stars](https://img.shields.io/github/stars/JackChen-me/open-multi-agent)](https://github.com/JackChen-me/open-multi-agent/stargazers) [![license](https://img.shields.io/github/license/JackChen-me/open-multi-agent)](./LICENSE) @@ -10,6 +10,7 @@ Build AI agent teams that work together. One agent plans, another implements, a ## Why Open Multi-Agent? +- **Auto Task Decomposition** — Describe a goal in plain text. A built-in coordinator agent breaks it into a task DAG with dependencies and assignees — no manual orchestration needed. - **Multi-Agent Teams** — Define agents with different roles, tools, and even different models. They collaborate through a message bus and shared memory. - **Task DAG Scheduling** — Tasks have dependencies. The framework resolves them topologically — dependent tasks wait, independent tasks run in parallel. - **Model Agnostic** — Claude, GPT, and local models (Ollama, vLLM, LM Studio) in the same team. Swap models per agent via `baseURL`. @@ -88,6 +89,20 @@ console.log(`Success: ${result.success}`) console.log(`Tokens: ${result.totalTokenUsage.output_tokens} output tokens`) ``` +## Three Ways to Run + +| Mode | Method | When to use | +|------|--------|-------------| +| Single agent | `runAgent()` | One agent, one prompt — simplest entry point | +| Auto-orchestrated team | `runTeam()` | Give a goal, framework plans and executes | +| Explicit pipeline | `runTasks()` | You define the task graph and assignments | + +## Contributors + + + + + ## More Examples
@@ -287,13 +302,7 @@ Issues, feature requests, and PRs are welcome. Some areas where contributions wo ## Star History -[![Star History Chart](https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260402)](https://star-history.com/#JackChen-me/open-multi-agent&Date) - -## Contributors - - - - +[![Star History Chart](https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260402b)](https://star-history.com/#JackChen-me/open-multi-agent&Date) ## License diff --git a/README_zh.md b/README_zh.md index 7109564..99d6352 100644 --- a/README_zh.md +++ b/README_zh.md @@ -1,6 +1,6 @@ # Open Multi-Agent -构建能协同工作的 AI 智能体团队。一个智能体负责规划,一个负责实现,一个负责审查——框架自动处理任务调度、依赖关系和智能体间通信。 +构建能自动拆解目标的 AI 智能体团队。定义智能体的角色和工具,描述一个目标——框架自动规划任务图、调度依赖、并行执行。 [![GitHub stars](https://img.shields.io/github/stars/JackChen-me/open-multi-agent)](https://github.com/JackChen-me/open-multi-agent/stargazers) [![license](https://img.shields.io/github/license/JackChen-me/open-multi-agent)](./LICENSE) @@ -10,6 +10,7 @@ ## 为什么选择 Open Multi-Agent? +- **自动任务拆解** — 用自然语言描述目标,内置的协调者智能体自动将其拆解为带依赖关系和分配的任务图——无需手动编排。 - **多智能体团队** — 定义不同角色、工具甚至不同模型的智能体。它们通过消息总线和共享内存协作。 - **任务 DAG 调度** — 任务之间存在依赖关系。框架进行拓扑排序——有依赖的任务等待,无依赖的任务并行执行。 - **模型无关** — Claude、GPT 和本地模型(Ollama、vLLM、LM Studio)可以在同一个团队中使用。通过 `baseURL` 即可接入任何 OpenAI 兼容服务。 @@ -92,6 +93,20 @@ console.log(`成功: ${result.success}`) console.log(`Token 用量: ${result.totalTokenUsage.output_tokens} output tokens`) ``` +## 三种运行模式 + +| 模式 | 方法 | 适用场景 | +|------|------|----------| +| 单智能体 | `runAgent()` | 一个智能体,一个提示词——最简入口 | +| 自动编排团队 | `runTeam()` | 给一个目标,框架自动规划和执行 | +| 显式任务管线 | `runTasks()` | 你自己定义任务图和分配 | + +## 贡献者 + + + + + ## 更多示例
@@ -291,13 +306,7 @@ for await (const event of agent.stream('Explain monads in two sentences.')) { ## Star 趋势 -[![Star History Chart](https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260402)](https://star-history.com/#JackChen-me/open-multi-agent&Date) - -## 贡献者 - - - - +[![Star History Chart](https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260402b)](https://star-history.com/#JackChen-me/open-multi-agent&Date) ## 许可证 From 736121fe1050acff29ac7114e532d3b4ccf06727 Mon Sep 17 00:00:00 2001 From: JackChen Date: Thu, 2 Apr 2026 22:24:08 +0800 Subject: [PATCH 07/38] docs: add lightweight positioning tagline to README headers --- README.md | 2 ++ README_zh.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/README.md b/README.md index 143c3df..bb0c8f0 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ Build AI agent teams that decompose goals into tasks automatically. Define agents with roles and tools, describe a goal — the framework plans the task graph, schedules dependencies, and runs everything in parallel. +3 runtime dependencies. 20 source files. One `runTeam()` call from goal to result. 
+ [![GitHub stars](https://img.shields.io/github/stars/JackChen-me/open-multi-agent)](https://github.com/JackChen-me/open-multi-agent/stargazers) [![license](https://img.shields.io/github/license/JackChen-me/open-multi-agent)](./LICENSE) [![TypeScript](https://img.shields.io/badge/TypeScript-5.6-blue)](https://www.typescriptlang.org/) diff --git a/README_zh.md b/README_zh.md index 99d6352..27fc6c4 100644 --- a/README_zh.md +++ b/README_zh.md @@ -2,6 +2,8 @@ 构建能自动拆解目标的 AI 智能体团队。定义智能体的角色和工具,描述一个目标——框架自动规划任务图、调度依赖、并行执行。 +3 个运行时依赖,20 个源文件,一次 `runTeam()` 调用从目标到结果。 + [![GitHub stars](https://img.shields.io/github/stars/JackChen-me/open-multi-agent)](https://github.com/JackChen-me/open-multi-agent/stargazers) [![license](https://img.shields.io/github/license/JackChen-me/open-multi-agent)](./LICENSE) [![TypeScript](https://img.shields.io/badge/TypeScript-5.6-blue)](https://www.typescriptlang.org/) From 01b93d9897133a8804e623866e4188dd0be80165 Mon Sep 17 00:00:00 2001 From: JackChen Date: Thu, 2 Apr 2026 22:35:45 +0800 Subject: [PATCH 08/38] docs: lead Quick Start with runTeam, add output example and Node version --- README.md | 63 ++++++++++++++++++++++++++++++---------------- README_zh.md | 71 ++++++++++++++++++++++++++++++++++------------------ 2 files changed, 88 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index bb0c8f0..cc7400e 100644 --- a/README.md +++ b/README.md @@ -20,33 +20,15 @@ Build AI agent teams that decompose goals into tasks automatically. Define agent ## Quick Start +Requires Node.js >= 18. + ```bash npm install @jackchen_me/open-multi-agent ``` Set `ANTHROPIC_API_KEY` (and optionally `OPENAI_API_KEY` or `GITHUB_TOKEN` for Copilot) in your environment. 
-```typescript -import { OpenMultiAgent } from '@jackchen_me/open-multi-agent' - -const orchestrator = new OpenMultiAgent({ defaultModel: 'claude-sonnet-4-6' }) - -// One agent, one task -const result = await orchestrator.runAgent( - { - name: 'coder', - model: 'claude-sonnet-4-6', - tools: ['bash', 'file_write'], - }, - 'Write a TypeScript function that reverses a string, save it to /tmp/reverse.ts, and run it.', -) - -console.log(result.output) -``` - -## Multi-Agent Team - -This is where it gets interesting. Three agents, one goal: +Three agents, one goal — the framework handles the rest: ```typescript import { OpenMultiAgent } from '@jackchen_me/open-multi-agent' @@ -91,6 +73,23 @@ console.log(`Success: ${result.success}`) console.log(`Tokens: ${result.totalTokenUsage.output_tokens} output tokens`) ``` +What happens under the hood: + +``` +agent_start coordinator +task_start architect +task_complete architect +task_start developer +task_start developer // independent tasks run in parallel +task_complete developer +task_start reviewer // unblocked after implementation +task_complete developer +task_complete reviewer +agent_complete coordinator // synthesizes final result +Success: true +Tokens: 12847 output tokens +``` + ## Three Ways to Run | Mode | Method | When to use | @@ -107,6 +106,28 @@ console.log(`Tokens: ${result.totalTokenUsage.output_tokens} output tokens`) ## More Examples +
+Single Agent — one agent, one prompt + +```typescript +import { OpenMultiAgent } from '@jackchen_me/open-multi-agent' + +const orchestrator = new OpenMultiAgent({ defaultModel: 'claude-sonnet-4-6' }) + +const result = await orchestrator.runAgent( + { + name: 'coder', + model: 'claude-sonnet-4-6', + tools: ['bash', 'file_write'], + }, + 'Write a TypeScript function that reverses a string, save it to /tmp/reverse.ts, and run it.', +) + +console.log(result.output) +``` + +
+
Task Pipeline — explicit control over task graph and assignments diff --git a/README_zh.md b/README_zh.md index 27fc6c4..42129b0 100644 --- a/README_zh.md +++ b/README_zh.md @@ -20,37 +20,15 @@ ## 快速开始 +需要 Node.js >= 18。 + ```bash npm install @jackchen_me/open-multi-agent ``` 在环境变量中设置 `ANTHROPIC_API_KEY`(以及可选的 `OPENAI_API_KEY` 或用于 Copilot 的 `GITHUB_TOKEN`)。 -```typescript -import { OpenMultiAgent } from '@jackchen_me/open-multi-agent' - -const orchestrator = new OpenMultiAgent({ defaultModel: 'claude-sonnet-4-6' }) - -// 一个智能体,一个任务 -const result = await orchestrator.runAgent( - { - name: 'coder', - model: 'claude-sonnet-4-6', - tools: ['bash', 'file_write'], - }, - 'Write a TypeScript function that reverses a string, save it to /tmp/reverse.ts, and run it.', -) - -console.log(result.output) -``` - -## 作者 - -> JackChen — 前 WPS 产品经理,现独立创业者。关注小红书[「杰克西|硅基杠杆」](https://www.xiaohongshu.com/user/profile/5a1bdc1e4eacab4aa39ea6d6),持续获取我的 AI Agent 观点和思考。 - -## 多智能体团队 - -这才是有意思的地方。三个智能体,一个目标: +三个智能体,一个目标——框架处理剩下的一切: ```typescript import { OpenMultiAgent } from '@jackchen_me/open-multi-agent' @@ -95,6 +73,27 @@ console.log(`成功: ${result.success}`) console.log(`Token 用量: ${result.totalTokenUsage.output_tokens} output tokens`) ``` +执行过程: + +``` +agent_start coordinator +task_start architect +task_complete architect +task_start developer +task_start developer // 无依赖的任务并行执行 +task_complete developer +task_start reviewer // 实现完成后自动解锁 +task_complete developer +task_complete reviewer +agent_complete coordinator // 综合所有结果 +Success: true +Tokens: 12847 output tokens +``` + +## 作者 + +> JackChen — 前 WPS 产品经理,现独立创业者。关注小红书[「杰克西|硅基杠杆」](https://www.xiaohongshu.com/user/profile/5a1bdc1e4eacab4aa39ea6d6),持续获取我的 AI Agent 观点和思考。 + ## 三种运行模式 | 模式 | 方法 | 适用场景 | @@ -111,6 +110,28 @@ console.log(`Token 用量: ${result.totalTokenUsage.output_tokens} output tokens ## 更多示例 +
+单智能体 — 一个智能体,一个提示词 + +```typescript +import { OpenMultiAgent } from '@jackchen_me/open-multi-agent' + +const orchestrator = new OpenMultiAgent({ defaultModel: 'claude-sonnet-4-6' }) + +const result = await orchestrator.runAgent( + { + name: 'coder', + model: 'claude-sonnet-4-6', + tools: ['bash', 'file_write'], + }, + 'Write a TypeScript function that reverses a string, save it to /tmp/reverse.ts, and run it.', +) + +console.log(result.output) +``` + +
+
任务流水线 — 显式控制任务图和分配 From 80a8c1dcffb877cb31681a35d03c36500149e5b8 Mon Sep 17 00:00:00 2001 From: JackChen Date: Thu, 2 Apr 2026 23:43:49 +0800 Subject: [PATCH 09/38] fix: blocked tasks never unblocked when dependencies complete isTaskReady() rejects non-pending tasks on its first line, but unblockDependents() passed blocked tasks directly to it. This meant dependent tasks stayed blocked forever after their dependencies completed, breaking any workflow with task dependencies. Fix: pass a pending-status copy so isTaskReady only checks the dependency condition. --- src/task/queue.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/task/queue.ts b/src/task/queue.ts index 60149ff..8888c09 100644 --- a/src/task/queue.ts +++ b/src/task/queue.ts @@ -356,7 +356,7 @@ export class TaskQueue { // Re-check against the current state of the whole task set. // Pass the pre-built map to avoid rebuilding it for every candidate task. - if (isTaskReady(task, allTasks, taskById)) { + if (isTaskReady({ ...task, status: 'pending' }, allTasks, taskById)) { const unblocked: Task = { ...task, status: 'pending', From a772312a689bc572f2ff59dd939aec40eaa58c2e Mon Sep 17 00:00:00 2001 From: JackChen Date: Thu, 2 Apr 2026 23:43:54 +0800 Subject: [PATCH 10/38] chore: add tests, CI, contributing guide, and PR template - 5 test files, 61 test cases covering TaskQueue, SharedMemory, ToolExecutor, ToolRegistry, and Semaphore - GitHub Actions CI running lint + test on Node 18/20/22 - CONTRIBUTING.md with setup, commands, and PR workflow - Pull request template with checklist --- .github/pull_request_template.md | 14 ++ .github/workflows/ci.yml | 23 +++ CONTRIBUTING.md | 72 +++++++++ tests/semaphore.test.ts | 57 ++++++++ tests/shared-memory.test.ts | 122 ++++++++++++++++ tests/task-queue.test.ts | 244 +++++++++++++++++++++++++++++++ tests/task-utils.test.ts | 155 ++++++++++++++++++++ tests/tool-executor.test.ts | 193 ++++++++++++++++++++++++ 8 files changed, 880 insertions(+) create 
mode 100644 .github/pull_request_template.md create mode 100644 .github/workflows/ci.yml create mode 100644 CONTRIBUTING.md create mode 100644 tests/semaphore.test.ts create mode 100644 tests/shared-memory.test.ts create mode 100644 tests/task-queue.test.ts create mode 100644 tests/task-utils.test.ts create mode 100644 tests/tool-executor.test.ts diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..739d91d --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,14 @@ +## What + + + +## Why + + + +## Checklist + +- [ ] `npm run lint` passes +- [ ] `npm test` passes +- [ ] Added/updated tests for changed behavior +- [ ] No new runtime dependencies (or justified in the PR description) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..6f5b577 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,23 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + node-version: [18, 20, 22] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.node-version }} + cache: npm + - run: npm ci + - run: npm run lint + - run: npm test diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..e17dd36 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,72 @@ +# Contributing + +Thanks for your interest in contributing to Open Multi-Agent! This guide covers the basics to get you started. + +## Setup + +```bash +git clone https://github.com/JackChen-me/open-multi-agent.git +cd open-multi-agent +npm install +``` + +Requires Node.js >= 18. 
+ +## Development Commands + +```bash +npm run build # Compile TypeScript (src/ → dist/) +npm run dev # Watch mode compilation +npm run lint # Type-check (tsc --noEmit) +npm test # Run all tests (vitest) +npm run test:watch # Vitest watch mode +``` + +## Running Tests + +All tests live in `tests/`. They test core modules (TaskQueue, SharedMemory, ToolExecutor, Semaphore) without requiring API keys or network access. + +```bash +npm test +``` + +Every PR must pass `npm run lint && npm test`. CI runs both automatically on Node 18, 20, and 22. + +## Making a Pull Request + +1. Fork the repo and create a branch from `main` +2. Make your changes +3. Add or update tests if you changed behavior +4. Run `npm run lint && npm test` locally +5. Open a PR against `main` + +### PR Checklist + +- [ ] `npm run lint` passes +- [ ] `npm test` passes +- [ ] New behavior has test coverage +- [ ] Linked to a relevant issue (if one exists) + +## Code Style + +- TypeScript strict mode, ES modules (`.js` extensions in imports) +- No additional linter/formatter configured — follow existing patterns +- Keep dependencies minimal (currently 3 runtime deps: `@anthropic-ai/sdk`, `openai`, `zod`) + +## Architecture Overview + +See the [README](./README.md#architecture) for an architecture diagram. Key entry points: + +- **Orchestrator**: `src/orchestrator/orchestrator.ts` — top-level API +- **Task system**: `src/task/queue.ts`, `src/task/task.ts` — dependency DAG +- **Agent**: `src/agent/runner.ts` — conversation loop +- **Tools**: `src/tool/framework.ts`, `src/tool/executor.ts` — tool registry and execution +- **LLM adapters**: `src/llm/` — Anthropic, OpenAI, Copilot + +## Where to Contribute + +Check the [issues](https://github.com/JackChen-me/open-multi-agent/issues) page. Issues labeled `good first issue` are scoped and approachable. Issues labeled `help wanted` are larger but well-defined. 
+ +## License + +By contributing, you agree that your contributions will be licensed under the MIT License. diff --git a/tests/semaphore.test.ts b/tests/semaphore.test.ts new file mode 100644 index 0000000..ddc1b34 --- /dev/null +++ b/tests/semaphore.test.ts @@ -0,0 +1,57 @@ +import { describe, it, expect } from 'vitest' +import { Semaphore } from '../src/utils/semaphore.js' + +describe('Semaphore', () => { + it('throws on max < 1', () => { + expect(() => new Semaphore(0)).toThrow() + }) + + it('allows up to max concurrent holders', async () => { + const sem = new Semaphore(2) + let running = 0 + let peak = 0 + + const work = async () => { + await sem.acquire() + running++ + peak = Math.max(peak, running) + await new Promise((r) => setTimeout(r, 30)) + running-- + sem.release() + } + + await Promise.all([work(), work(), work(), work()]) + expect(peak).toBeLessThanOrEqual(2) + }) + + it('run() auto-releases on success', async () => { + const sem = new Semaphore(1) + const result = await sem.run(async () => 42) + expect(result).toBe(42) + expect(sem.active).toBe(0) + }) + + it('run() auto-releases on error', async () => { + const sem = new Semaphore(1) + await expect(sem.run(async () => { throw new Error('oops') })).rejects.toThrow('oops') + expect(sem.active).toBe(0) + }) + + it('tracks active and pending counts', async () => { + const sem = new Semaphore(1) + await sem.acquire() + expect(sem.active).toBe(1) + + // This will queue + const p = sem.acquire() + expect(sem.pending).toBe(1) + + sem.release() + await p + expect(sem.active).toBe(1) + expect(sem.pending).toBe(0) + + sem.release() + expect(sem.active).toBe(0) + }) +}) diff --git a/tests/shared-memory.test.ts b/tests/shared-memory.test.ts new file mode 100644 index 0000000..1467c95 --- /dev/null +++ b/tests/shared-memory.test.ts @@ -0,0 +1,122 @@ +import { describe, it, expect } from 'vitest' +import { SharedMemory } from '../src/memory/shared.js' + +describe('SharedMemory', () => { + // 
------------------------------------------------------------------------- + // Write & read + // ------------------------------------------------------------------------- + + it('writes and reads a value under a namespaced key', async () => { + const mem = new SharedMemory() + await mem.write('researcher', 'findings', 'TS 5.5 ships const type params') + + const entry = await mem.read('researcher/findings') + expect(entry).not.toBeNull() + expect(entry!.value).toBe('TS 5.5 ships const type params') + }) + + it('returns null for a non-existent key', async () => { + const mem = new SharedMemory() + expect(await mem.read('nope/nothing')).toBeNull() + }) + + // ------------------------------------------------------------------------- + // Namespace isolation + // ------------------------------------------------------------------------- + + it('isolates writes between agents', async () => { + const mem = new SharedMemory() + await mem.write('alice', 'plan', 'plan A') + await mem.write('bob', 'plan', 'plan B') + + const alice = await mem.read('alice/plan') + const bob = await mem.read('bob/plan') + expect(alice!.value).toBe('plan A') + expect(bob!.value).toBe('plan B') + }) + + it('listByAgent returns only that agent\'s entries', async () => { + const mem = new SharedMemory() + await mem.write('alice', 'a1', 'v1') + await mem.write('alice', 'a2', 'v2') + await mem.write('bob', 'b1', 'v3') + + const aliceEntries = await mem.listByAgent('alice') + expect(aliceEntries).toHaveLength(2) + expect(aliceEntries.every((e) => e.key.startsWith('alice/'))).toBe(true) + }) + + // ------------------------------------------------------------------------- + // Overwrite + // ------------------------------------------------------------------------- + + it('overwrites a value and preserves createdAt', async () => { + const mem = new SharedMemory() + await mem.write('agent', 'key', 'first') + const first = await mem.read('agent/key') + + await mem.write('agent', 'key', 'second') + const 
second = await mem.read('agent/key') + + expect(second!.value).toBe('second') + expect(second!.createdAt.getTime()).toBe(first!.createdAt.getTime()) + }) + + // ------------------------------------------------------------------------- + // Metadata + // ------------------------------------------------------------------------- + + it('stores metadata alongside the value', async () => { + const mem = new SharedMemory() + await mem.write('agent', 'key', 'val', { priority: 'high' }) + + const entry = await mem.read('agent/key') + expect(entry!.metadata).toMatchObject({ priority: 'high', agent: 'agent' }) + }) + + // ------------------------------------------------------------------------- + // Summary + // ------------------------------------------------------------------------- + + it('returns empty string for an empty store', async () => { + const mem = new SharedMemory() + expect(await mem.getSummary()).toBe('') + }) + + it('produces a markdown summary grouped by agent', async () => { + const mem = new SharedMemory() + await mem.write('researcher', 'findings', 'result A') + await mem.write('coder', 'plan', 'implement X') + + const summary = await mem.getSummary() + expect(summary).toContain('## Shared Team Memory') + expect(summary).toContain('### researcher') + expect(summary).toContain('### coder') + expect(summary).toContain('findings: result A') + expect(summary).toContain('plan: implement X') + }) + + it('truncates long values in the summary', async () => { + const mem = new SharedMemory() + const longValue = 'x'.repeat(300) + await mem.write('agent', 'big', longValue) + + const summary = await mem.getSummary() + // Summary truncates at 200 chars → 197 + '…' + expect(summary.length).toBeLessThan(longValue.length) + expect(summary).toContain('…') + }) + + // ------------------------------------------------------------------------- + // listAll + // ------------------------------------------------------------------------- + + it('listAll returns entries from all 
agents', async () => { + const mem = new SharedMemory() + await mem.write('a', 'k1', 'v1') + await mem.write('b', 'k2', 'v2') + + const all = await mem.listAll() + expect(all).toHaveLength(2) + }) +}) diff --git a/tests/task-queue.test.ts b/tests/task-queue.test.ts new file mode 100644 index 0000000..87a2500 --- /dev/null +++ b/tests/task-queue.test.ts @@ -0,0 +1,244 @@ +import { describe, it, expect, vi } from 'vitest' +import { TaskQueue } from '../src/task/queue.js' +import { createTask } from '../src/task/task.js' + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Create a simple task with a predictable id. */ +function task(id: string, opts: { dependsOn?: string[]; assignee?: string } = {}) { + const t = createTask({ title: id, description: `task ${id}`, assignee: opts.assignee }) + // Override the random UUID so tests can reference tasks by name. + return { ...t, id, dependsOn: opts.dependsOn } as ReturnType +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('TaskQueue', () => { + // ------------------------------------------------------------------------- + // Basic add & query + // ------------------------------------------------------------------------- + + it('adds a task and lists it', () => { + const q = new TaskQueue() + q.add(task('a')) + expect(q.list()).toHaveLength(1) + expect(q.list()[0].id).toBe('a') + }) + + it('fires task:ready for a task with no dependencies', () => { + const q = new TaskQueue() + const handler = vi.fn() + q.on('task:ready', handler) + + q.add(task('a')) + expect(handler).toHaveBeenCalledTimes(1) + expect(handler.mock.calls[0][0].id).toBe('a') + }) + + it('blocks a task whose dependency is not yet completed', () => { + const q = new TaskQueue() + q.add(task('a')) + 
q.add(task('b', { dependsOn: ['a'] })) + + const b = q.list().find((t) => t.id === 'b')! + expect(b.status).toBe('blocked') + }) + + // ------------------------------------------------------------------------- + // Dependency resolution + // ------------------------------------------------------------------------- + + it('unblocks a dependent task when its dependency completes', () => { + const q = new TaskQueue() + const readyHandler = vi.fn() + q.on('task:ready', readyHandler) + + q.add(task('a')) + q.add(task('b', { dependsOn: ['a'] })) + + // 'a' fires task:ready, 'b' is blocked + expect(readyHandler).toHaveBeenCalledTimes(1) + + q.complete('a', 'done') + + // 'b' should now be unblocked → fires task:ready + expect(readyHandler).toHaveBeenCalledTimes(2) + expect(readyHandler.mock.calls[1][0].id).toBe('b') + expect(q.list().find((t) => t.id === 'b')!.status).toBe('pending') + }) + + it('keeps a task blocked until ALL dependencies complete', () => { + const q = new TaskQueue() + q.add(task('a')) + q.add(task('b')) + q.add(task('c', { dependsOn: ['a', 'b'] })) + + q.complete('a') + + const cAfterA = q.list().find((t) => t.id === 'c')! + expect(cAfterA.status).toBe('blocked') + + q.complete('b') + + const cAfterB = q.list().find((t) => t.id === 'c')! 
+ expect(cAfterB.status).toBe('pending') + }) + + // ------------------------------------------------------------------------- + // Cascade failure + // ------------------------------------------------------------------------- + + it('cascades failure to direct dependents', () => { + const q = new TaskQueue() + const failHandler = vi.fn() + q.on('task:failed', failHandler) + + q.add(task('a')) + q.add(task('b', { dependsOn: ['a'] })) + + q.fail('a', 'boom') + + expect(failHandler).toHaveBeenCalledTimes(2) // a + b + expect(q.list().find((t) => t.id === 'b')!.status).toBe('failed') + expect(q.list().find((t) => t.id === 'b')!.result).toContain('dependency') + }) + + it('cascades failure transitively (a → b → c)', () => { + const q = new TaskQueue() + q.add(task('a')) + q.add(task('b', { dependsOn: ['a'] })) + q.add(task('c', { dependsOn: ['b'] })) + + q.fail('a', 'boom') + + expect(q.list().every((t) => t.status === 'failed')).toBe(true) + }) + + it('does not cascade failure to independent tasks', () => { + const q = new TaskQueue() + q.add(task('a')) + q.add(task('b')) + q.add(task('c', { dependsOn: ['a'] })) + + q.fail('a', 'boom') + + expect(q.list().find((t) => t.id === 'b')!.status).toBe('pending') + expect(q.list().find((t) => t.id === 'c')!.status).toBe('failed') + }) + + // ------------------------------------------------------------------------- + // Completion + // ------------------------------------------------------------------------- + + it('fires all:complete when every task reaches a terminal state', () => { + const q = new TaskQueue() + const allComplete = vi.fn() + q.on('all:complete', allComplete) + + q.add(task('a')) + q.add(task('b')) + + q.complete('a') + expect(allComplete).not.toHaveBeenCalled() + + q.complete('b') + expect(allComplete).toHaveBeenCalledTimes(1) + }) + + it('fires all:complete when mix of completed and failed', () => { + const q = new TaskQueue() + const allComplete = vi.fn() + q.on('all:complete', allComplete) + + 
q.add(task('a')) + q.add(task('b', { dependsOn: ['a'] })) + + q.fail('a', 'err') // cascades to b + expect(allComplete).toHaveBeenCalledTimes(1) + }) + + it('isComplete returns true for an empty queue', () => { + const q = new TaskQueue() + expect(q.isComplete()).toBe(true) + }) + + // ------------------------------------------------------------------------- + // Query: next / nextAvailable + // ------------------------------------------------------------------------- + + it('next() returns a pending task for the given assignee', () => { + const q = new TaskQueue() + q.add(task('a', { assignee: 'alice' })) + q.add(task('b', { assignee: 'bob' })) + + expect(q.next('bob')?.id).toBe('b') + }) + + it('next() returns undefined when no pending task matches', () => { + const q = new TaskQueue() + q.add(task('a', { assignee: 'alice' })) + expect(q.next('bob')).toBeUndefined() + }) + + it('nextAvailable() prefers unassigned tasks', () => { + const q = new TaskQueue() + q.add(task('assigned', { assignee: 'alice' })) + q.add(task('unassigned')) + + expect(q.nextAvailable()?.id).toBe('unassigned') + }) + + // ------------------------------------------------------------------------- + // Progress + // ------------------------------------------------------------------------- + + it('getProgress() returns correct counts', () => { + const q = new TaskQueue() + q.add(task('a')) + q.add(task('b')) + q.add(task('c', { dependsOn: ['a'] })) + + q.complete('a') + + const p = q.getProgress() + expect(p.total).toBe(3) + expect(p.completed).toBe(1) + expect(p.pending).toBe(2) // b + c (unblocked) + expect(p.blocked).toBe(0) + }) + + // ------------------------------------------------------------------------- + // Event unsubscribe + // ------------------------------------------------------------------------- + + it('unsubscribe stops receiving events', () => { + const q = new TaskQueue() + const handler = vi.fn() + const off = q.on('task:ready', handler) + + q.add(task('a')) + 
expect(handler).toHaveBeenCalledTimes(1) + + off() + q.add(task('b')) + expect(handler).toHaveBeenCalledTimes(1) // no new call + }) + + // ------------------------------------------------------------------------- + // Error cases + // ------------------------------------------------------------------------- + + it('throws when completing a non-existent task', () => { + const q = new TaskQueue() + expect(() => q.complete('ghost')).toThrow('not found') + }) + + it('throws when failing a non-existent task', () => { + const q = new TaskQueue() + expect(() => q.fail('ghost', 'err')).toThrow('not found') + }) +}) diff --git a/tests/task-utils.test.ts b/tests/task-utils.test.ts new file mode 100644 index 0000000..7c3a8f5 --- /dev/null +++ b/tests/task-utils.test.ts @@ -0,0 +1,155 @@ +import { describe, it, expect } from 'vitest' +import { + createTask, + isTaskReady, + getTaskDependencyOrder, + validateTaskDependencies, +} from '../src/task/task.js' +import type { Task } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function task(id: string, opts: { dependsOn?: string[]; status?: Task['status'] } = {}): Task { + const t = createTask({ title: id, description: `task ${id}` }) + return { ...t, id, dependsOn: opts.dependsOn, status: opts.status ?? 
'pending' } +} + +// --------------------------------------------------------------------------- +// createTask +// --------------------------------------------------------------------------- + +describe('createTask', () => { + it('creates a task with pending status and timestamps', () => { + const t = createTask({ title: 'Test', description: 'A test task' }) + expect(t.id).toBeDefined() + expect(t.status).toBe('pending') + expect(t.createdAt).toBeInstanceOf(Date) + expect(t.updatedAt).toBeInstanceOf(Date) + }) + + it('copies dependsOn array (no shared reference)', () => { + const deps = ['a'] + const t = createTask({ title: 'T', description: 'D', dependsOn: deps }) + deps.push('b') + expect(t.dependsOn).toEqual(['a']) + }) +}) + +// --------------------------------------------------------------------------- +// isTaskReady +// --------------------------------------------------------------------------- + +describe('isTaskReady', () => { + it('returns true for a pending task with no dependencies', () => { + const t = task('a') + expect(isTaskReady(t, [t])).toBe(true) + }) + + it('returns false for a non-pending task', () => { + const t = task('a', { status: 'blocked' }) + expect(isTaskReady(t, [t])).toBe(false) + }) + + it('returns true when all dependencies are completed', () => { + const dep = task('dep', { status: 'completed' }) + const t = task('a', { dependsOn: ['dep'] }) + expect(isTaskReady(t, [dep, t])).toBe(true) + }) + + it('returns false when a dependency is not yet completed', () => { + const dep = task('dep', { status: 'in_progress' }) + const t = task('a', { dependsOn: ['dep'] }) + expect(isTaskReady(t, [dep, t])).toBe(false) + }) + + it('returns false when a dependency is missing from the task set', () => { + const t = task('a', { dependsOn: ['ghost'] }) + expect(isTaskReady(t, [t])).toBe(false) + }) +}) + +// --------------------------------------------------------------------------- +// getTaskDependencyOrder +// 
--------------------------------------------------------------------------- + +describe('getTaskDependencyOrder', () => { + it('returns empty array for empty input', () => { + expect(getTaskDependencyOrder([])).toEqual([]) + }) + + it('returns tasks with no deps first', () => { + const a = task('a') + const b = task('b', { dependsOn: ['a'] }) + const ordered = getTaskDependencyOrder([b, a]) + expect(ordered[0].id).toBe('a') + expect(ordered[1].id).toBe('b') + }) + + it('handles a diamond dependency (a → b,c → d)', () => { + const a = task('a') + const b = task('b', { dependsOn: ['a'] }) + const c = task('c', { dependsOn: ['a'] }) + const d = task('d', { dependsOn: ['b', 'c'] }) + + const ordered = getTaskDependencyOrder([d, c, b, a]) + const ids = ordered.map((t) => t.id) + + // a must come before b and c; b and c must come before d + expect(ids.indexOf('a')).toBeLessThan(ids.indexOf('b')) + expect(ids.indexOf('a')).toBeLessThan(ids.indexOf('c')) + expect(ids.indexOf('b')).toBeLessThan(ids.indexOf('d')) + expect(ids.indexOf('c')).toBeLessThan(ids.indexOf('d')) + }) + + it('returns partial result when a cycle exists', () => { + const a = task('a', { dependsOn: ['b'] }) + const b = task('b', { dependsOn: ['a'] }) + const ordered = getTaskDependencyOrder([a, b]) + // Neither can be ordered — result should be empty (or partial) + expect(ordered.length).toBeLessThan(2) + }) +}) + +// --------------------------------------------------------------------------- +// validateTaskDependencies +// --------------------------------------------------------------------------- + +describe('validateTaskDependencies', () => { + it('returns valid for tasks with no deps', () => { + const result = validateTaskDependencies([task('a'), task('b')]) + expect(result.valid).toBe(true) + expect(result.errors).toHaveLength(0) + }) + + it('detects self-dependency', () => { + const t = task('a', { dependsOn: ['a'] }) + const result = validateTaskDependencies([t]) + 
expect(result.valid).toBe(false) + expect(result.errors[0]).toContain('depends on itself') + }) + + it('detects unknown dependency', () => { + const t = task('a', { dependsOn: ['ghost'] }) + const result = validateTaskDependencies([t]) + expect(result.valid).toBe(false) + expect(result.errors[0]).toContain('unknown dependency') + }) + + it('detects a cycle (a → b → a)', () => { + const a = task('a', { dependsOn: ['b'] }) + const b = task('b', { dependsOn: ['a'] }) + const result = validateTaskDependencies([a, b]) + expect(result.valid).toBe(false) + expect(result.errors.some((e) => e.toLowerCase().includes('cyclic'))).toBe(true) + }) + + it('detects a longer cycle (a → b → c → a)', () => { + const a = task('a', { dependsOn: ['c'] }) + const b = task('b', { dependsOn: ['a'] }) + const c = task('c', { dependsOn: ['b'] }) + const result = validateTaskDependencies([a, b, c]) + expect(result.valid).toBe(false) + }) +}) diff --git a/tests/tool-executor.test.ts b/tests/tool-executor.test.ts new file mode 100644 index 0000000..afa7cb6 --- /dev/null +++ b/tests/tool-executor.test.ts @@ -0,0 +1,193 @@ +import { describe, it, expect, vi } from 'vitest' +import { z } from 'zod' +import { ToolRegistry, defineTool } from '../src/tool/framework.js' +import { ToolExecutor } from '../src/tool/executor.js' +import type { ToolUseContext } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +const dummyContext: ToolUseContext = { + agent: { name: 'test-agent', role: 'tester', model: 'test-model' }, +} + +function echoTool() { + return defineTool({ + name: 'echo', + description: 'Echoes the message.', + inputSchema: z.object({ message: z.string() }), + execute: async ({ message }) => ({ data: message, isError: false }), + }) +} + +function failTool() { + return defineTool({ + name: 'fail', + description: 'Always throws.', + inputSchema: 
z.object({}), + execute: async () => { + throw new Error('intentional failure') + }, + }) +} + +function makeExecutor(...tools: ReturnType[]) { + const registry = new ToolRegistry() + for (const t of tools) registry.register(t) + return { executor: new ToolExecutor(registry), registry } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('ToolExecutor', () => { + // ------------------------------------------------------------------------- + // Single execution + // ------------------------------------------------------------------------- + + it('executes a tool and returns its result', async () => { + const { executor } = makeExecutor(echoTool()) + const result = await executor.execute('echo', { message: 'hello' }, dummyContext) + expect(result.data).toBe('hello') + expect(result.isError).toBeFalsy() + }) + + it('returns an error result for an unknown tool', async () => { + const { executor } = makeExecutor() + const result = await executor.execute('ghost', {}, dummyContext) + expect(result.isError).toBe(true) + expect(result.data).toContain('not registered') + }) + + it('returns an error result when Zod validation fails', async () => { + const { executor } = makeExecutor(echoTool()) + // 'message' is required but missing + const result = await executor.execute('echo', {}, dummyContext) + expect(result.isError).toBe(true) + expect(result.data).toContain('Invalid input') + }) + + it('catches tool execution errors and returns them as error results', async () => { + const { executor } = makeExecutor(failTool()) + const result = await executor.execute('fail', {}, dummyContext) + expect(result.isError).toBe(true) + expect(result.data).toContain('intentional failure') + }) + + it('returns an error result when aborted before execution', async () => { + const { executor } = makeExecutor(echoTool()) + const controller = new AbortController() 
+ controller.abort() + + const result = await executor.execute( + 'echo', + { message: 'hi' }, + { ...dummyContext, abortSignal: controller.signal }, + ) + expect(result.isError).toBe(true) + expect(result.data).toContain('aborted') + }) + + // ------------------------------------------------------------------------- + // Batch execution + // ------------------------------------------------------------------------- + + it('executeBatch runs multiple tools and returns a map of results', async () => { + const { executor } = makeExecutor(echoTool()) + const results = await executor.executeBatch( + [ + { id: 'c1', name: 'echo', input: { message: 'a' } }, + { id: 'c2', name: 'echo', input: { message: 'b' } }, + ], + dummyContext, + ) + + expect(results.size).toBe(2) + expect(results.get('c1')!.data).toBe('a') + expect(results.get('c2')!.data).toBe('b') + }) + + it('executeBatch isolates errors — one failure does not affect others', async () => { + const { executor } = makeExecutor(echoTool(), failTool()) + const results = await executor.executeBatch( + [ + { id: 'ok', name: 'echo', input: { message: 'fine' } }, + { id: 'bad', name: 'fail', input: {} }, + ], + dummyContext, + ) + + expect(results.get('ok')!.isError).toBeFalsy() + expect(results.get('bad')!.isError).toBe(true) + }) + + // ------------------------------------------------------------------------- + // Concurrency control + // ------------------------------------------------------------------------- + + it('respects maxConcurrency limit', async () => { + let peak = 0 + let running = 0 + + const trackTool = defineTool({ + name: 'track', + description: 'Tracks concurrency.', + inputSchema: z.object({}), + execute: async () => { + running++ + peak = Math.max(peak, running) + await new Promise((r) => setTimeout(r, 50)) + running-- + return { data: 'ok', isError: false } + }, + }) + + const registry = new ToolRegistry() + registry.register(trackTool) + const executor = new ToolExecutor(registry, { maxConcurrency: 
2 }) + + await executor.executeBatch( + Array.from({ length: 5 }, (_, i) => ({ id: `t${i}`, name: 'track', input: {} })), + dummyContext, + ) + + expect(peak).toBeLessThanOrEqual(2) + }) +}) + +// --------------------------------------------------------------------------- +// ToolRegistry +// --------------------------------------------------------------------------- + +describe('ToolRegistry', () => { + it('registers and retrieves a tool', () => { + const registry = new ToolRegistry() + registry.register(echoTool()) + expect(registry.get('echo')).toBeDefined() + expect(registry.has('echo')).toBe(true) + }) + + it('throws on duplicate registration', () => { + const registry = new ToolRegistry() + registry.register(echoTool()) + expect(() => registry.register(echoTool())).toThrow('already registered') + }) + + it('unregister removes the tool', () => { + const registry = new ToolRegistry() + registry.register(echoTool()) + registry.unregister('echo') + expect(registry.has('echo')).toBe(false) + }) + + it('toToolDefs produces JSON schema representations', () => { + const registry = new ToolRegistry() + registry.register(echoTool()) + const defs = registry.toToolDefs() + expect(defs).toHaveLength(1) + expect(defs[0].name).toBe('echo') + expect(defs[0].inputSchema).toHaveProperty('properties') + }) +}) From 3a46669a69895f787457513e81da58774e1afa8f Mon Sep 17 00:00:00 2001 From: JackChen Date: Thu, 2 Apr 2026 23:46:43 +0800 Subject: [PATCH 11/38] fix: use explicit crypto import for Node 18 compatibility crypto.randomUUID() is not globally available in Node 18. Import randomUUID from node:crypto explicitly so the framework works on all supported Node versions (>=18). --- src/task/task.ts | 3 ++- src/team/messaging.ts | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/task/task.ts b/src/task/task.ts index a297100..9a11476 100644 --- a/src/task/task.ts +++ b/src/task/task.ts @@ -6,6 +6,7 @@ * Stateful orchestration belongs in {@link TaskQueue}. 
*/ +import { randomUUID } from 'node:crypto' import type { Task, TaskStatus } from '../types.js' // --------------------------------------------------------------------------- @@ -33,7 +34,7 @@ export function createTask(input: { }): Task { const now = new Date() return { - id: crypto.randomUUID(), + id: randomUUID(), title: input.title, description: input.description, status: 'pending' as TaskStatus, diff --git a/src/team/messaging.ts b/src/team/messaging.ts index de4cdae..35a4c2e 100644 --- a/src/team/messaging.ts +++ b/src/team/messaging.ts @@ -6,6 +6,8 @@ * for replay and audit; read-state is tracked per recipient. */ +import { randomUUID } from 'node:crypto' + // --------------------------------------------------------------------------- // Message type // --------------------------------------------------------------------------- @@ -93,7 +95,7 @@ export class MessageBus { */ send(from: string, to: string, content: string): Message { const message: Message = { - id: crypto.randomUUID(), + id: randomUUID(), from, to, content, From 54400580a2c72098336422112e20b38d43ab688c Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 00:05:31 +0800 Subject: [PATCH 12/38] =?UTF-8?q?docs:=20fix=20source=20file=20count=20in?= =?UTF-8?q?=20README=20(20=20=E2=86=92=2027)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cc7400e..34b0a1b 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Build AI agent teams that decompose goals into tasks automatically. Define agents with roles and tools, describe a goal — the framework plans the task graph, schedules dependencies, and runs everything in parallel. -3 runtime dependencies. 20 source files. One `runTeam()` call from goal to result. +3 runtime dependencies. 27 source files. One `runTeam()` call from goal to result. 
[![GitHub stars](https://img.shields.io/github/stars/JackChen-me/open-multi-agent)](https://github.com/JackChen-me/open-multi-agent/stargazers) [![license](https://img.shields.io/github/license/JackChen-me/open-multi-agent)](./LICENSE) From 6e6a85178be11636ec00364b388d4e5fa8b518d2 Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 00:07:11 +0800 Subject: [PATCH 13/38] =?UTF-8?q?docs:=20sync=20Chinese=20README=20source?= =?UTF-8?q?=20file=20count=20(20=20=E2=86=92=2027)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README_zh.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_zh.md b/README_zh.md index 42129b0..86436dc 100644 --- a/README_zh.md +++ b/README_zh.md @@ -2,7 +2,7 @@ 构建能自动拆解目标的 AI 智能体团队。定义智能体的角色和工具,描述一个目标——框架自动规划任务图、调度依赖、并行执行。 -3 个运行时依赖,20 个源文件,一次 `runTeam()` 调用从目标到结果。 +3 个运行时依赖,27 个源文件,一次 `runTeam()` 调用从目标到结果。 [![GitHub stars](https://img.shields.io/github/stars/JackChen-me/open-multi-agent)](https://github.com/JackChen-me/open-multi-agent/stargazers) [![license](https://img.shields.io/github/license/JackChen-me/open-multi-agent)](./LICENSE) From 31a0fa495110a4f3a8adf93f28aa782d00dc724e Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 02:12:05 +0800 Subject: [PATCH 14/38] docs: add examples for local models (Ollama) and fan-out/aggregate pattern - 06-local-model.ts: mix Ollama (local) + Claude (cloud) in a runTasks pipeline, demonstrating baseURL and apiKey placeholder for OpenAI-compatible servers - 07-fan-out-aggregate.ts: MapReduce pattern using AgentPool.runParallel() to fan out analysis to 3 perspective agents, then aggregate via a synthesizer --- examples/06-local-model.ts | 199 +++++++++++++++++++++++++++++ examples/07-fan-out-aggregate.ts | 209 +++++++++++++++++++++++++++++++ 2 files changed, 408 insertions(+) create mode 100644 examples/06-local-model.ts create mode 100644 examples/07-fan-out-aggregate.ts diff --git a/examples/06-local-model.ts 
b/examples/06-local-model.ts new file mode 100644 index 0000000..d7cf292 --- /dev/null +++ b/examples/06-local-model.ts @@ -0,0 +1,199 @@ +/** + * Example 06 — Local Model + Cloud Model Team (Ollama + Claude) + * + * Demonstrates mixing a local model served by Ollama with a cloud model + * (Claude) in the same task pipeline. The key technique is using + * `provider: 'openai'` with a custom `baseURL` pointing at Ollama's + * OpenAI-compatible endpoint. + * + * This pattern works with ANY OpenAI-compatible local server: + * - Ollama → http://localhost:11434/v1 + * - vLLM → http://localhost:8000/v1 + * - LM Studio → http://localhost:1234/v1 + * - llama.cpp → http://localhost:8080/v1 + * Just change the baseURL and model name below. + * + * Run: + * npx tsx examples/06-local-model.ts + * + * Prerequisites: + * 1. Ollama installed and running: https://ollama.com + * 2. Pull the model: ollama pull llama3.1 + * 3. ANTHROPIC_API_KEY env var must be set. + */ + +import { OpenMultiAgent } from '../src/index.js' +import type { AgentConfig, OrchestratorEvent, Task } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Agents +// --------------------------------------------------------------------------- + +/** + * Coder — uses Claude (Anthropic) for high-quality code generation. + */ +const coder: AgentConfig = { + name: 'coder', + model: 'claude-sonnet-4-6', + provider: 'anthropic', + systemPrompt: `You are a senior TypeScript developer. Write clean, well-typed, +production-quality code. Use the tools to write files to /tmp/local-model-demo/. +Always include brief JSDoc comments on exported functions.`, + tools: ['bash', 'file_write'], + maxTurns: 6, +} + +/** + * Reviewer — uses a local Ollama model via the OpenAI-compatible API. + * The apiKey is required by the OpenAI SDK but Ollama ignores it, + * so we pass the placeholder string 'ollama'. 
+ */ +const reviewer: AgentConfig = { + name: 'reviewer', + model: 'llama3.1', + provider: 'openai', // 'openai' here means "OpenAI-compatible protocol", not the OpenAI cloud + baseURL: 'http://localhost:11434/v1', + apiKey: 'ollama', + systemPrompt: `You are a code reviewer. You read source files and produce a structured review. +Your review MUST include these sections: +- Summary (2-3 sentences) +- Strengths (bullet list) +- Issues (bullet list — or "None found" if the code is clean) +- Verdict: SHIP or NEEDS WORK + +Be specific and constructive. Reference line numbers or function names when possible.`, + tools: ['file_read'], + maxTurns: 4, +} + +// --------------------------------------------------------------------------- +// Progress handler +// --------------------------------------------------------------------------- + +const taskTimes = new Map() + +function handleProgress(event: OrchestratorEvent): void { + const ts = new Date().toISOString().slice(11, 23) + + switch (event.type) { + case 'task_start': { + taskTimes.set(event.task ?? '', Date.now()) + const task = event.data as Task | undefined + console.log(`[${ts}] TASK READY "${task?.title ?? event.task}" → ${task?.assignee ?? '?'}`) + break + } + case 'task_complete': { + const elapsed = Date.now() - (taskTimes.get(event.task ?? '') ?? Date.now()) + console.log(`[${ts}] TASK DONE task=${event.task} in ${elapsed}ms`) + break + } + case 'agent_start': + console.log(`[${ts}] AGENT START ${event.agent}`) + break + case 'agent_complete': + console.log(`[${ts}] AGENT DONE ${event.agent}`) + break + case 'error': + console.error(`[${ts}] ERROR ${event.agent ?? ''} task=${event.task ?? 
'?'}`) + break + } +} + +// --------------------------------------------------------------------------- +// Orchestrator + Team +// --------------------------------------------------------------------------- + +const orchestrator = new OpenMultiAgent({ + defaultModel: 'claude-sonnet-4-6', + maxConcurrency: 2, + onProgress: handleProgress, +}) + +const team = orchestrator.createTeam('local-cloud-team', { + name: 'local-cloud-team', + agents: [coder, reviewer], + sharedMemory: true, +}) + +// --------------------------------------------------------------------------- +// Task pipeline: code → review +// --------------------------------------------------------------------------- + +const OUTPUT_DIR = '/tmp/local-model-demo' + +const tasks: Array<{ + title: string + description: string + assignee?: string + dependsOn?: string[] +}> = [ + { + title: 'Write: retry utility', + description: `Write a small but complete TypeScript utility to ${OUTPUT_DIR}/retry.ts. + +The module should export: +1. A \`RetryOptions\` interface with: maxRetries (number), delayMs (number), + backoffFactor (optional number, default 2), shouldRetry (optional predicate + taking the error and returning boolean). +2. An async \`retry(fn: () => Promise, options: RetryOptions): Promise\` + function that retries \`fn\` with exponential backoff. +3. A convenience \`withRetry\` wrapper that returns a new function with retry + behaviour baked in. + +Include JSDoc comments. No external dependencies — use only Node built-ins. +After writing the file, also create a small test script at ${OUTPUT_DIR}/retry-test.ts +that exercises the happy path and a failure case, then run it with \`npx tsx\`.`, + assignee: 'coder', + }, + { + title: 'Review: retry utility', + description: `Read the files at ${OUTPUT_DIR}/retry.ts and ${OUTPUT_DIR}/retry-test.ts. 
+ +Produce a structured code review covering: +- Summary (2-3 sentences describing the module) +- Strengths (bullet list) +- Issues (bullet list — be specific about what and why) +- Verdict: SHIP or NEEDS WORK`, + assignee: 'reviewer', + dependsOn: ['Write: retry utility'], + }, +] + +// --------------------------------------------------------------------------- +// Run +// --------------------------------------------------------------------------- + +console.log('Local + Cloud model team') +console.log(` coder → Claude (${coder.model}) via Anthropic API`) +console.log(` reviewer → Ollama (${reviewer.model}) at ${reviewer.baseURL}`) +console.log() +console.log('Pipeline: coder writes code → local model reviews it') +console.log('='.repeat(60)) + +const result = await orchestrator.runTasks(team, tasks) + +// --------------------------------------------------------------------------- +// Summary +// --------------------------------------------------------------------------- + +console.log('\n' + '='.repeat(60)) +console.log('Pipeline complete.\n') +console.log(`Overall success: ${result.success}`) +console.log(`Tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`) + +console.log('\nPer-agent summary:') +for (const [name, r] of result.agentResults) { + const icon = r.success ? 'OK ' : 'FAIL' + const provider = name === 'coder' ? 
'anthropic' : 'ollama (local)' + const tools = r.toolCalls.map(c => c.toolName).join(', ') + console.log(` [${icon}] ${name.padEnd(10)} (${provider.padEnd(16)}) tools: ${tools || '(none)'}`) +} + +// Print the reviewer's output +const review = result.agentResults.get('reviewer') +if (review?.success) { + console.log('\nCode review (from local model):') + console.log('─'.repeat(60)) + console.log(review.output) + console.log('─'.repeat(60)) +} diff --git a/examples/07-fan-out-aggregate.ts b/examples/07-fan-out-aggregate.ts new file mode 100644 index 0000000..43b2c32 --- /dev/null +++ b/examples/07-fan-out-aggregate.ts @@ -0,0 +1,209 @@ +/** + * Example 07 — Fan-Out / Aggregate (MapReduce) Pattern + * + * Demonstrates: + * - Fan-out: send the same question to N "analyst" agents in parallel + * - Aggregate: a "synthesizer" agent reads all analyst outputs and produces + * a balanced final report + * - AgentPool with runParallel() for concurrent fan-out + * - No tools needed — pure LLM reasoning to keep the focus on the pattern + * + * Run: + * npx tsx examples/07-fan-out-aggregate.ts + * + * Prerequisites: + * ANTHROPIC_API_KEY env var must be set. + */ + +import { Agent, AgentPool, ToolRegistry, ToolExecutor, registerBuiltInTools } from '../src/index.js' +import type { AgentConfig, AgentRunResult } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Analysis topic +// --------------------------------------------------------------------------- + +const TOPIC = `Should a solo developer build a SaaS product that uses AI agents +for automated customer support? 
Consider the current state of AI technology, +market demand, competition, costs, and the unique constraints of being a solo +founder with limited time (~6 hours/day of productive work).` + +// --------------------------------------------------------------------------- +// Analyst agent configs — three perspectives on the same question +// --------------------------------------------------------------------------- + +const optimistConfig: AgentConfig = { + name: 'optimist', + model: 'claude-sonnet-4-6', + systemPrompt: `You are an optimistic technology analyst who focuses on +opportunities, upside potential, and emerging trends. You see possibilities +where others see obstacles. Back your optimism with concrete reasoning — +cite market trends, cost curves, and real capabilities. Keep your analysis +to 200-300 words.`, + maxTurns: 1, + temperature: 0.4, +} + +const skepticConfig: AgentConfig = { + name: 'skeptic', + model: 'claude-sonnet-4-6', + systemPrompt: `You are a skeptical technology analyst who focuses on risks, +challenges, failure modes, and hidden costs. You stress-test assumptions and +ask "what could go wrong?" Back your skepticism with concrete reasoning — +cite failure rates, technical limitations, and market realities. Keep your +analysis to 200-300 words.`, + maxTurns: 1, + temperature: 0.4, +} + +const pragmatistConfig: AgentConfig = { + name: 'pragmatist', + model: 'claude-sonnet-4-6', + systemPrompt: `You are a pragmatic technology analyst who focuses on practical +feasibility, execution complexity, and resource requirements. You care about +what works today, not what might work someday. You think in terms of MVPs, +timelines, and concrete tradeoffs. 
Keep your analysis to 200-300 words.`, + maxTurns: 1, + temperature: 0.4, +} + +const synthesizerConfig: AgentConfig = { + name: 'synthesizer', + model: 'claude-sonnet-4-6', + systemPrompt: `You are a senior strategy advisor who synthesizes multiple +perspectives into a balanced, actionable recommendation. You do not simply +summarise — you weigh the arguments, identify where they agree and disagree, +and produce a clear verdict with next steps. Structure your output as: + +1. Key agreements across perspectives +2. Key disagreements and how you weigh them +3. Verdict (go / no-go / conditional go) +4. Recommended next steps (3-5 bullet points) + +Keep the final report to 300-400 words.`, + maxTurns: 1, + temperature: 0.3, +} + +// --------------------------------------------------------------------------- +// Build agents — no tools needed for pure reasoning +// --------------------------------------------------------------------------- + +function buildAgent(config: AgentConfig): Agent { + const registry = new ToolRegistry() + registerBuiltInTools(registry) // not needed here, but safe if tools are added later + const executor = new ToolExecutor(registry) + return new Agent(config, registry, executor) +} + +const optimist = buildAgent(optimistConfig) +const skeptic = buildAgent(skepticConfig) +const pragmatist = buildAgent(pragmatistConfig) +const synthesizer = buildAgent(synthesizerConfig) + +// --------------------------------------------------------------------------- +// Set up the pool +// --------------------------------------------------------------------------- + +const pool = new AgentPool(3) // 3 analysts can run simultaneously +pool.add(optimist) +pool.add(skeptic) +pool.add(pragmatist) +pool.add(synthesizer) + +console.log('Fan-Out / Aggregate (MapReduce) Pattern') +console.log('='.repeat(60)) +console.log(`\nTopic: ${TOPIC.replace(/\n/g, ' ').trim()}\n`) + +// --------------------------------------------------------------------------- +// Step 1: 
Fan-out — run all 3 analysts in parallel
+// ---------------------------------------------------------------------------
+
+console.log('[Step 1] Fan-out: 3 analysts running in parallel...\n')
+
+const analystResults: Map<string, AgentRunResult> = await pool.runParallel([
+  { agent: 'optimist', prompt: TOPIC },
+  { agent: 'skeptic', prompt: TOPIC },
+  { agent: 'pragmatist', prompt: TOPIC },
+])
+
+// Print each analyst's output (truncated)
+const analysts = ['optimist', 'skeptic', 'pragmatist'] as const
+for (const name of analysts) {
+  const result = analystResults.get(name)!
+  const status = result.success ? 'OK' : 'FAILED'
+  console.log(`  ${name} [${status}] — ${result.tokenUsage.output_tokens} output tokens`)
+  console.log(`    ${result.output.slice(0, 150).replace(/\n/g, ' ')}...`)
+  console.log()
+}
+
+// Check all analysts succeeded
+for (const name of analysts) {
+  if (!analystResults.get(name)!.success) {
+    console.error(`Analyst '${name}' failed: ${analystResults.get(name)!.output}`)
+    process.exit(1)
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Step 2: Aggregate — synthesizer reads all 3 analyses
+// ---------------------------------------------------------------------------
+
+console.log('[Step 2] Aggregate: synthesizer producing final report...\n')
+
+const synthesizerPrompt = `Three analysts have independently evaluated the same question.
+Read their analyses below and produce your synthesis report. 
+ +--- OPTIMIST --- +${analystResults.get('optimist')!.output} + +--- SKEPTIC --- +${analystResults.get('skeptic')!.output} + +--- PRAGMATIST --- +${analystResults.get('pragmatist')!.output} + +Now synthesize these three perspectives into a balanced recommendation.` + +const synthResult = await pool.run('synthesizer', synthesizerPrompt) + +if (!synthResult.success) { + console.error('Synthesizer failed:', synthResult.output) + process.exit(1) +} + +// --------------------------------------------------------------------------- +// Final output +// --------------------------------------------------------------------------- + +console.log('='.repeat(60)) +console.log('SYNTHESIZED REPORT') +console.log('='.repeat(60)) +console.log() +console.log(synthResult.output) +console.log() +console.log('-'.repeat(60)) + +// --------------------------------------------------------------------------- +// Token usage comparison +// --------------------------------------------------------------------------- + +console.log('\nToken Usage Summary:') +console.log('-'.repeat(60)) + +let totalInput = 0 +let totalOutput = 0 + +for (const name of analysts) { + const r = analystResults.get(name)! 
+ totalInput += r.tokenUsage.input_tokens + totalOutput += r.tokenUsage.output_tokens + console.log(` ${name.padEnd(12)} — input: ${r.tokenUsage.input_tokens}, output: ${r.tokenUsage.output_tokens}`) +} + +totalInput += synthResult.tokenUsage.input_tokens +totalOutput += synthResult.tokenUsage.output_tokens +console.log(` ${'synthesizer'.padEnd(12)} — input: ${synthResult.tokenUsage.input_tokens}, output: ${synthResult.tokenUsage.output_tokens}`) +console.log('-'.repeat(60)) +console.log(` ${'TOTAL'.padEnd(12)} — input: ${totalInput}, output: ${totalOutput}`) + +console.log('\nDone.') From 94cccf24d737e094659faa6fce9a0f3192cc4aa3 Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 02:18:58 +0800 Subject: [PATCH 15/38] docs: replace inline code examples with examples/ index table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove ~160 lines of duplicated code snippets from both READMEs. Link to the runnable scripts in examples/ instead — single source of truth, type-checked by npm run lint. --- README.md | 168 ++++----------------------------------------------- README_zh.md | 168 ++++----------------------------------------------- 2 files changed, 26 insertions(+), 310 deletions(-) diff --git a/README.md b/README.md index 34b0a1b..12340be 100644 --- a/README.md +++ b/README.md @@ -104,165 +104,23 @@ Tokens: 12847 output tokens -## More Examples +## Examples -
-Single Agent — one agent, one prompt +All examples are runnable scripts in [`examples/`](./examples/). Run any of them with `npx tsx`: -```typescript -import { OpenMultiAgent } from '@jackchen_me/open-multi-agent' - -const orchestrator = new OpenMultiAgent({ defaultModel: 'claude-sonnet-4-6' }) - -const result = await orchestrator.runAgent( - { - name: 'coder', - model: 'claude-sonnet-4-6', - tools: ['bash', 'file_write'], - }, - 'Write a TypeScript function that reverses a string, save it to /tmp/reverse.ts, and run it.', -) - -console.log(result.output) +```bash +npx tsx examples/01-single-agent.ts ``` -
- -
-Task Pipeline — explicit control over task graph and assignments - -```typescript -const result = await orchestrator.runTasks(team, [ - { - title: 'Design the data model', - description: 'Write a TypeScript interface spec to /tmp/spec.md', - assignee: 'architect', - }, - { - title: 'Implement the module', - description: 'Read /tmp/spec.md and implement the module in /tmp/src/', - assignee: 'developer', - dependsOn: ['Design the data model'], // blocked until design completes - }, - { - title: 'Write tests', - description: 'Read the implementation and write Vitest tests.', - assignee: 'developer', - dependsOn: ['Implement the module'], - }, - { - title: 'Review code', - description: 'Review /tmp/src/ and produce a structured code review.', - assignee: 'reviewer', - dependsOn: ['Implement the module'], // can run in parallel with tests - }, -]) -``` - -
- -
-Custom Tools — define tools with Zod schemas - -```typescript -import { z } from 'zod' -import { defineTool, Agent, ToolRegistry, ToolExecutor, registerBuiltInTools } from '@jackchen_me/open-multi-agent' - -const searchTool = defineTool({ - name: 'web_search', - description: 'Search the web and return the top results.', - inputSchema: z.object({ - query: z.string().describe('The search query.'), - maxResults: z.number().optional().describe('Number of results (default 5).'), - }), - execute: async ({ query, maxResults = 5 }) => { - const results = await mySearchProvider(query, maxResults) - return { data: JSON.stringify(results), isError: false } - }, -}) - -const registry = new ToolRegistry() -registerBuiltInTools(registry) -registry.register(searchTool) - -const executor = new ToolExecutor(registry) -const agent = new Agent( - { name: 'researcher', model: 'claude-sonnet-4-6', tools: ['web_search'] }, - registry, - executor, -) - -const result = await agent.run('Find the three most recent TypeScript releases.') -``` - -
- -
-Multi-Model Teams — mix Claude, GPT, and local models in one workflow - -```typescript -const claudeAgent: AgentConfig = { - name: 'strategist', - model: 'claude-opus-4-6', - provider: 'anthropic', - systemPrompt: 'You plan high-level approaches.', - tools: ['file_write'], -} - -const gptAgent: AgentConfig = { - name: 'implementer', - model: 'gpt-5.4', - provider: 'openai', - systemPrompt: 'You implement plans as working code.', - tools: ['bash', 'file_read', 'file_write'], -} - -// Any OpenAI-compatible API — Ollama, vLLM, LM Studio, etc. -const localAgent: AgentConfig = { - name: 'reviewer', - model: 'llama3.1', - provider: 'openai', - baseURL: 'http://localhost:11434/v1', - apiKey: 'ollama', - systemPrompt: 'You review code for correctness and clarity.', - tools: ['file_read', 'grep'], -} - -const team = orchestrator.createTeam('mixed-team', { - name: 'mixed-team', - agents: [claudeAgent, gptAgent, localAgent], - sharedMemory: true, -}) - -const result = await orchestrator.runTeam(team, 'Build a CLI tool that converts JSON to CSV.') -``` - -
- -
-Streaming Output - -```typescript -import { Agent, ToolRegistry, ToolExecutor, registerBuiltInTools } from '@jackchen_me/open-multi-agent' - -const registry = new ToolRegistry() -registerBuiltInTools(registry) -const executor = new ToolExecutor(registry) - -const agent = new Agent( - { name: 'writer', model: 'claude-sonnet-4-6', maxTurns: 3 }, - registry, - executor, -) - -for await (const event of agent.stream('Explain monads in two sentences.')) { - if (event.type === 'text' && typeof event.data === 'string') { - process.stdout.write(event.data) - } -} -``` - -
+| Example | What it shows | +|---------|---------------| +| [01 — Single Agent](examples/01-single-agent.ts) | `runAgent()` one-shot, `stream()` streaming, `prompt()` multi-turn | +| [02 — Team Collaboration](examples/02-team-collaboration.ts) | `runTeam()` auto-orchestration with coordinator pattern | +| [03 — Task Pipeline](examples/03-task-pipeline.ts) | `runTasks()` explicit dependency graph (design → implement → test + review) | +| [04 — Multi-Model Team](examples/04-multi-model-team.ts) | `defineTool()` custom tools, mixed Anthropic + OpenAI providers, `AgentPool` | +| [05 — Copilot](examples/05-copilot-test.ts) | GitHub Copilot as an LLM provider | +| [06 — Local Model](examples/06-local-model.ts) | Ollama + Claude in one pipeline via `baseURL` (works with vLLM, LM Studio, etc.) | +| [07 — Fan-Out / Aggregate](examples/07-fan-out-aggregate.ts) | `runParallel()` MapReduce — 3 analysts in parallel, then synthesize | ## Architecture diff --git a/README_zh.md b/README_zh.md index 86436dc..0f38c51 100644 --- a/README_zh.md +++ b/README_zh.md @@ -108,165 +108,23 @@ Tokens: 12847 output tokens -## 更多示例 +## 示例 -
-单智能体 — 一个智能体,一个提示词 +所有示例都是可运行脚本,位于 [`examples/`](./examples/) 目录。使用 `npx tsx` 运行: -```typescript -import { OpenMultiAgent } from '@jackchen_me/open-multi-agent' - -const orchestrator = new OpenMultiAgent({ defaultModel: 'claude-sonnet-4-6' }) - -const result = await orchestrator.runAgent( - { - name: 'coder', - model: 'claude-sonnet-4-6', - tools: ['bash', 'file_write'], - }, - 'Write a TypeScript function that reverses a string, save it to /tmp/reverse.ts, and run it.', -) - -console.log(result.output) +```bash +npx tsx examples/01-single-agent.ts ``` -
- -
-任务流水线 — 显式控制任务图和分配 - -```typescript -const result = await orchestrator.runTasks(team, [ - { - title: 'Design the data model', - description: 'Write a TypeScript interface spec to /tmp/spec.md', - assignee: 'architect', - }, - { - title: 'Implement the module', - description: 'Read /tmp/spec.md and implement the module in /tmp/src/', - assignee: 'developer', - dependsOn: ['Design the data model'], // 等待设计完成后才开始 - }, - { - title: 'Write tests', - description: 'Read the implementation and write Vitest tests.', - assignee: 'developer', - dependsOn: ['Implement the module'], - }, - { - title: 'Review code', - description: 'Review /tmp/src/ and produce a structured code review.', - assignee: 'reviewer', - dependsOn: ['Implement the module'], // 可以和测试并行执行 - }, -]) -``` - -
- -
-自定义工具 — 使用 Zod schema 定义工具 - -```typescript -import { z } from 'zod' -import { defineTool, Agent, ToolRegistry, ToolExecutor, registerBuiltInTools } from '@jackchen_me/open-multi-agent' - -const searchTool = defineTool({ - name: 'web_search', - description: 'Search the web and return the top results.', - inputSchema: z.object({ - query: z.string().describe('The search query.'), - maxResults: z.number().optional().describe('Number of results (default 5).'), - }), - execute: async ({ query, maxResults = 5 }) => { - const results = await mySearchProvider(query, maxResults) - return { data: JSON.stringify(results), isError: false } - }, -}) - -const registry = new ToolRegistry() -registerBuiltInTools(registry) -registry.register(searchTool) - -const executor = new ToolExecutor(registry) -const agent = new Agent( - { name: 'researcher', model: 'claude-sonnet-4-6', tools: ['web_search'] }, - registry, - executor, -) - -const result = await agent.run('Find the three most recent TypeScript releases.') -``` - -
- -
-多模型团队 — 在一个工作流中混合使用 Claude、GPT 和本地模型 - -```typescript -const claudeAgent: AgentConfig = { - name: 'strategist', - model: 'claude-opus-4-6', - provider: 'anthropic', - systemPrompt: 'You plan high-level approaches.', - tools: ['file_write'], -} - -const gptAgent: AgentConfig = { - name: 'implementer', - model: 'gpt-5.4', - provider: 'openai', - systemPrompt: 'You implement plans as working code.', - tools: ['bash', 'file_read', 'file_write'], -} - -// 任何 OpenAI 兼容 API — Ollama、vLLM、LM Studio 等 -const localAgent: AgentConfig = { - name: 'reviewer', - model: 'llama3.1', - provider: 'openai', - baseURL: 'http://localhost:11434/v1', - apiKey: 'ollama', - systemPrompt: 'You review code for correctness and clarity.', - tools: ['file_read', 'grep'], -} - -const team = orchestrator.createTeam('mixed-team', { - name: 'mixed-team', - agents: [claudeAgent, gptAgent, localAgent], - sharedMemory: true, -}) - -const result = await orchestrator.runTeam(team, 'Build a CLI tool that converts JSON to CSV.') -``` - -
- -
-流式输出 - -```typescript -import { Agent, ToolRegistry, ToolExecutor, registerBuiltInTools } from '@jackchen_me/open-multi-agent' - -const registry = new ToolRegistry() -registerBuiltInTools(registry) -const executor = new ToolExecutor(registry) - -const agent = new Agent( - { name: 'writer', model: 'claude-sonnet-4-6', maxTurns: 3 }, - registry, - executor, -) - -for await (const event of agent.stream('Explain monads in two sentences.')) { - if (event.type === 'text' && typeof event.data === 'string') { - process.stdout.write(event.data) - } -} -``` - -
+| 示例 | 展示内容 | +|------|----------| +| [01 — 单智能体](examples/01-single-agent.ts) | `runAgent()` 单次调用、`stream()` 流式输出、`prompt()` 多轮对话 | +| [02 — 团队协作](examples/02-team-collaboration.ts) | `runTeam()` 自动编排 + 协调者模式 | +| [03 — 任务流水线](examples/03-task-pipeline.ts) | `runTasks()` 显式依赖图(设计 → 实现 → 测试 + 评审) | +| [04 — 多模型团队](examples/04-multi-model-team.ts) | `defineTool()` 自定义工具、Anthropic + OpenAI 混合、`AgentPool` | +| [05 — Copilot](examples/05-copilot-test.ts) | GitHub Copilot 作为 LLM 提供者 | +| [06 — 本地模型](examples/06-local-model.ts) | Ollama + Claude 混合流水线,通过 `baseURL` 接入(兼容 vLLM、LM Studio 等) | +| [07 — 扇出聚合](examples/07-fan-out-aggregate.ts) | `runParallel()` MapReduce — 3 个分析师并行,然后综合 | ## 架构 From 24a2c4fe1ab31795c1d4396ad6b92bd3763459ab Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 02:20:47 +0800 Subject: [PATCH 16/38] chore: add tsx to devDependencies for running examples --- package-lock.json | 532 +++++++++++++++++++++++++++++++++++++++++++++- package.json | 5 +- 2 files changed, 533 insertions(+), 4 deletions(-) diff --git a/package-lock.json b/package-lock.json index 96f1dec..0b541e2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,11 +1,11 @@ { - "name": "maestro-agents", + "name": "@jackchen_me/open-multi-agent", "version": "0.1.0", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "maestro-agents", + "name": "@jackchen_me/open-multi-agent", "version": "0.1.0", "license": "MIT", "dependencies": { @@ -15,6 +15,7 @@ }, "devDependencies": { "@types/node": "^22.0.0", + "tsx": "^4.21.0", "typescript": "^5.6.0", "vitest": "^2.1.0" }, @@ -320,6 +321,23 @@ "node": ">=12" } }, + "node_modules/@esbuild/netbsd-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.7.tgz", + "integrity": "sha512-b6pqtrQdigZBwZxAn1UpazEisvwaIDvdbMbmrly7cDTMFnw/+3lVxxCTGOrkPVnsYIosJJXAsILG9XcQS+Yu6w==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + 
"os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, "node_modules/@esbuild/netbsd-x64": { "version": "0.21.5", "resolved": "https://registry.npmmirror.com/@esbuild/netbsd-x64/-/netbsd-x64-0.21.5.tgz", @@ -337,6 +355,23 @@ "node": ">=12" } }, + "node_modules/@esbuild/openbsd-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.7.tgz", + "integrity": "sha512-AFuojMQTxAz75Fo8idVcqoQWEHIXFRbOc1TrVcFSgCZtQfSdc1RXgB3tjOn/krRHENUB4j00bfGjyl2mJrU37A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, "node_modules/@esbuild/openbsd-x64": { "version": "0.21.5", "resolved": "https://registry.npmmirror.com/@esbuild/openbsd-x64/-/openbsd-x64-0.21.5.tgz", @@ -354,6 +389,23 @@ "node": ">=12" } }, + "node_modules/@esbuild/openharmony-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.7.tgz", + "integrity": "sha512-+KrvYb/C8zA9CU/g0sR6w2RBw7IGc5J2BPnc3dYc5VJxHCSF1yNMxTV5LQ7GuKteQXZtspjFbiuW5/dOj7H4Yw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ], + "engines": { + "node": ">=18" + } + }, "node_modules/@esbuild/sunos-x64": { "version": "0.21.5", "resolved": "https://registry.npmmirror.com/@esbuild/sunos-x64/-/sunos-x64-0.21.5.tgz", @@ -1287,6 +1339,19 @@ "node": ">= 0.4" } }, + "node_modules/get-tsconfig": { + "version": "4.13.7", + "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.7.tgz", + "integrity": "sha512-7tN6rFgBlMgpBML5j8typ92BKFi2sFQvIdpAqLA2beia5avZDrMs0FLZiM5etShWq5irVyGcGMEA1jcDaK7A/Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "resolve-pkg-maps": "^1.0.0" + }, + "funding": { + "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" + } + }, "node_modules/gopd": { "version": "1.2.0", "resolved": 
"https://registry.npmmirror.com/gopd/-/gopd-1.2.0.tgz", @@ -1557,6 +1622,16 @@ "node": "^10 || ^12 || >=14" } }, + "node_modules/resolve-pkg-maps": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", + "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" + } + }, "node_modules/rollup": { "version": "4.60.1", "resolved": "https://registry.npmmirror.com/rollup/-/rollup-4.60.1.tgz", @@ -1683,6 +1758,459 @@ "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", "license": "MIT" }, + "node_modules/tsx": { + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz", + "integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==", + "dev": true, + "license": "MIT", + "dependencies": { + "esbuild": "~0.27.0", + "get-tsconfig": "^4.7.5" + }, + "bin": { + "tsx": "dist/cli.mjs" + }, + "engines": { + "node": ">=18.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + } + }, + "node_modules/tsx/node_modules/@esbuild/aix-ppc64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.7.tgz", + "integrity": "sha512-EKX3Qwmhz1eMdEJokhALr0YiD0lhQNwDqkPYyPhiSwKrh7/4KRjQc04sZ8db+5DVVnZ1LmbNDI1uAMPEUBnQPg==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/android-arm": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.7.tgz", + "integrity": "sha512-jbPXvB4Yj2yBV7HUfE2KHe4GJX51QplCN1pGbYjvsyCZbQmies29EoJbkEc+vYuU5o45AfQn37vZlyXy4YJ8RQ==", + "cpu": [ + "arm" + ], + 
"dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/android-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.7.tgz", + "integrity": "sha512-62dPZHpIXzvChfvfLJow3q5dDtiNMkwiRzPylSCfriLvZeq0a1bWChrGx/BbUbPwOrsWKMn8idSllklzBy+dgQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/android-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.7.tgz", + "integrity": "sha512-x5VpMODneVDb70PYV2VQOmIUUiBtY3D3mPBG8NxVk5CogneYhkR7MmM3yR/uMdITLrC1ml/NV1rj4bMJuy9MCg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/darwin-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.7.tgz", + "integrity": "sha512-5lckdqeuBPlKUwvoCXIgI2D9/ABmPq3Rdp7IfL70393YgaASt7tbju3Ac+ePVi3KDH6N2RqePfHnXkaDtY9fkw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/darwin-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.7.tgz", + "integrity": "sha512-rYnXrKcXuT7Z+WL5K980jVFdvVKhCHhUwid+dDYQpH+qu+TefcomiMAJpIiC2EM3Rjtq0sO3StMV/+3w3MyyqQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/freebsd-arm64": { + "version": "0.27.7", + "resolved": 
"https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.7.tgz", + "integrity": "sha512-B48PqeCsEgOtzME2GbNM2roU29AMTuOIN91dsMO30t+Ydis3z/3Ngoj5hhnsOSSwNzS+6JppqWsuhTp6E82l2w==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/freebsd-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.7.tgz", + "integrity": "sha512-jOBDK5XEjA4m5IJK3bpAQF9/Lelu/Z9ZcdhTRLf4cajlB+8VEhFFRjWgfy3M1O4rO2GQ/b2dLwCUGpiF/eATNQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-arm": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.7.tgz", + "integrity": "sha512-RkT/YXYBTSULo3+af8Ib0ykH8u2MBh57o7q/DAs3lTJlyVQkgQvlrPTnjIzzRPQyavxtPtfg0EopvDyIt0j1rA==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.7.tgz", + "integrity": "sha512-RZPHBoxXuNnPQO9rvjh5jdkRmVizktkT7TCDkDmQ0W2SwHInKCAV95GRuvdSvA7w4VMwfCjUiPwDi0ZO6Nfe9A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-ia32": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.7.tgz", + "integrity": "sha512-GA48aKNkyQDbd3KtkplYWT102C5sn/EZTY4XROkxONgruHPU72l+gW+FfF8tf2cFjeHaRbWpOYa/uRBz/Xq1Pg==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + 
"os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-loong64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.7.tgz", + "integrity": "sha512-a4POruNM2oWsD4WKvBSEKGIiWQF8fZOAsycHOt6JBpZ+JN2n2JH9WAv56SOyu9X5IqAjqSIPTaJkqN8F7XOQ5Q==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-mips64el": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.7.tgz", + "integrity": "sha512-KabT5I6StirGfIz0FMgl1I+R1H73Gp0ofL9A3nG3i/cYFJzKHhouBV5VWK1CSgKvVaG4q1RNpCTR2LuTVB3fIw==", + "cpu": [ + "mips64el" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-ppc64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.7.tgz", + "integrity": "sha512-gRsL4x6wsGHGRqhtI+ifpN/vpOFTQtnbsupUF5R5YTAg+y/lKelYR1hXbnBdzDjGbMYjVJLJTd2OFmMewAgwlQ==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-riscv64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.7.tgz", + "integrity": "sha512-hL25LbxO1QOngGzu2U5xeXtxXcW+/GvMN3ejANqXkxZ/opySAZMrc+9LY/WyjAan41unrR3YrmtTsUpwT66InQ==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-s390x": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.7.tgz", + 
"integrity": "sha512-2k8go8Ycu1Kb46vEelhu1vqEP+UeRVj2zY1pSuPdgvbd5ykAw82Lrro28vXUrRmzEsUV0NzCf54yARIK8r0fdw==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.7.tgz", + "integrity": "sha512-hzznmADPt+OmsYzw1EE33ccA+HPdIqiCRq7cQeL1Jlq2gb1+OyWBkMCrYGBJ+sxVzve2ZJEVeePbLM2iEIZSxA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/netbsd-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.7.tgz", + "integrity": "sha512-OfatkLojr6U+WN5EDYuoQhtM+1xco+/6FSzJJnuWiUw5eVcicbyK3dq5EeV/QHT1uy6GoDhGbFpprUiHUYggrw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/openbsd-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.7.tgz", + "integrity": "sha512-+A1NJmfM8WNDv5CLVQYJ5PshuRm/4cI6WMZRg1by1GwPIQPCTs1GLEUHwiiQGT5zDdyLiRM/l1G0Pv54gvtKIg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/sunos-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.7.tgz", + "integrity": "sha512-ikktIhFBzQNt/QDyOL580ti9+5mL/YZeUPKU2ivGtGjdTYoqz6jObj6nOMfhASpS4GU4Q/Clh1QtxWAvcYKamA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=18" + } + }, + 
"node_modules/tsx/node_modules/@esbuild/win32-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.7.tgz", + "integrity": "sha512-7yRhbHvPqSpRUV7Q20VuDwbjW5kIMwTHpptuUzV+AA46kiPze5Z7qgt6CLCK3pWFrHeNfDd1VKgyP4O+ng17CA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/win32-ia32": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.7.tgz", + "integrity": "sha512-SmwKXe6VHIyZYbBLJrhOoCJRB/Z1tckzmgTLfFYOfpMAx63BJEaL9ExI8x7v0oAO3Zh6D/Oi1gVxEYr5oUCFhw==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/win32-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.7.tgz", + "integrity": "sha512-56hiAJPhwQ1R4i+21FVF7V8kSD5zZTdHcVuRFMW0hn753vVfQN8xlx4uOPT4xoGH0Z/oVATuR82AiqSTDIpaHg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/esbuild": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.7.tgz", + "integrity": "sha512-IxpibTjyVnmrIQo5aqNpCgoACA/dTKLTlhMHihVHhdkxKyPO1uBBthumT0rdHmcsk9uMonIWS0m4FljWzILh3w==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.27.7", + "@esbuild/android-arm": "0.27.7", + "@esbuild/android-arm64": "0.27.7", + "@esbuild/android-x64": "0.27.7", + "@esbuild/darwin-arm64": "0.27.7", + "@esbuild/darwin-x64": "0.27.7", + "@esbuild/freebsd-arm64": "0.27.7", + "@esbuild/freebsd-x64": "0.27.7", + 
"@esbuild/linux-arm": "0.27.7", + "@esbuild/linux-arm64": "0.27.7", + "@esbuild/linux-ia32": "0.27.7", + "@esbuild/linux-loong64": "0.27.7", + "@esbuild/linux-mips64el": "0.27.7", + "@esbuild/linux-ppc64": "0.27.7", + "@esbuild/linux-riscv64": "0.27.7", + "@esbuild/linux-s390x": "0.27.7", + "@esbuild/linux-x64": "0.27.7", + "@esbuild/netbsd-arm64": "0.27.7", + "@esbuild/netbsd-x64": "0.27.7", + "@esbuild/openbsd-arm64": "0.27.7", + "@esbuild/openbsd-x64": "0.27.7", + "@esbuild/openharmony-arm64": "0.27.7", + "@esbuild/sunos-x64": "0.27.7", + "@esbuild/win32-arm64": "0.27.7", + "@esbuild/win32-ia32": "0.27.7", + "@esbuild/win32-x64": "0.27.7" + } + }, "node_modules/typescript": { "version": "5.9.3", "resolved": "https://registry.npmmirror.com/typescript/-/typescript-5.9.3.tgz", diff --git a/package.json b/package.json index ee0e26a..b185a8c 100644 --- a/package.json +++ b/package.json @@ -42,8 +42,9 @@ "zod": "^3.23.0" }, "devDependencies": { + "@types/node": "^22.0.0", + "tsx": "^4.21.0", "typescript": "^5.6.0", - "vitest": "^2.1.0", - "@types/node": "^22.0.0" + "vitest": "^2.1.0" } } From 8d27c6a1fe2c0ad698c178ec2e63a0e947eca53d Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 02:47:02 +0800 Subject: [PATCH 17/38] docs: add supported providers section and clarify contributing guidelines - Add Supported Providers table with 4 verified providers (Anthropic, OpenAI, Copilot, Ollama) and note that other OpenAI-compatible providers are unverified - Update Contributing to distinguish baseURL verification (#25) from new adapters - Note that local models via Ollama require no API key in Quick Start --- README.md | 15 +++++++++++++-- README_zh.md | 15 +++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 12340be..edef1d4 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Requires Node.js >= 18. 
npm install @jackchen_me/open-multi-agent ``` -Set `ANTHROPIC_API_KEY` (and optionally `OPENAI_API_KEY` or `GITHUB_TOKEN` for Copilot) in your environment. +Set `ANTHROPIC_API_KEY` (and optionally `OPENAI_API_KEY` or `GITHUB_TOKEN` for Copilot) in your environment. Local models via Ollama require no API key — see [example 06](examples/06-local-model.ts). Three agents, one goal — the framework handles the rest: @@ -173,11 +173,22 @@ npx tsx examples/01-single-agent.ts | `file_edit` | Edit a file by replacing an exact string match. | | `grep` | Search file contents with regex. Uses ripgrep when available, falls back to Node.js. | +## Supported Providers + +| Provider | Config | Env var | Status | +|----------|--------|---------|--------| +| Anthropic (Claude) | `provider: 'anthropic'` | `ANTHROPIC_API_KEY` | Verified | +| OpenAI (GPT) | `provider: 'openai'` | `OPENAI_API_KEY` | Verified | +| GitHub Copilot | `provider: 'copilot'` | `GITHUB_TOKEN` | Verified | +| Ollama / vLLM / LM Studio | `provider: 'openai'` + `baseURL` | — | Verified | + +Any OpenAI-compatible API should work via `provider: 'openai'` + `baseURL` (DeepSeek, Groq, Mistral, Qwen, MiniMax, etc.). These providers have not been fully verified yet — contributions welcome via [#25](https://github.com/JackChen-me/open-multi-agent/issues/25). + ## Contributing Issues, feature requests, and PRs are welcome. Some areas where contributions would be especially valuable: -- **LLM Adapters** — Anthropic, OpenAI, and Copilot are supported out of the box. Any OpenAI-compatible API (Ollama, vLLM, LM Studio, etc.) works via `baseURL`. Additional adapters for Gemini and other providers are welcome. The `LLMAdapter` interface requires just two methods: `chat()` and `stream()`. +- **Provider integrations** — Verify and document OpenAI-compatible providers (DeepSeek, Groq, Qwen, MiniMax, etc.) via `baseURL`. See [#25](https://github.com/JackChen-me/open-multi-agent/issues/25). 
For providers that are NOT OpenAI-compatible (e.g. Gemini), a new `LLMAdapter` implementation is welcome — the interface requires just two methods: `chat()` and `stream()`. - **Examples** — Real-world workflows and use cases. - **Documentation** — Guides, tutorials, and API docs. diff --git a/README_zh.md b/README_zh.md index 0f38c51..4cf7a00 100644 --- a/README_zh.md +++ b/README_zh.md @@ -26,7 +26,7 @@ npm install @jackchen_me/open-multi-agent ``` -在环境变量中设置 `ANTHROPIC_API_KEY`(以及可选的 `OPENAI_API_KEY` 或用于 Copilot 的 `GITHUB_TOKEN`)。 +在环境变量中设置 `ANTHROPIC_API_KEY`(以及可选的 `OPENAI_API_KEY` 或用于 Copilot 的 `GITHUB_TOKEN`)。通过 Ollama 使用本地模型无需 API key — 参见 [example 06](examples/06-local-model.ts)。 三个智能体,一个目标——框架处理剩下的一切: @@ -177,11 +177,22 @@ npx tsx examples/01-single-agent.ts | `file_edit` | 通过精确字符串匹配编辑文件。 | | `grep` | 使用正则表达式搜索文件内容。优先使用 ripgrep,回退到 Node.js 实现。 | +## 支持的 Provider + +| Provider | 配置 | 环境变量 | 状态 | +|----------|------|----------|------| +| Anthropic (Claude) | `provider: 'anthropic'` | `ANTHROPIC_API_KEY` | 已验证 | +| OpenAI (GPT) | `provider: 'openai'` | `OPENAI_API_KEY` | 已验证 | +| GitHub Copilot | `provider: 'copilot'` | `GITHUB_TOKEN` | 已验证 | +| Ollama / vLLM / LM Studio | `provider: 'openai'` + `baseURL` | — | 已验证 | + +任何 OpenAI 兼容 API 均可通过 `provider: 'openai'` + `baseURL` 接入(DeepSeek、Groq、Mistral、Qwen、MiniMax 等)。这些 Provider 尚未完整验证——欢迎通过 [#25](https://github.com/JackChen-me/open-multi-agent/issues/25) 贡献验证。 + ## 参与贡献 欢迎提 Issue、功能需求和 PR。以下方向的贡献尤其有价值: -- **LLM 适配器** — Anthropic、OpenAI、Copilot 已原生支持。任何 OpenAI 兼容 API(Ollama、vLLM、LM Studio 等)可通过 `baseURL` 直接使用。欢迎贡献 Gemini 等其他适配器。`LLMAdapter` 接口只需实现两个方法:`chat()` 和 `stream()`。 +- **Provider 集成** — 验证并文档化 OpenAI 兼容 Provider(DeepSeek、Groq、Qwen、MiniMax 等)通过 `baseURL` 接入。详见 [#25](https://github.com/JackChen-me/open-multi-agent/issues/25)。对于非 OpenAI 兼容的 Provider(如 Gemini),欢迎贡献新的 `LLMAdapter` 实现——接口只需两个方法:`chat()` 和 `stream()`。 - **示例** — 真实场景的工作流和用例。 - **文档** — 指南、教程和 API 文档。 From d8a217106f535bd1c1c6c2c06a1239d495a7b4a6 
Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 03:02:56 +0800 Subject: [PATCH 18/38] docs: add DECISIONS.md recording deliberate "won't do" choices Document 5 features we evaluated and chose not to implement (handoffs, checkpointing, A2A, MCP, dashboard) to maintain our "simplest multi-agent framework" positioning. Closes #17, #20. --- DECISIONS.md | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 DECISIONS.md diff --git a/DECISIONS.md b/DECISIONS.md new file mode 100644 index 0000000..a16151f --- /dev/null +++ b/DECISIONS.md @@ -0,0 +1,43 @@ +# Architecture Decisions + +This document records deliberate "won't do" decisions for the project. These are features we evaluated and chose NOT to implement — not because they're bad ideas, but because they conflict with our positioning as the **simplest multi-agent framework**. + +If you're considering a PR in any of these areas, please open a discussion first. + +## Won't Do + +### 1. Agent Handoffs + +**What**: Agent A transfers an in-progress conversation to Agent B (like OpenAI Agents SDK `handoff()`). + +**Why not**: Handoffs are a different paradigm from our task-based model. Our tasks have clear boundaries — one agent, one task, one result. Handoffs blur those boundaries and add state-transfer complexity. Users who need handoffs likely need a different framework (OpenAI Agents SDK is purpose-built for this). + +### 2. State Persistence / Checkpointing + +**What**: Save workflow state to a database so long-running workflows can resume after crashes (like LangGraph checkpointing). + +**Why not**: Requires a storage backend (SQLite, Redis, Postgres), schema migrations, and serialization logic. This is enterprise infrastructure — it triples the complexity surface. Our target users run workflows that complete in seconds to minutes, not hours. If you need checkpointing, LangGraph is the right tool. + +**Related**: Closing #20 with this rationale. + +### 3. 
A2A Protocol (Agent-to-Agent) + +**What**: Google's open protocol for agents on different servers to discover and communicate with each other. + +**Why not**: Too early — the spec is still evolving and adoption is minimal. Our users run agents in a single process, not across distributed services. If A2A matures and there's real demand, we can revisit. Today it would add complexity for zero practical benefit. + +### 4. MCP Integration (Model Context Protocol) + +**What**: Anthropic's protocol for connecting LLMs to external tools and data sources. + +**Why not**: MCP is valuable but targets a different layer. Our `defineTool()` API already lets users wrap any external service as a tool in ~10 lines of code. Adding MCP would mean maintaining protocol compatibility, transport layers, and tool discovery — complexity that serves tool platform builders, not our target users who just want to run agent teams. + +### 5. Dashboard / Visualization + +**What**: Built-in web UI to visualize task DAGs, agent activity, and token usage. + +**Why not**: We expose data, we don't build UI. The `onProgress` callback and upcoming `onTrace` (#18) give users all the raw data. They can pipe it into Grafana, build a custom dashboard, or use console logs. Shipping a web UI means owning a frontend stack, which is outside our scope. + +--- + +*Last updated: 2026-04-03* From f9fcac0c3810d3a6cea77b2a012a1fdbdc001a9e Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 09:12:44 +0800 Subject: [PATCH 19/38] feat: add Gemma 4 local model example with tool-calling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add examples/08-gemma4-local.ts demonstrating a pure-local multi-agent team using Gemma 4 via Ollama — zero API cost. Two agents (researcher + summarizer) collaborate through a task pipeline with bash, file_write, and file_read tools. Verified on gemma4:e2b with Ollama 0.20.0-rc1. 
Update both READMEs: add example 08 to the examples table and note Gemma 4 as a verified local model with tool-calling support. --- README.md | 3 + README_zh.md | 3 + examples/08-gemma4-local.ts | 203 ++++++++++++++++++++++++++++++++++++ 3 files changed, 209 insertions(+) create mode 100644 examples/08-gemma4-local.ts diff --git a/README.md b/README.md index edef1d4..a74723f 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,7 @@ npx tsx examples/01-single-agent.ts | [05 — Copilot](examples/05-copilot-test.ts) | GitHub Copilot as an LLM provider | | [06 — Local Model](examples/06-local-model.ts) | Ollama + Claude in one pipeline via `baseURL` (works with vLLM, LM Studio, etc.) | | [07 — Fan-Out / Aggregate](examples/07-fan-out-aggregate.ts) | `runParallel()` MapReduce — 3 analysts in parallel, then synthesize | +| [08 — Gemma 4 Local](examples/08-gemma4-local.ts) | Pure-local Gemma 4 agent team with tool-calling — zero API cost | ## Architecture @@ -182,6 +183,8 @@ npx tsx examples/01-single-agent.ts | GitHub Copilot | `provider: 'copilot'` | `GITHUB_TOKEN` | Verified | | Ollama / vLLM / LM Studio | `provider: 'openai'` + `baseURL` | — | Verified | +Verified local models with tool-calling: **Gemma 4** (see [example 08](examples/08-gemma4-local.ts)). + Any OpenAI-compatible API should work via `provider: 'openai'` + `baseURL` (DeepSeek, Groq, Mistral, Qwen, MiniMax, etc.). These providers have not been fully verified yet — contributions welcome via [#25](https://github.com/JackChen-me/open-multi-agent/issues/25). 
## Contributing diff --git a/README_zh.md b/README_zh.md index 4cf7a00..76c5d2f 100644 --- a/README_zh.md +++ b/README_zh.md @@ -125,6 +125,7 @@ npx tsx examples/01-single-agent.ts | [05 — Copilot](examples/05-copilot-test.ts) | GitHub Copilot 作为 LLM 提供者 | | [06 — 本地模型](examples/06-local-model.ts) | Ollama + Claude 混合流水线,通过 `baseURL` 接入(兼容 vLLM、LM Studio 等) | | [07 — 扇出聚合](examples/07-fan-out-aggregate.ts) | `runParallel()` MapReduce — 3 个分析师并行,然后综合 | +| [08 — Gemma 4 本地](examples/08-gemma4-local.ts) | 纯本地 Gemma 4 智能体团队 + tool-calling — 零 API 费用 | ## 架构 @@ -186,6 +187,8 @@ npx tsx examples/01-single-agent.ts | GitHub Copilot | `provider: 'copilot'` | `GITHUB_TOKEN` | 已验证 | | Ollama / vLLM / LM Studio | `provider: 'openai'` + `baseURL` | — | 已验证 | +已验证支持 tool-calling 的本地模型:**Gemma 4**(见[示例 08](examples/08-gemma4-local.ts))。 + 任何 OpenAI 兼容 API 均可通过 `provider: 'openai'` + `baseURL` 接入(DeepSeek、Groq、Mistral、Qwen、MiniMax 等)。这些 Provider 尚未完整验证——欢迎通过 [#25](https://github.com/JackChen-me/open-multi-agent/issues/25) 贡献验证。 ## 参与贡献 diff --git a/examples/08-gemma4-local.ts b/examples/08-gemma4-local.ts new file mode 100644 index 0000000..0dd8087 --- /dev/null +++ b/examples/08-gemma4-local.ts @@ -0,0 +1,203 @@ +/** + * Example 08 — Gemma 4 Local Agent Team (100% Local, Zero API Cost) + * + * Demonstrates a fully local multi-agent team using Google's Gemma 4 via + * Ollama. No cloud API keys needed — everything runs on your machine. + * + * Two agents collaborate through a task pipeline: + * - researcher: uses bash + file_write to gather system info and write a report + * - summarizer: uses file_read to read the report and produce a concise summary + * + * This pattern works with any Ollama model that supports tool-calling. + * Gemma 4 (released 2026-04-02) has native tool-calling support. + * + * Run: + * no_proxy=localhost npx tsx examples/08-gemma4-local.ts + * + * Prerequisites: + * 1. Ollama >= 0.20.0 installed and running: https://ollama.com + * 2. 
Pull the model: ollama pull gemma4:e2b + * (or gemma4:e4b for better quality on machines with more RAM) + * 3. No API keys needed! + * + * Note: The no_proxy=localhost prefix is needed if you have an HTTP proxy + * configured, since the OpenAI SDK would otherwise route Ollama requests + * through the proxy. + */ + +import { OpenMultiAgent } from '../src/index.js' +import type { AgentConfig, OrchestratorEvent, Task } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Configuration — change this to match your Ollama setup +// --------------------------------------------------------------------------- + +// See available tags at https://ollama.com/library/gemma4 +const OLLAMA_MODEL = 'gemma4:e2b' // or 'gemma4:e4b', 'gemma4:26b' +const OLLAMA_BASE_URL = 'http://localhost:11434/v1' +const OUTPUT_DIR = '/tmp/gemma4-demo' + +// --------------------------------------------------------------------------- +// Agents — both use Gemma 4 locally +// --------------------------------------------------------------------------- + +/** + * Researcher — gathers system information using shell commands. + */ +const researcher: AgentConfig = { + name: 'researcher', + model: OLLAMA_MODEL, + provider: 'openai', + baseURL: OLLAMA_BASE_URL, + apiKey: 'ollama', // placeholder — Ollama ignores this, but the OpenAI SDK requires a non-empty value + systemPrompt: `You are a system researcher. Your job is to gather information +about the current machine using shell commands and write a structured report. + +Use the bash tool to run commands like: uname -a, df -h, uptime, and similar +non-destructive read-only commands. +On macOS you can also use: sw_vers, sysctl -n hw.memsize. +On Linux you can also use: cat /etc/os-release, free -h. + +Then use file_write to save a Markdown report to ${OUTPUT_DIR}/system-report.md. +The report should have sections: OS, Hardware, Disk, and Uptime. 
+Be concise — one or two lines per section is enough.`, + tools: ['bash', 'file_write'], + maxTurns: 8, +} + +/** + * Summarizer — reads the report and writes a one-paragraph executive summary. + */ +const summarizer: AgentConfig = { + name: 'summarizer', + model: OLLAMA_MODEL, + provider: 'openai', + baseURL: OLLAMA_BASE_URL, + apiKey: 'ollama', + systemPrompt: `You are a technical writer. Read the system report file provided, +then produce a concise one-paragraph executive summary (3-5 sentences). +Focus on the key highlights: what OS, how much RAM, disk status, and uptime.`, + tools: ['file_read'], + maxTurns: 4, +} + +// --------------------------------------------------------------------------- +// Progress handler +// --------------------------------------------------------------------------- + +const taskTimes = new Map() + +function handleProgress(event: OrchestratorEvent): void { + const ts = new Date().toISOString().slice(11, 23) + + switch (event.type) { + case 'task_start': { + taskTimes.set(event.task ?? '', Date.now()) + const task = event.data as Task | undefined + console.log(`[${ts}] TASK START "${task?.title ?? event.task}" → ${task?.assignee ?? '?'}`) + break + } + case 'task_complete': { + const elapsed = Date.now() - (taskTimes.get(event.task ?? '') ?? Date.now()) + console.log(`[${ts}] TASK DONE "${event.task}" in ${(elapsed / 1000).toFixed(1)}s`) + break + } + case 'agent_start': + console.log(`[${ts}] AGENT START ${event.agent}`) + break + case 'agent_complete': + console.log(`[${ts}] AGENT DONE ${event.agent}`) + break + case 'error': + console.error(`[${ts}] ERROR ${event.agent ?? ''} task=${event.task ?? 
'?'}`) + break + } +} + +// --------------------------------------------------------------------------- +// Orchestrator + Team +// --------------------------------------------------------------------------- + +const orchestrator = new OpenMultiAgent({ + defaultModel: OLLAMA_MODEL, + maxConcurrency: 1, // run agents sequentially — local model can only serve one at a time + onProgress: handleProgress, +}) + +const team = orchestrator.createTeam('gemma4-team', { + name: 'gemma4-team', + agents: [researcher, summarizer], + sharedMemory: true, +}) + +// --------------------------------------------------------------------------- +// Task pipeline: research → summarize +// --------------------------------------------------------------------------- + +const tasks: Array<{ + title: string + description: string + assignee?: string + dependsOn?: string[] +}> = [ + { + title: 'Gather system information', + description: `Use bash to run system info commands (uname -a, sw_vers, sysctl, df -h, uptime). +Then write a structured Markdown report to ${OUTPUT_DIR}/system-report.md with sections: +OS, Hardware, Disk, and Uptime.`, + assignee: 'researcher', + }, + { + title: 'Summarize the report', + description: `Read the file at ${OUTPUT_DIR}/system-report.md. 
+Produce a concise one-paragraph executive summary of the system information.`, + assignee: 'summarizer', + dependsOn: ['Gather system information'], + }, +] + +// --------------------------------------------------------------------------- +// Run +// --------------------------------------------------------------------------- + +console.log('Gemma 4 Local Agent Team — Zero API Cost') +console.log('='.repeat(60)) +console.log(` model → ${OLLAMA_MODEL} via Ollama`) +console.log(` researcher → bash + file_write`) +console.log(` summarizer → file_read`) +console.log(` output dir → ${OUTPUT_DIR}`) +console.log() +console.log('Pipeline: researcher gathers info → summarizer writes summary') +console.log('='.repeat(60)) + +const start = Date.now() +const result = await orchestrator.runTasks(team, tasks) +const totalTime = Date.now() - start + +// --------------------------------------------------------------------------- +// Summary +// --------------------------------------------------------------------------- + +console.log('\n' + '='.repeat(60)) +console.log('Pipeline complete.\n') +console.log(`Overall success: ${result.success}`) +console.log(`Total time: ${(totalTime / 1000).toFixed(1)}s`) +console.log(`Tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`) + +console.log('\nPer-agent results:') +for (const [name, r] of result.agentResults) { + const icon = r.success ? 'OK ' : 'FAIL' + const tools = r.toolCalls.map(c => c.toolName).join(', ') + console.log(` [${icon}] ${name.padEnd(12)} tools: ${tools || '(none)'}`) +} + +// Print the summarizer's output +const summary = result.agentResults.get('summarizer') +if (summary?.success) { + console.log('\nExecutive Summary (from local Gemma 4):') + console.log('-'.repeat(60)) + console.log(summary.output) + console.log('-'.repeat(60)) +} + +console.log('\nAll processing done locally. 
$0 API cost.') From d86ea766d3c92d73044c96a5be44a81d15d6aa99 Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 09:28:45 +0800 Subject: [PATCH 20/38] feat: add Gemma 4 auto-orchestration example (runTeam) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add examples/09-gemma4-auto-orchestration.ts demonstrating runTeam() with Gemma 4 as the coordinator — the framework's key feature running fully local. The coordinator successfully decomposes goals into JSON task arrays, schedules dependencies, and synthesises results. Verified on gemma4:e2b (5.1B params) with Ollama 0.20.0-rc1. --- README.md | 1 + README_zh.md | 1 + examples/09-gemma4-auto-orchestration.ts | 162 +++++++++++++++++++++++ 3 files changed, 164 insertions(+) create mode 100644 examples/09-gemma4-auto-orchestration.ts diff --git a/README.md b/README.md index a74723f..e1871d1 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,7 @@ npx tsx examples/01-single-agent.ts | [06 — Local Model](examples/06-local-model.ts) | Ollama + Claude in one pipeline via `baseURL` (works with vLLM, LM Studio, etc.) 
| | [07 — Fan-Out / Aggregate](examples/07-fan-out-aggregate.ts) | `runParallel()` MapReduce — 3 analysts in parallel, then synthesize | | [08 — Gemma 4 Local](examples/08-gemma4-local.ts) | Pure-local Gemma 4 agent team with tool-calling — zero API cost | +| [09 — Gemma 4 Auto-Orchestration](examples/09-gemma4-auto-orchestration.ts) | `runTeam()` with Gemma 4 as coordinator — auto task decomposition, fully local | ## Architecture diff --git a/README_zh.md b/README_zh.md index 76c5d2f..362d0c5 100644 --- a/README_zh.md +++ b/README_zh.md @@ -126,6 +126,7 @@ npx tsx examples/01-single-agent.ts | [06 — 本地模型](examples/06-local-model.ts) | Ollama + Claude 混合流水线,通过 `baseURL` 接入(兼容 vLLM、LM Studio 等) | | [07 — 扇出聚合](examples/07-fan-out-aggregate.ts) | `runParallel()` MapReduce — 3 个分析师并行,然后综合 | | [08 — Gemma 4 本地](examples/08-gemma4-local.ts) | 纯本地 Gemma 4 智能体团队 + tool-calling — 零 API 费用 | +| [09 — Gemma 4 自动编排](examples/09-gemma4-auto-orchestration.ts) | `runTeam()` 用 Gemma 4 当 coordinator — 自动任务拆解,完全本地 | ## 架构 diff --git a/examples/09-gemma4-auto-orchestration.ts b/examples/09-gemma4-auto-orchestration.ts new file mode 100644 index 0000000..185ede7 --- /dev/null +++ b/examples/09-gemma4-auto-orchestration.ts @@ -0,0 +1,162 @@ +/** + * Example 09 — Gemma 4 Auto-Orchestration (runTeam, 100% Local) + * + * Demonstrates the framework's key feature — automatic task decomposition — + * powered entirely by a local Gemma 4 model. No cloud API needed. + * + * What happens: + * 1. A Gemma 4 "coordinator" receives the goal + agent roster + * 2. It outputs a structured JSON task array (title, description, assignee, dependsOn) + * 3. The framework resolves dependencies, schedules tasks, and runs agents + * 4. The coordinator synthesises all task results into a final answer + * + * This is the hardest test for a local model — it must produce valid JSON + * for task decomposition AND do tool-calling for actual task execution. + * Gemma 4 e2b (5.1B params) handles both reliably. 
+ * + * Run: + * no_proxy=localhost npx tsx examples/09-gemma4-auto-orchestration.ts + * + * Prerequisites: + * 1. Ollama >= 0.20.0 installed and running: https://ollama.com + * 2. Pull the model: ollama pull gemma4:e2b + * 3. No API keys needed! + * + * Note: The no_proxy=localhost prefix is needed if you have an HTTP proxy + * configured, since the OpenAI SDK would otherwise route Ollama requests + * through the proxy. + */ + +import { OpenMultiAgent } from '../src/index.js' +import type { AgentConfig, OrchestratorEvent, Task } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Configuration +// --------------------------------------------------------------------------- + +// See available tags at https://ollama.com/library/gemma4 +const OLLAMA_MODEL = 'gemma4:e2b' // or 'gemma4:e4b', 'gemma4:26b' +const OLLAMA_BASE_URL = 'http://localhost:11434/v1' + +// --------------------------------------------------------------------------- +// Agents — the coordinator is created automatically by runTeam() +// --------------------------------------------------------------------------- + +const researcher: AgentConfig = { + name: 'researcher', + model: OLLAMA_MODEL, + provider: 'openai', + baseURL: OLLAMA_BASE_URL, + apiKey: 'ollama', + systemPrompt: `You are a system researcher. Use bash to run non-destructive, +read-only commands and report the results concisely.`, + tools: ['bash'], + maxTurns: 4, +} + +const writer: AgentConfig = { + name: 'writer', + model: OLLAMA_MODEL, + provider: 'openai', + baseURL: OLLAMA_BASE_URL, + apiKey: 'ollama', + systemPrompt: `You are a technical writer. 
Use file_write to create clear, +structured Markdown reports based on the information provided.`, + tools: ['file_write'], + maxTurns: 4, +} + +// --------------------------------------------------------------------------- +// Progress handler +// --------------------------------------------------------------------------- + +function handleProgress(event: OrchestratorEvent): void { + const ts = new Date().toISOString().slice(11, 23) + switch (event.type) { + case 'task_start': { + const task = event.data as Task | undefined + console.log(`[${ts}] TASK START "${task?.title ?? event.task}" → ${task?.assignee ?? '?'}`) + break + } + case 'task_complete': + console.log(`[${ts}] TASK DONE "${event.task}"`) + break + case 'agent_start': + console.log(`[${ts}] AGENT START ${event.agent}`) + break + case 'agent_complete': + console.log(`[${ts}] AGENT DONE ${event.agent}`) + break + case 'error': + console.error(`[${ts}] ERROR ${event.agent ?? ''} task=${event.task ?? '?'}`) + break + } +} + +// --------------------------------------------------------------------------- +// Orchestrator — defaultModel is used for the coordinator agent +// --------------------------------------------------------------------------- + +const orchestrator = new OpenMultiAgent({ + defaultModel: OLLAMA_MODEL, + defaultProvider: 'openai', + defaultBaseURL: OLLAMA_BASE_URL, + defaultApiKey: 'ollama', + maxConcurrency: 1, // local model serves one request at a time + onProgress: handleProgress, +}) + +const team = orchestrator.createTeam('gemma4-auto', { + name: 'gemma4-auto', + agents: [researcher, writer], + sharedMemory: true, +}) + +// --------------------------------------------------------------------------- +// Give a goal — the framework handles the rest +// --------------------------------------------------------------------------- + +const goal = `Check this machine's Node.js version, npm version, and OS info, +then write a short Markdown summary report to /tmp/gemma4-auto/report.md` + 
+console.log('Gemma 4 Auto-Orchestration — Zero API Cost') +console.log('='.repeat(60)) +console.log(` model → ${OLLAMA_MODEL} via Ollama (all agents + coordinator)`) +console.log(` researcher → bash`) +console.log(` writer → file_write`) +console.log(` coordinator → auto-created by runTeam()`) +console.log() +console.log(`Goal: ${goal.replace(/\n/g, ' ').trim()}`) +console.log('='.repeat(60)) + +const start = Date.now() +const result = await orchestrator.runTeam(team, goal) +const totalTime = Date.now() - start + +// --------------------------------------------------------------------------- +// Results +// --------------------------------------------------------------------------- + +console.log('\n' + '='.repeat(60)) +console.log('Pipeline complete.\n') +console.log(`Overall success: ${result.success}`) +console.log(`Total time: ${(totalTime / 1000).toFixed(1)}s`) +console.log(`Tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`) + +console.log('\nPer-agent results:') +for (const [name, r] of result.agentResults) { + const icon = r.success ? 'OK ' : 'FAIL' + const tools = r.toolCalls.length > 0 ? r.toolCalls.map(c => c.toolName).join(', ') : '(none)' + console.log(` [${icon}] ${name.padEnd(24)} tools: ${tools}`) +} + +// Print the coordinator's final synthesis +const coordResult = result.agentResults.get('coordinator') +if (coordResult?.success) { + console.log('\nFinal synthesis (from local Gemma 4 coordinator):') + console.log('-'.repeat(60)) + console.log(coordResult.output) + console.log('-'.repeat(60)) +} + +console.log('\nAll processing done locally. 
$0 API cost.') From 4fc7bb3f85c1eedffbe5d3b9eae573b5d9bedbee Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 09:38:22 +0800 Subject: [PATCH 21/38] docs: add Gemma 4 to Model Agnostic feature description --- README.md | 2 +- README_zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e1871d1..223e919 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Build AI agent teams that decompose goals into tasks automatically. Define agent - **Auto Task Decomposition** — Describe a goal in plain text. A built-in coordinator agent breaks it into a task DAG with dependencies and assignees — no manual orchestration needed. - **Multi-Agent Teams** — Define agents with different roles, tools, and even different models. They collaborate through a message bus and shared memory. - **Task DAG Scheduling** — Tasks have dependencies. The framework resolves them topologically — dependent tasks wait, independent tasks run in parallel. -- **Model Agnostic** — Claude, GPT, and local models (Ollama, vLLM, LM Studio) in the same team. Swap models per agent via `baseURL`. +- **Model Agnostic** — Claude, GPT, Gemma 4, and local models (Ollama, vLLM, LM Studio) in the same team. Swap models per agent via `baseURL`. - **In-Process Execution** — No subprocess overhead. Everything runs in one Node.js process. Deploy to serverless, Docker, CI/CD. 
## Quick Start diff --git a/README_zh.md b/README_zh.md index 362d0c5..9c71047 100644 --- a/README_zh.md +++ b/README_zh.md @@ -15,7 +15,7 @@ - **自动任务拆解** — 用自然语言描述目标,内置的协调者智能体自动将其拆解为带依赖关系和分配的任务图——无需手动编排。 - **多智能体团队** — 定义不同角色、工具甚至不同模型的智能体。它们通过消息总线和共享内存协作。 - **任务 DAG 调度** — 任务之间存在依赖关系。框架进行拓扑排序——有依赖的任务等待,无依赖的任务并行执行。 -- **模型无关** — Claude、GPT 和本地模型(Ollama、vLLM、LM Studio)可以在同一个团队中使用。通过 `baseURL` 即可接入任何 OpenAI 兼容服务。 +- **模型无关** — Claude、GPT、Gemma 4 和本地模型(Ollama、vLLM、LM Studio)可以在同一个团队中使用。通过 `baseURL` 即可接入任何 OpenAI 兼容服务。 - **进程内执行** — 没有子进程开销。所有内容在一个 Node.js 进程中运行。可部署到 Serverless、Docker、CI/CD。 ## 快速开始 From 37bd56b19359940267b00a6480824233d6e73e56 Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 09:56:17 +0800 Subject: [PATCH 22/38] docs: refresh Star History chart cache --- README.md | 2 +- README_zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 223e919..d8116d6 100644 --- a/README.md +++ b/README.md @@ -198,7 +198,7 @@ Issues, feature requests, and PRs are welcome. 
Some areas where contributions wo ## Star History -[![Star History Chart](https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260402b)](https://star-history.com/#JackChen-me/open-multi-agent&Date) +[![Star History Chart](https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260403)](https://star-history.com/#JackChen-me/open-multi-agent&Date) ## License diff --git a/README_zh.md b/README_zh.md index 9c71047..d2efc21 100644 --- a/README_zh.md +++ b/README_zh.md @@ -202,7 +202,7 @@ npx tsx examples/01-single-agent.ts ## Star 趋势 -[![Star History Chart](https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260402b)](https://star-history.com/#JackChen-me/open-multi-agent&Date) +[![Star History Chart](https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260403)](https://star-history.com/#JackChen-me/open-multi-agent&Date) ## 许可证 From 0db0a4d869ff54abe635ee5492cc73270f4c3df2 Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 09:57:18 +0800 Subject: [PATCH 23/38] docs: adapt Star History chart for dark mode --- README.md | 8 +++++++- README_zh.md | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d8116d6..a57d458 100644 --- a/README.md +++ b/README.md @@ -198,7 +198,13 @@ Issues, feature requests, and PRs are welcome. 
Some areas where contributions wo ## Star History -[![Star History Chart](https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260403)](https://star-history.com/#JackChen-me/open-multi-agent&Date) + + + + + Star History Chart + + ## License diff --git a/README_zh.md b/README_zh.md index d2efc21..1ad0167 100644 --- a/README_zh.md +++ b/README_zh.md @@ -202,7 +202,13 @@ npx tsx examples/01-single-agent.ts ## Star 趋势 -[![Star History Chart](https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260403)](https://star-history.com/#JackChen-me/open-multi-agent&Date) + + + + + Star History Chart + + ## 许可证 From 277dac2fe4be835d805d1aa320fc7aabbd9a9b77 Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 12:09:41 +0800 Subject: [PATCH 24/38] docs: add Code of Conduct and issue templates Add Contributor Covenant Code of Conduct and GitHub issue templates (bug report + feature request) to reach 100% community health score. --- .github/ISSUE_TEMPLATE/bug_report.md | 40 +++++++++++++++++++ .github/ISSUE_TEMPLATE/feature_request.md | 23 +++++++++++ CODE_OF_CONDUCT.md | 48 +++++++++++++++++++++++ 3 files changed, 111 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 CODE_OF_CONDUCT.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..8f43f71 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,40 @@ +--- +name: Bug Report +about: Report a bug to help us improve +title: "[Bug] " +labels: bug +assignees: '' +--- + +## Describe the bug + +A clear and concise description of what the bug is. + +## To Reproduce + +Steps to reproduce the behavior: + +1. Configure agent with '...' +2. Call `runTeam(...)` with '...' +3. See error + +## Expected behavior + +A clear description of what you expected to happen. 
+ +## Error output + +``` +Paste any error messages or logs here +``` + +## Environment + +- OS: [e.g. macOS 14, Ubuntu 22.04] +- Node.js version: [e.g. 20.11] +- Package version: [e.g. 0.1.0] +- LLM provider: [e.g. Anthropic, OpenAI] + +## Additional context + +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..c31759e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,23 @@ +--- +name: Feature Request +about: Suggest an idea for this project +title: "[Feature] " +labels: enhancement +assignees: '' +--- + +## Problem + +A clear description of the problem or limitation you're experiencing. + +## Proposed Solution + +Describe what you'd like to happen. + +## Alternatives Considered + +Any alternative solutions or features you've considered. + +## Additional context + +Add any other context, code examples, or screenshots about the feature request here. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..1036d4e --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,48 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a positive experience for everyone, regardless of background or +identity. 
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment:
+
+- Using welcoming and inclusive language
+- Being respectful of differing viewpoints and experiences
+- Gracefully accepting constructive feedback
+- Focusing on what is best for the community
+- Showing empathy towards other community members
+
+Examples of unacceptable behavior:
+
+- Trolling, insulting or derogatory comments, and personal attacks
+- Public or private harassment
+- Publishing others' private information without explicit permission
+- Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Enforcement Responsibilities
+
+Community leaders are responsible for clarifying and enforcing our standards of
+acceptable behavior and will take appropriate and fair corrective action in
+response to any behavior that they deem inappropriate or harmful.
+
+## Scope
+
+This Code of Conduct applies within all community spaces, and also applies when
+an individual is officially representing the community in public spaces.
+
+## Enforcement
+
+Instances of unacceptable behavior may be reported to the community leaders
+responsible for enforcement at **jack@yuanasi.com**. All complaints will be
+reviewed and investigated promptly and fairly.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org),
+version 2.1, available at
+[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html).
From e0438e3764b8315401f57b43fa3a9c8fd6a42820 Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 12:10:47 +0800 Subject: [PATCH 25/38] docs: add security policy --- SECURITY.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..235d6d9 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,17 @@ +# Security Policy + +## Supported Versions + +| Version | Supported | +|---------|-----------| +| latest | Yes | + +## Reporting a Vulnerability + +If you discover a security vulnerability, please report it responsibly via email: + +**jack@yuanasi.com** + +Please do **not** open a public GitHub issue for security vulnerabilities. + +We will acknowledge receipt within 48 hours and aim to provide a fix or mitigation plan within 7 days. From fbc5546fa14bda93d56592609a8082d88e4e7691 Mon Sep 17 00:00:00 2001 From: JackChen <26346076+JackChen-me@users.noreply.github.com> Date: Fri, 3 Apr 2026 13:45:47 +0800 Subject: [PATCH 26/38] feat: add optional outputSchema (Zod) for structured agent output (#36) When `outputSchema` is set on AgentConfig, the agent's final text output is parsed as JSON, validated against the Zod schema, and exposed via `result.structured`. On validation failure a single retry with error feedback is attempted automatically. 
Closes #29 --- src/agent/agent.ts | 115 ++++++++++- src/agent/structured-output.ts | 126 ++++++++++++ src/index.ts | 1 + src/orchestrator/orchestrator.ts | 4 +- src/types.ts | 12 ++ tests/structured-output.test.ts | 331 +++++++++++++++++++++++++++++++ 6 files changed, 583 insertions(+), 6 deletions(-) create mode 100644 src/agent/structured-output.ts create mode 100644 tests/structured-output.test.ts diff --git a/src/agent/agent.ts b/src/agent/agent.ts index 4ef392e..a178e37 100644 --- a/src/agent/agent.ts +++ b/src/agent/agent.ts @@ -35,7 +35,12 @@ import type { import type { ToolDefinition as FrameworkToolDefinition, ToolRegistry } from '../tool/framework.js' import type { ToolExecutor } from '../tool/executor.js' import { createAdapter } from '../llm/adapter.js' -import { AgentRunner, type RunnerOptions, type RunOptions } from './runner.js' +import { AgentRunner, type RunnerOptions, type RunOptions, type RunResult } from './runner.js' +import { + buildStructuredOutputInstruction, + extractJSON, + validateOutput, +} from './structured-output.js' // --------------------------------------------------------------------------- // Internal helpers @@ -111,9 +116,18 @@ export class Agent { const provider = this.config.provider ?? 'anthropic' const adapter = await createAdapter(provider, this.config.apiKey, this.config.baseURL) + // Append structured-output instructions when an outputSchema is configured. + let effectiveSystemPrompt = this.config.systemPrompt + if (this.config.outputSchema) { + const instruction = buildStructuredOutputInstruction(this.config.outputSchema) + effectiveSystemPrompt = effectiveSystemPrompt + ? 
effectiveSystemPrompt + '\n' + instruction + : instruction + } + const runnerOptions: RunnerOptions = { model: this.config.model, - systemPrompt: this.config.systemPrompt, + systemPrompt: effectiveSystemPrompt, maxTurns: this.config.maxTurns, maxTokens: this.config.maxTokens, temperature: this.config.temperature, @@ -264,10 +278,19 @@ export class Agent { } const result = await runner.run(messages, runOptions) - this.state.tokenUsage = addUsage(this.state.tokenUsage, result.tokenUsage) - this.transitionTo('completed') + // --- Structured output validation --- + if (this.config.outputSchema) { + return this.validateStructuredOutput( + messages, + result, + runner, + runOptions, + ) + } + + this.transitionTo('completed') return this.toAgentRunResult(result, true) } catch (err) { const error = err instanceof Error ? err : new Error(String(err)) @@ -279,6 +302,86 @@ export class Agent { messages: [], tokenUsage: ZERO_USAGE, toolCalls: [], + structured: undefined, + } + } + } + + /** + * Validate agent output against the configured `outputSchema`. + * On first validation failure, retry once with error feedback. + */ + private async validateStructuredOutput( + originalMessages: LLMMessage[], + result: RunResult, + runner: AgentRunner, + runOptions: RunOptions, + ): Promise { + const schema = this.config.outputSchema! + + // First attempt + let firstAttemptError: unknown + try { + const parsed = extractJSON(result.output) + const validated = validateOutput(schema, parsed) + this.transitionTo('completed') + return this.toAgentRunResult(result, true, validated) + } catch (e) { + firstAttemptError = e + } + + // Retry: send full context + error feedback + const errorMsg = firstAttemptError instanceof Error + ? 
firstAttemptError.message + : String(firstAttemptError) + + const retryMessages: LLMMessage[] = [ + ...originalMessages, + ...result.messages, + { + role: 'user' as const, + content: [{ + type: 'text' as const, + text: [ + 'Your previous response did not produce valid JSON matching the required schema.', + '', + `Error: ${errorMsg}`, + '', + 'Please try again. Respond with ONLY valid JSON, no other text.', + ].join('\n'), + }], + }, + ] + + const retryResult = await runner.run(retryMessages, runOptions) + this.state.tokenUsage = addUsage(this.state.tokenUsage, retryResult.tokenUsage) + + const mergedTokenUsage = addUsage(result.tokenUsage, retryResult.tokenUsage) + const mergedMessages = [...result.messages, ...retryResult.messages] + const mergedToolCalls = [...result.toolCalls, ...retryResult.toolCalls] + + try { + const parsed = extractJSON(retryResult.output) + const validated = validateOutput(schema, parsed) + this.transitionTo('completed') + return { + success: true, + output: retryResult.output, + messages: mergedMessages, + tokenUsage: mergedTokenUsage, + toolCalls: mergedToolCalls, + structured: validated, + } + } catch { + // Retry also failed + this.transitionTo('completed') + return { + success: false, + output: retryResult.output, + messages: mergedMessages, + tokenUsage: mergedTokenUsage, + toolCalls: mergedToolCalls, + structured: undefined, } } } @@ -331,8 +434,9 @@ export class Agent { // ------------------------------------------------------------------------- private toAgentRunResult( - result: import('./runner.js').RunResult, + result: RunResult, success: boolean, + structured?: unknown, ): AgentRunResult { return { success, @@ -340,6 +444,7 @@ export class Agent { messages: result.messages, tokenUsage: result.tokenUsage, toolCalls: result.toolCalls, + structured, } } diff --git a/src/agent/structured-output.ts b/src/agent/structured-output.ts new file mode 100644 index 0000000..3da0f06 --- /dev/null +++ b/src/agent/structured-output.ts @@ -0,0 
+1,126 @@ +/** + * @fileoverview Structured output utilities for agent responses. + * + * Provides JSON extraction, Zod validation, and system-prompt injection so + * that agents can return typed, schema-validated output. + */ + +import { type ZodSchema } from 'zod' +import { zodToJsonSchema } from '../tool/framework.js' + +// --------------------------------------------------------------------------- +// System-prompt instruction builder +// --------------------------------------------------------------------------- + +/** + * Build a JSON-mode instruction block to append to the agent's system prompt. + * + * Converts the Zod schema to JSON Schema and formats it as a clear directive + * for the LLM to respond with valid JSON matching the schema. + */ +export function buildStructuredOutputInstruction(schema: ZodSchema): string { + const jsonSchema = zodToJsonSchema(schema) + return [ + '', + '## Output Format (REQUIRED)', + 'You MUST respond with ONLY valid JSON that conforms to the following JSON Schema.', + 'Do NOT include any text, markdown fences, or explanation outside the JSON object.', + 'Do NOT wrap the JSON in ```json code fences.', + '', + '```', + JSON.stringify(jsonSchema, null, 2), + '```', + ].join('\n') +} + +// --------------------------------------------------------------------------- +// JSON extraction +// --------------------------------------------------------------------------- + +/** + * Attempt to extract and parse JSON from the agent's raw text output. + * + * Handles three cases in order: + * 1. The output is already valid JSON (ideal case) + * 2. The output contains a ` ```json ` fenced block + * 3. 
The output contains a bare JSON object/array (first `{`/`[` to last `}`/`]`) + * + * @throws {Error} when no valid JSON can be extracted + */ +export function extractJSON(raw: string): unknown { + const trimmed = raw.trim() + + // Case 1: Direct parse + try { + return JSON.parse(trimmed) + } catch { + // Continue to fallback strategies + } + + // Case 2a: Prefer ```json tagged fence + const jsonFenceMatch = trimmed.match(/```json\s*([\s\S]*?)```/) + if (jsonFenceMatch?.[1]) { + try { + return JSON.parse(jsonFenceMatch[1].trim()) + } catch { + // Continue + } + } + + // Case 2b: Fall back to bare ``` fence + const bareFenceMatch = trimmed.match(/```\s*([\s\S]*?)```/) + if (bareFenceMatch?.[1]) { + try { + return JSON.parse(bareFenceMatch[1].trim()) + } catch { + // Continue + } + } + + // Case 3: Find first { to last } (object) + const objStart = trimmed.indexOf('{') + const objEnd = trimmed.lastIndexOf('}') + if (objStart !== -1 && objEnd > objStart) { + try { + return JSON.parse(trimmed.slice(objStart, objEnd + 1)) + } catch { + // Fall through + } + } + + // Case 3b: Find first [ to last ] (array) + const arrStart = trimmed.indexOf('[') + const arrEnd = trimmed.lastIndexOf(']') + if (arrStart !== -1 && arrEnd > arrStart) { + try { + return JSON.parse(trimmed.slice(arrStart, arrEnd + 1)) + } catch { + // Fall through + } + } + + throw new Error( + `Failed to extract JSON from output. Raw output begins with: "${trimmed.slice(0, 100)}"`, + ) +} + +// --------------------------------------------------------------------------- +// Zod validation +// --------------------------------------------------------------------------- + +/** + * Validate a parsed JSON value against a Zod schema. + * + * @returns The validated (and potentially transformed) value on success. + * @throws {Error} with a human-readable Zod error message on failure. 
+ */ +export function validateOutput(schema: ZodSchema, data: unknown): unknown { + const result = schema.safeParse(data) + if (result.success) { + return result.data + } + const issues = result.error.issues + .map(issue => ` - ${issue.path.length > 0 ? issue.path.join('.') : '(root)'}: ${issue.message}`) + .join('\n') + throw new Error(`Output validation failed:\n${issues}`) +} diff --git a/src/index.ts b/src/index.ts index 814996f..fb8b6bf 100644 --- a/src/index.ts +++ b/src/index.ts @@ -63,6 +63,7 @@ export type { SchedulingStrategy } from './orchestrator/scheduler.js' // --------------------------------------------------------------------------- export { Agent } from './agent/agent.js' +export { buildStructuredOutputInstruction, extractJSON, validateOutput } from './agent/structured-output.js' export { AgentPool, Semaphore } from './agent/pool.js' export type { PoolStatus } from './agent/pool.js' diff --git a/src/orchestrator/orchestrator.ts b/src/orchestrator/orchestrator.ts index 1da8fb5..76e060f 100644 --- a/src/orchestrator/orchestrator.ts +++ b/src/orchestrator/orchestrator.ts @@ -837,13 +837,15 @@ export class OpenMultiAgent { if (!existing) { collapsed.set(agentName, result) } else { - // Merge multiple results for the same agent (multi-task case) + // Merge multiple results for the same agent (multi-task case). + // Keep the latest `structured` value (last completed task wins). collapsed.set(agentName, { success: existing.success && result.success, output: [existing.output, result.output].filter(Boolean).join('\n\n---\n\n'), messages: [...existing.messages, ...result.messages], tokenUsage: addUsage(existing.tokenUsage, result.tokenUsage), toolCalls: [...existing.toolCalls, ...result.toolCalls], + structured: result.structured ?? 
existing.structured, }) } diff --git a/src/types.ts b/src/types.ts index bd44065..6e76640 100644 --- a/src/types.ts +++ b/src/types.ts @@ -201,6 +201,12 @@ export interface AgentConfig { readonly maxTurns?: number readonly maxTokens?: number readonly temperature?: number + /** + * Optional Zod schema for structured output. When set, the agent's final + * output is parsed as JSON and validated against this schema. A single + * retry with error feedback is attempted on validation failure. + */ + readonly outputSchema?: ZodSchema } /** Lifecycle state tracked during an agent run. */ @@ -227,6 +233,12 @@ export interface AgentRunResult { readonly messages: LLMMessage[] readonly tokenUsage: TokenUsage readonly toolCalls: ToolCallRecord[] + /** + * Parsed and validated structured output when `outputSchema` is set on the + * agent config. `undefined` when no schema is configured or validation + * failed after retry. + */ + readonly structured?: unknown } // --------------------------------------------------------------------------- diff --git a/tests/structured-output.test.ts b/tests/structured-output.test.ts new file mode 100644 index 0000000..27f9201 --- /dev/null +++ b/tests/structured-output.test.ts @@ -0,0 +1,331 @@ +import { describe, it, expect } from 'vitest' +import { z } from 'zod' +import { + buildStructuredOutputInstruction, + extractJSON, + validateOutput, +} from '../src/agent/structured-output.js' +import { Agent } from '../src/agent/agent.js' +import { AgentRunner } from '../src/agent/runner.js' +import { ToolRegistry } from '../src/tool/framework.js' +import { ToolExecutor } from '../src/tool/executor.js' +import type { AgentConfig, LLMAdapter, LLMResponse } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Mock LLM adapter factory +// --------------------------------------------------------------------------- + +function mockAdapter(responses: string[]): LLMAdapter { + let callIndex = 0 + return 
{ + name: 'mock', + async chat() { + const text = responses[callIndex++] ?? '' + return { + id: `mock-${callIndex}`, + content: [{ type: 'text' as const, text }], + model: 'mock-model', + stop_reason: 'end_turn', + usage: { input_tokens: 10, output_tokens: 20 }, + } satisfies LLMResponse + }, + async *stream() { + /* unused in these tests */ + }, + } +} + +// --------------------------------------------------------------------------- +// extractJSON +// --------------------------------------------------------------------------- + +describe('extractJSON', () => { + it('parses clean JSON', () => { + expect(extractJSON('{"a":1}')).toEqual({ a: 1 }) + }) + + it('parses JSON wrapped in ```json fence', () => { + const raw = 'Here is the result:\n```json\n{"a":1}\n```\nDone.' + expect(extractJSON(raw)).toEqual({ a: 1 }) + }) + + it('parses JSON wrapped in bare ``` fence', () => { + const raw = '```\n{"a":1}\n```' + expect(extractJSON(raw)).toEqual({ a: 1 }) + }) + + it('extracts embedded JSON object from surrounding text', () => { + const raw = 'The answer is {"summary":"hello","score":5} as shown above.' 
+ expect(extractJSON(raw)).toEqual({ summary: 'hello', score: 5 }) + }) + + it('extracts JSON array', () => { + expect(extractJSON('[1,2,3]')).toEqual([1, 2, 3]) + }) + + it('extracts embedded JSON array from surrounding text', () => { + const raw = 'Here: [{"a":1},{"a":2}] end' + expect(extractJSON(raw)).toEqual([{ a: 1 }, { a: 2 }]) + }) + + it('throws on non-JSON text', () => { + expect(() => extractJSON('just plain text')).toThrow('Failed to extract JSON') + }) + + it('throws on empty string', () => { + expect(() => extractJSON('')).toThrow('Failed to extract JSON') + }) +}) + +// --------------------------------------------------------------------------- +// validateOutput +// --------------------------------------------------------------------------- + +describe('validateOutput', () => { + const schema = z.object({ + summary: z.string(), + score: z.number().min(0).max(10), + }) + + it('returns validated data on success', () => { + const data = { summary: 'hello', score: 5 } + expect(validateOutput(schema, data)).toEqual(data) + }) + + it('throws on missing field', () => { + expect(() => validateOutput(schema, { summary: 'hello' })).toThrow( + 'Output validation failed', + ) + }) + + it('throws on wrong type', () => { + expect(() => + validateOutput(schema, { summary: 'hello', score: 'not a number' }), + ).toThrow('Output validation failed') + }) + + it('throws on value out of range', () => { + expect(() => + validateOutput(schema, { summary: 'hello', score: 99 }), + ).toThrow('Output validation failed') + }) + + it('applies Zod transforms', () => { + const transformSchema = z.object({ + name: z.string().transform(s => s.toUpperCase()), + }) + const result = validateOutput(transformSchema, { name: 'alice' }) + expect(result).toEqual({ name: 'ALICE' }) + }) + + it('strips unknown keys with strict schema', () => { + const strictSchema = z.object({ a: z.number() }).strict() + expect(() => + validateOutput(strictSchema, { a: 1, b: 2 }), + ).toThrow('Output 
validation failed') + }) + + it('shows (root) for root-level errors', () => { + const stringSchema = z.string() + expect(() => validateOutput(stringSchema, 42)).toThrow('(root)') + }) +}) + +// --------------------------------------------------------------------------- +// buildStructuredOutputInstruction +// --------------------------------------------------------------------------- + +describe('buildStructuredOutputInstruction', () => { + it('includes the JSON Schema representation', () => { + const schema = z.object({ + summary: z.string(), + score: z.number(), + }) + const instruction = buildStructuredOutputInstruction(schema) + + expect(instruction).toContain('Output Format (REQUIRED)') + expect(instruction).toContain('"type": "object"') + expect(instruction).toContain('"summary"') + expect(instruction).toContain('"score"') + expect(instruction).toContain('ONLY valid JSON') + }) + + it('includes description from Zod schema', () => { + const schema = z.object({ + name: z.string().describe('The person name'), + }) + const instruction = buildStructuredOutputInstruction(schema) + expect(instruction).toContain('The person name') + }) +}) + +// --------------------------------------------------------------------------- +// Agent integration (mocked LLM) +// --------------------------------------------------------------------------- + +/** + * Build an Agent with a mocked LLM adapter by injecting an AgentRunner + * directly into the Agent's private `runner` field, bypassing `createAdapter`. + */ +function buildMockAgent(config: AgentConfig, responses: string[]): Agent { + const adapter = mockAdapter(responses) + const registry = new ToolRegistry() + const executor = new ToolExecutor(registry) + const agent = new Agent(config, registry, executor) + + // Inject a pre-built runner so `getRunner()` returns it without calling createAdapter. 
+ const runner = new AgentRunner(adapter, registry, executor, { + model: config.model, + systemPrompt: config.systemPrompt, + maxTurns: config.maxTurns, + maxTokens: config.maxTokens, + temperature: config.temperature, + agentName: config.name, + }) + ;(agent as any).runner = runner + + return agent +} + +describe('Agent structured output (end-to-end)', () => { + const schema = z.object({ + summary: z.string(), + sentiment: z.enum(['positive', 'negative', 'neutral']), + confidence: z.number().min(0).max(1), + }) + + const baseConfig: AgentConfig = { + name: 'test-agent', + model: 'mock-model', + systemPrompt: 'You are a test agent.', + outputSchema: schema, + } + + it('happy path: valid JSON on first attempt', async () => { + const validJSON = JSON.stringify({ + summary: 'Great product', + sentiment: 'positive', + confidence: 0.95, + }) + + const agent = buildMockAgent(baseConfig, [validJSON]) + const result = await agent.run('Analyze this review') + + expect(result.success).toBe(true) + expect(result.structured).toEqual({ + summary: 'Great product', + sentiment: 'positive', + confidence: 0.95, + }) + }) + + it('retry: invalid first attempt, valid second attempt', async () => { + const invalidJSON = JSON.stringify({ + summary: 'Great product', + sentiment: 'INVALID_VALUE', + confidence: 0.95, + }) + const validJSON = JSON.stringify({ + summary: 'Great product', + sentiment: 'positive', + confidence: 0.95, + }) + + const agent = buildMockAgent(baseConfig, [invalidJSON, validJSON]) + const result = await agent.run('Analyze this review') + + expect(result.success).toBe(true) + expect(result.structured).toEqual({ + summary: 'Great product', + sentiment: 'positive', + confidence: 0.95, + }) + // Token usage should reflect both attempts + expect(result.tokenUsage.input_tokens).toBe(20) // 10 + 10 + expect(result.tokenUsage.output_tokens).toBe(40) // 20 + 20 + }) + + it('both attempts fail: success=false, structured=undefined', async () => { + const bad1 = '{"summary": 
"ok", "sentiment": "WRONG"}' + const bad2 = '{"summary": "ok", "sentiment": "ALSO_WRONG"}' + + const agent = buildMockAgent(baseConfig, [bad1, bad2]) + const result = await agent.run('Analyze this review') + + expect(result.success).toBe(false) + expect(result.structured).toBeUndefined() + }) + + it('no outputSchema: original behavior, structured is undefined', async () => { + const configNoSchema: AgentConfig = { + name: 'plain-agent', + model: 'mock-model', + systemPrompt: 'You are a test agent.', + } + + const agent = buildMockAgent(configNoSchema, ['Just plain text output']) + const result = await agent.run('Hello') + + expect(result.success).toBe(true) + expect(result.output).toBe('Just plain text output') + expect(result.structured).toBeUndefined() + }) + + it('handles JSON wrapped in markdown fence', async () => { + const fenced = '```json\n{"summary":"ok","sentiment":"neutral","confidence":0.5}\n```' + + const agent = buildMockAgent(baseConfig, [fenced]) + const result = await agent.run('Analyze') + + expect(result.success).toBe(true) + expect(result.structured).toEqual({ + summary: 'ok', + sentiment: 'neutral', + confidence: 0.5, + }) + }) + + it('non-JSON output triggers retry, valid JSON on retry succeeds', async () => { + const nonJSON = 'I am not sure how to analyze this.' 
+ const validJSON = JSON.stringify({ + summary: 'Uncertain', + sentiment: 'neutral', + confidence: 0.1, + }) + + const agent = buildMockAgent(baseConfig, [nonJSON, validJSON]) + const result = await agent.run('Analyze this review') + + expect(result.success).toBe(true) + expect(result.structured).toEqual({ + summary: 'Uncertain', + sentiment: 'neutral', + confidence: 0.1, + }) + }) + + it('non-JSON output on both attempts: success=false', async () => { + const agent = buildMockAgent(baseConfig, [ + 'Sorry, I cannot do that.', + 'Still cannot do it.', + ]) + const result = await agent.run('Analyze this review') + + expect(result.success).toBe(false) + expect(result.structured).toBeUndefined() + }) + + it('token usage on first-attempt success reflects single call only', async () => { + const validJSON = JSON.stringify({ + summary: 'Good', + sentiment: 'positive', + confidence: 0.9, + }) + + const agent = buildMockAgent(baseConfig, [validJSON]) + const result = await agent.run('Analyze') + + expect(result.tokenUsage.input_tokens).toBe(10) + expect(result.tokenUsage.output_tokens).toBe(20) + }) +}) From 99b028dc1dcc6c0763d5c1e1228687e43c8ee3c3 Mon Sep 17 00:00:00 2001 From: JackChen <26346076+JackChen-me@users.noreply.github.com> Date: Fri, 3 Apr 2026 14:08:27 +0800 Subject: [PATCH 27/38] fix: address Codex review for structured output (#36) (#38) - Include error feedback user turn in mergedMessages to maintain alternating user/assistant roles required by Anthropic API - Use explicit undefined check instead of ?? for structured merge to preserve null as a valid structured output value --- src/agent/agent.ts | 32 ++++++++++++++++++-------------- src/orchestrator/orchestrator.ts | 2 +- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/agent/agent.ts b/src/agent/agent.ts index a178e37..df6b7df 100644 --- a/src/agent/agent.ts +++ b/src/agent/agent.ts @@ -335,29 +335,33 @@ export class Agent { ? 
firstAttemptError.message : String(firstAttemptError) + const errorFeedbackMessage: LLMMessage = { + role: 'user' as const, + content: [{ + type: 'text' as const, + text: [ + 'Your previous response did not produce valid JSON matching the required schema.', + '', + `Error: ${errorMsg}`, + '', + 'Please try again. Respond with ONLY valid JSON, no other text.', + ].join('\n'), + }], + } + const retryMessages: LLMMessage[] = [ ...originalMessages, ...result.messages, - { - role: 'user' as const, - content: [{ - type: 'text' as const, - text: [ - 'Your previous response did not produce valid JSON matching the required schema.', - '', - `Error: ${errorMsg}`, - '', - 'Please try again. Respond with ONLY valid JSON, no other text.', - ].join('\n'), - }], - }, + errorFeedbackMessage, ] const retryResult = await runner.run(retryMessages, runOptions) this.state.tokenUsage = addUsage(this.state.tokenUsage, retryResult.tokenUsage) const mergedTokenUsage = addUsage(result.tokenUsage, retryResult.tokenUsage) - const mergedMessages = [...result.messages, ...retryResult.messages] + // Include the error feedback turn to maintain alternating user/assistant roles, + // which is required by Anthropic's API for subsequent prompt() calls. + const mergedMessages = [...result.messages, errorFeedbackMessage, ...retryResult.messages] const mergedToolCalls = [...result.toolCalls, ...retryResult.toolCalls] try { diff --git a/src/orchestrator/orchestrator.ts b/src/orchestrator/orchestrator.ts index 76e060f..3f44792 100644 --- a/src/orchestrator/orchestrator.ts +++ b/src/orchestrator/orchestrator.ts @@ -845,7 +845,7 @@ export class OpenMultiAgent { messages: [...existing.messages, ...result.messages], tokenUsage: addUsage(existing.tokenUsage, result.tokenUsage), toolCalls: [...existing.toolCalls, ...result.toolCalls], - structured: result.structured ?? existing.structured, + structured: result.structured !== undefined ? 
result.structured : existing.structured, }) } From 4d7564b71a720a64507876d1c99feae8f79c2702 Mon Sep 17 00:00:00 2001 From: JackChen <26346076+JackChen-me@users.noreply.github.com> Date: Fri, 3 Apr 2026 14:08:36 +0800 Subject: [PATCH 28/38] feat: task-level retry with exponential backoff (#37) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add task-level retry with exponential backoff Add `maxRetries`, `retryDelayMs`, and `retryBackoff` to task config. When a task fails and retries remain, the orchestrator waits with exponential backoff and re-runs the task with a fresh agent conversation. A `task_retry` event is emitted via `onProgress` for observability. Cascade failure only occurs after all retries are exhausted. Closes #30 * fix: address review — extract executeWithRetry, add delay cap, fix tests - Extract `executeWithRetry()` as a testable exported function - Add `computeRetryDelay()` with 30s max cap (prevents runaway backoff) - Remove retry fields from `ParsedTaskSpec` (dead code for runTeam path) - Deduplicate retry event emission (single code path for both error types) - Injectable delay function for test determinism - Rewrite tests to call the real `executeWithRetry`, not a copy - 15 tests covering: success, retry+success, retry+failure, backoff calculation, delay cap, delay function injection, no-retry default * fix: clamp negative maxRetries/retryBackoff to safe values - maxRetries clamped to >= 0 (negative values treated as no retry) - retryBackoff clamped to >= 1 (prevents zero/negative delay oscillation) - retryDelayMs clamped to >= 0 - Add tests for negative maxRetries and negative backoff Addresses Codex review P1 on #37 * fix: accumulate token usage across retry attempts Previously only the final attempt's tokenUsage was returned, causing under-reporting of actual model consumption when retries occurred. Now all attempts' token counts are summed in the returned result. 
Addresses Codex review P2 (token usage) on #37 --- src/index.ts | 2 +- src/orchestrator/orchestrator.ts | 180 ++++++++++++--- src/task/task.ts | 6 + src/types.ts | 7 + tests/task-retry.test.ts | 368 +++++++++++++++++++++++++++++++ 5 files changed, 528 insertions(+), 35 deletions(-) create mode 100644 tests/task-retry.test.ts diff --git a/src/index.ts b/src/index.ts index fb8b6bf..f624707 100644 --- a/src/index.ts +++ b/src/index.ts @@ -54,7 +54,7 @@ // Orchestrator (primary entry point) // --------------------------------------------------------------------------- -export { OpenMultiAgent } from './orchestrator/orchestrator.js' +export { OpenMultiAgent, executeWithRetry, computeRetryDelay } from './orchestrator/orchestrator.js' export { Scheduler } from './orchestrator/scheduler.js' export type { SchedulingStrategy } from './orchestrator/scheduler.js' diff --git a/src/orchestrator/orchestrator.ts b/src/orchestrator/orchestrator.ts index 3f44792..9d6d857 100644 --- a/src/orchestrator/orchestrator.ts +++ b/src/orchestrator/orchestrator.ts @@ -92,6 +92,104 @@ function buildAgent(config: AgentConfig): Agent { return new Agent(config, registry, executor) } +/** Promise-based delay. */ +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)) +} + +/** Maximum delay cap to prevent runaway exponential backoff (30 seconds). */ +const MAX_RETRY_DELAY_MS = 30_000 + +/** + * Compute the retry delay for a given attempt, capped at {@link MAX_RETRY_DELAY_MS}. + */ +export function computeRetryDelay( + baseDelay: number, + backoff: number, + attempt: number, +): number { + return Math.min(baseDelay * backoff ** (attempt - 1), MAX_RETRY_DELAY_MS) +} + +/** + * Execute an agent task with optional retry and exponential backoff. + * + * Exported for testability — called internally by {@link executeQueue}. + * + * @param run - The function that executes the task (typically `pool.run`). 
+ * @param task - The task to execute (retry config read from its fields). + * @param onRetry - Called before each retry sleep with event data. + * @param delayFn - Injectable delay function (defaults to real `sleep`). + * @returns The final {@link AgentRunResult} from the last attempt. + */ +export async function executeWithRetry( + run: () => Promise, + task: Task, + onRetry?: (data: { attempt: number; maxAttempts: number; error: string; nextDelayMs: number }) => void, + delayFn: (ms: number) => Promise = sleep, +): Promise { + const maxAttempts = Math.max(0, task.maxRetries ?? 0) + 1 + const baseDelay = Math.max(0, task.retryDelayMs ?? 1000) + const backoff = Math.max(1, task.retryBackoff ?? 2) + + let lastError: string = '' + // Accumulate token usage across all attempts so billing/observability + // reflects the true cost of retries. + let totalUsage: TokenUsage = { input_tokens: 0, output_tokens: 0 } + + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + try { + const result = await run() + totalUsage = { + input_tokens: totalUsage.input_tokens + result.tokenUsage.input_tokens, + output_tokens: totalUsage.output_tokens + result.tokenUsage.output_tokens, + } + + if (result.success) { + return { ...result, tokenUsage: totalUsage } + } + lastError = result.output + + // Failure — retry or give up + if (attempt < maxAttempts) { + const delay = computeRetryDelay(baseDelay, backoff, attempt) + onRetry?.({ attempt, maxAttempts, error: lastError, nextDelayMs: delay }) + await delayFn(delay) + continue + } + + return { ...result, tokenUsage: totalUsage } + } catch (err) { + lastError = err instanceof Error ? 
err.message : String(err) + + if (attempt < maxAttempts) { + const delay = computeRetryDelay(baseDelay, backoff, attempt) + onRetry?.({ attempt, maxAttempts, error: lastError, nextDelayMs: delay }) + await delayFn(delay) + continue + } + + // All retries exhausted — return a failure result + return { + success: false, + output: lastError, + messages: [], + tokenUsage: totalUsage, + toolCalls: [], + } + } + } + + // Should not be reached, but TypeScript needs a return + return { + success: false, + output: lastError, + messages: [], + tokenUsage: totalUsage, + toolCalls: [], + } +} + // --------------------------------------------------------------------------- // Parsed task spec (result of coordinator decomposition) // --------------------------------------------------------------------------- @@ -239,49 +337,50 @@ async function executeQueue( // Build the prompt: inject shared memory context + task description const prompt = await buildTaskPrompt(task, team) - try { - const result = await pool.run(assignee, prompt) - ctx.agentResults.set(`${assignee}:${task.id}`, result) - - if (result.success) { - // Persist result into shared memory so other agents can read it - const sharedMem = team.getSharedMemoryInstance() - if (sharedMem) { - await sharedMem.write(assignee, `task:${task.id}:result`, result.output) - } - - queue.complete(task.id, result.output) - + const result = await executeWithRetry( + () => pool.run(assignee, prompt), + task, + (retryData) => { config.onProgress?.({ - type: 'task_complete', + type: 'task_retry', task: task.id, agent: assignee, - data: result, + data: retryData, } satisfies OrchestratorEvent) + }, + ) - config.onProgress?.({ - type: 'agent_complete', - agent: assignee, - task: task.id, - data: result, - } satisfies OrchestratorEvent) - } else { - queue.fail(task.id, result.output) - config.onProgress?.({ - type: 'error', - task: task.id, - agent: assignee, - data: result, - } satisfies OrchestratorEvent) + 
ctx.agentResults.set(`${assignee}:${task.id}`, result) + + if (result.success) { + // Persist result into shared memory so other agents can read it + const sharedMem = team.getSharedMemoryInstance() + if (sharedMem) { + await sharedMem.write(assignee, `task:${task.id}:result`, result.output) } - } catch (err) { - const message = err instanceof Error ? err.message : String(err) - queue.fail(task.id, message) + + queue.complete(task.id, result.output) + + config.onProgress?.({ + type: 'task_complete', + task: task.id, + agent: assignee, + data: result, + } satisfies OrchestratorEvent) + + config.onProgress?.({ + type: 'agent_complete', + agent: assignee, + task: task.id, + data: result, + } satisfies OrchestratorEvent) + } else { + queue.fail(task.id, result.output) config.onProgress?.({ type: 'error', task: task.id, agent: assignee, - data: err, + data: result, } satisfies OrchestratorEvent) } }) @@ -574,6 +673,9 @@ export class OpenMultiAgent { description: string assignee?: string dependsOn?: string[] + maxRetries?: number + retryDelayMs?: number + retryBackoff?: number }>, ): Promise { const agentConfigs = team.getAgents() @@ -586,6 +688,9 @@ export class OpenMultiAgent { description: t.description, assignee: t.assignee, dependsOn: t.dependsOn, + maxRetries: t.maxRetries, + retryDelayMs: t.retryDelayMs, + retryBackoff: t.retryBackoff, })), agentConfigs, queue, @@ -743,7 +848,11 @@ export class OpenMultiAgent { * then resolving them to real IDs before adding tasks to the queue. */ private loadSpecsIntoQueue( - specs: ReadonlyArray, + specs: ReadonlyArray, agentConfigs: AgentConfig[], queue: TaskQueue, ): void { @@ -760,6 +869,9 @@ export class OpenMultiAgent { assignee: spec.assignee && agentNames.has(spec.assignee) ? 
spec.assignee : undefined, + maxRetries: spec.maxRetries, + retryDelayMs: spec.retryDelayMs, + retryBackoff: spec.retryBackoff, }) titleToId.set(spec.title.toLowerCase().trim(), task.id) createdTasks.push(task) diff --git a/src/task/task.ts b/src/task/task.ts index 9a11476..d74e70b 100644 --- a/src/task/task.ts +++ b/src/task/task.ts @@ -31,6 +31,9 @@ export function createTask(input: { description: string assignee?: string dependsOn?: string[] + maxRetries?: number + retryDelayMs?: number + retryBackoff?: number }): Task { const now = new Date() return { @@ -43,6 +46,9 @@ export function createTask(input: { result: undefined, createdAt: now, updatedAt: now, + maxRetries: input.maxRetries, + retryDelayMs: input.retryDelayMs, + retryBackoff: input.retryBackoff, } } diff --git a/src/types.ts b/src/types.ts index 6e76640..bd2ce64 100644 --- a/src/types.ts +++ b/src/types.ts @@ -281,6 +281,12 @@ export interface Task { result?: string readonly createdAt: Date updatedAt: Date + /** Maximum number of retry attempts on failure (default: 0 — no retry). */ + readonly maxRetries?: number + /** Base delay in ms before the first retry (default: 1000). */ + readonly retryDelayMs?: number + /** Exponential backoff multiplier (default: 2). 
*/ + readonly retryBackoff?: number } // --------------------------------------------------------------------------- @@ -294,6 +300,7 @@ export interface OrchestratorEvent { | 'agent_complete' | 'task_start' | 'task_complete' + | 'task_retry' | 'message' | 'error' readonly agent?: string diff --git a/tests/task-retry.test.ts b/tests/task-retry.test.ts new file mode 100644 index 0000000..56bdb76 --- /dev/null +++ b/tests/task-retry.test.ts @@ -0,0 +1,368 @@ +import { describe, it, expect, vi } from 'vitest' +import { createTask } from '../src/task/task.js' +import { executeWithRetry, computeRetryDelay } from '../src/orchestrator/orchestrator.js' +import type { AgentRunResult } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +const SUCCESS_RESULT: AgentRunResult = { + success: true, + output: 'done', + messages: [], + tokenUsage: { input_tokens: 10, output_tokens: 20 }, + toolCalls: [], +} + +const FAILURE_RESULT: AgentRunResult = { + success: false, + output: 'agent failed', + messages: [], + tokenUsage: { input_tokens: 10, output_tokens: 20 }, + toolCalls: [], +} + +/** No-op delay for tests. 
*/ +const noDelay = () => Promise.resolve() + +// --------------------------------------------------------------------------- +// computeRetryDelay +// --------------------------------------------------------------------------- + +describe('computeRetryDelay', () => { + it('computes exponential backoff', () => { + expect(computeRetryDelay(1000, 2, 1)).toBe(1000) // 1000 * 2^0 + expect(computeRetryDelay(1000, 2, 2)).toBe(2000) // 1000 * 2^1 + expect(computeRetryDelay(1000, 2, 3)).toBe(4000) // 1000 * 2^2 + }) + + it('caps at 30 seconds', () => { + // 1000 * 2^20 = 1,048,576,000 — way over cap + expect(computeRetryDelay(1000, 2, 21)).toBe(30_000) + }) + + it('handles backoff of 1 (constant delay)', () => { + expect(computeRetryDelay(500, 1, 1)).toBe(500) + expect(computeRetryDelay(500, 1, 5)).toBe(500) + }) +}) + +// --------------------------------------------------------------------------- +// createTask: retry fields +// --------------------------------------------------------------------------- + +describe('createTask with retry fields', () => { + it('passes through retry config', () => { + const t = createTask({ + title: 'Retry task', + description: 'test', + maxRetries: 3, + retryDelayMs: 500, + retryBackoff: 1.5, + }) + expect(t.maxRetries).toBe(3) + expect(t.retryDelayMs).toBe(500) + expect(t.retryBackoff).toBe(1.5) + }) + + it('defaults retry fields to undefined', () => { + const t = createTask({ title: 'No retry', description: 'test' }) + expect(t.maxRetries).toBeUndefined() + expect(t.retryDelayMs).toBeUndefined() + expect(t.retryBackoff).toBeUndefined() + }) +}) + +// --------------------------------------------------------------------------- +// executeWithRetry — tests the real exported function +// --------------------------------------------------------------------------- + +describe('executeWithRetry', () => { + it('succeeds on first attempt with no retry config', async () => { + const run = vi.fn().mockResolvedValue(SUCCESS_RESULT) + const task = 
createTask({ title: 'Simple', description: 'test' }) + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(true) + expect(result.output).toBe('done') + expect(run).toHaveBeenCalledTimes(1) + }) + + it('succeeds on first attempt even when maxRetries > 0', async () => { + const run = vi.fn().mockResolvedValue(SUCCESS_RESULT) + const task = createTask({ + title: 'Has retries', + description: 'test', + maxRetries: 3, + }) + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(true) + expect(run).toHaveBeenCalledTimes(1) + }) + + it('retries on exception and succeeds on second attempt', async () => { + const run = vi.fn() + .mockRejectedValueOnce(new Error('transient error')) + .mockResolvedValueOnce(SUCCESS_RESULT) + + const task = createTask({ + title: 'Retry task', + description: 'test', + maxRetries: 2, + retryDelayMs: 100, + retryBackoff: 2, + }) + + const retryEvents: unknown[] = [] + const result = await executeWithRetry( + run, + task, + (data) => retryEvents.push(data), + noDelay, + ) + + expect(result.success).toBe(true) + expect(run).toHaveBeenCalledTimes(2) + expect(retryEvents).toHaveLength(1) + expect(retryEvents[0]).toEqual({ + attempt: 1, + maxAttempts: 3, + error: 'transient error', + nextDelayMs: 100, // 100 * 2^0 + }) + }) + + it('retries on success:false and succeeds on second attempt', async () => { + const run = vi.fn() + .mockResolvedValueOnce(FAILURE_RESULT) + .mockResolvedValueOnce(SUCCESS_RESULT) + + const task = createTask({ + title: 'Retry task', + description: 'test', + maxRetries: 1, + retryDelayMs: 50, + }) + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(true) + expect(run).toHaveBeenCalledTimes(2) + }) + + it('exhausts all retries on persistent exception', async () => { + const run = vi.fn().mockRejectedValue(new Error('persistent error')) + + const task = createTask({ + title: 
'Always fails', + description: 'test', + maxRetries: 2, + retryDelayMs: 10, + retryBackoff: 1, + }) + + const retryEvents: unknown[] = [] + const result = await executeWithRetry( + run, + task, + (data) => retryEvents.push(data), + noDelay, + ) + + expect(result.success).toBe(false) + expect(result.output).toBe('persistent error') + expect(run).toHaveBeenCalledTimes(3) // 1 initial + 2 retries + expect(retryEvents).toHaveLength(2) + }) + + it('exhausts all retries on persistent success:false', async () => { + const run = vi.fn().mockResolvedValue(FAILURE_RESULT) + + const task = createTask({ + title: 'Always fails', + description: 'test', + maxRetries: 1, + }) + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(false) + expect(result.output).toBe('agent failed') + expect(run).toHaveBeenCalledTimes(2) + }) + + it('emits correct exponential backoff delays', async () => { + const run = vi.fn().mockRejectedValue(new Error('error')) + + const task = createTask({ + title: 'Backoff test', + description: 'test', + maxRetries: 3, + retryDelayMs: 100, + retryBackoff: 2, + }) + + const retryEvents: Array<{ nextDelayMs: number }> = [] + await executeWithRetry( + run, + task, + (data) => retryEvents.push(data), + noDelay, + ) + + expect(retryEvents).toHaveLength(3) + expect(retryEvents[0]!.nextDelayMs).toBe(100) // 100 * 2^0 + expect(retryEvents[1]!.nextDelayMs).toBe(200) // 100 * 2^1 + expect(retryEvents[2]!.nextDelayMs).toBe(400) // 100 * 2^2 + }) + + it('no retry events when maxRetries is 0 (default)', async () => { + const run = vi.fn().mockRejectedValue(new Error('fail')) + const task = createTask({ title: 'No retry', description: 'test' }) + + const retryEvents: unknown[] = [] + const result = await executeWithRetry( + run, + task, + (data) => retryEvents.push(data), + noDelay, + ) + + expect(result.success).toBe(false) + expect(run).toHaveBeenCalledTimes(1) + expect(retryEvents).toHaveLength(0) + }) + + it('calls the 
delay function with computed delay', async () => { + const run = vi.fn() + .mockRejectedValueOnce(new Error('error')) + .mockResolvedValueOnce(SUCCESS_RESULT) + + const task = createTask({ + title: 'Delay test', + description: 'test', + maxRetries: 1, + retryDelayMs: 250, + retryBackoff: 3, + }) + + const mockDelay = vi.fn().mockResolvedValue(undefined) + await executeWithRetry(run, task, undefined, mockDelay) + + expect(mockDelay).toHaveBeenCalledTimes(1) + expect(mockDelay).toHaveBeenCalledWith(250) // 250 * 3^0 + }) + + it('caps delay at 30 seconds', async () => { + const run = vi.fn() + .mockRejectedValueOnce(new Error('error')) + .mockResolvedValueOnce(SUCCESS_RESULT) + + const task = createTask({ + title: 'Cap test', + description: 'test', + maxRetries: 1, + retryDelayMs: 50_000, + retryBackoff: 2, + }) + + const mockDelay = vi.fn().mockResolvedValue(undefined) + await executeWithRetry(run, task, undefined, mockDelay) + + expect(mockDelay).toHaveBeenCalledWith(30_000) // capped + }) + + it('accumulates token usage across retry attempts', async () => { + const failResult: AgentRunResult = { + ...FAILURE_RESULT, + tokenUsage: { input_tokens: 100, output_tokens: 50 }, + } + const successResult: AgentRunResult = { + ...SUCCESS_RESULT, + tokenUsage: { input_tokens: 200, output_tokens: 80 }, + } + + const run = vi.fn() + .mockResolvedValueOnce(failResult) + .mockResolvedValueOnce(failResult) + .mockResolvedValueOnce(successResult) + + const task = createTask({ + title: 'Token test', + description: 'test', + maxRetries: 2, + retryDelayMs: 10, + }) + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(true) + // 100+100+200 input, 50+50+80 output + expect(result.tokenUsage.input_tokens).toBe(400) + expect(result.tokenUsage.output_tokens).toBe(180) + }) + + it('accumulates token usage even when all retries fail', async () => { + const failResult: AgentRunResult = { + ...FAILURE_RESULT, + tokenUsage: { input_tokens: 
50, output_tokens: 30 }, + } + + const run = vi.fn().mockResolvedValue(failResult) + + const task = createTask({ + title: 'Token fail test', + description: 'test', + maxRetries: 1, + }) + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(false) + // 50+50 input, 30+30 output (2 attempts) + expect(result.tokenUsage.input_tokens).toBe(100) + expect(result.tokenUsage.output_tokens).toBe(60) + }) + + it('clamps negative maxRetries to 0 (single attempt)', async () => { + const run = vi.fn().mockRejectedValue(new Error('fail')) + + const task = createTask({ + title: 'Negative retry', + description: 'test', + maxRetries: -5, + }) + // Manually set negative value since createTask doesn't validate + ;(task as any).maxRetries = -5 + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(false) + expect(run).toHaveBeenCalledTimes(1) // exactly 1 attempt, no retries + }) + + it('clamps backoff below 1 to 1 (constant delay)', async () => { + const run = vi.fn() + .mockRejectedValueOnce(new Error('error')) + .mockResolvedValueOnce(SUCCESS_RESULT) + + const task = createTask({ + title: 'Bad backoff', + description: 'test', + maxRetries: 1, + retryDelayMs: 100, + retryBackoff: -2, + }) + ;(task as any).retryBackoff = -2 + + const mockDelay = vi.fn().mockResolvedValue(undefined) + await executeWithRetry(run, task, undefined, mockDelay) + + // backoff clamped to 1, so delay = 100 * 1^0 = 100 + expect(mockDelay).toHaveBeenCalledWith(100) + }) +}) From 2187f3b310afa2edf6012a2fc88c5d4ab2b1edfe Mon Sep 17 00:00:00 2001 From: JackChen <26346076+JackChen-me@users.noreply.github.com> Date: Fri, 3 Apr 2026 14:11:54 +0800 Subject: [PATCH 29/38] docs: add Structured Output and Task Retry to feature list (#39) --- README.md | 2 ++ README_zh.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/README.md b/README.md index a57d458..6fee192 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,8 
@@ Build AI agent teams that decompose goals into tasks automatically. Define agent - **Multi-Agent Teams** — Define agents with different roles, tools, and even different models. They collaborate through a message bus and shared memory. - **Task DAG Scheduling** — Tasks have dependencies. The framework resolves them topologically — dependent tasks wait, independent tasks run in parallel. - **Model Agnostic** — Claude, GPT, Gemma 4, and local models (Ollama, vLLM, LM Studio) in the same team. Swap models per agent via `baseURL`. +- **Structured Output** — Add `outputSchema` (Zod) to any agent. Output is parsed as JSON, validated, and auto-retried once on failure. Access typed results via `result.structured`. +- **Task Retry** — Set `maxRetries` on tasks for automatic retry with exponential backoff. Failed attempts accumulate token usage for accurate billing. - **In-Process Execution** — No subprocess overhead. Everything runs in one Node.js process. Deploy to serverless, Docker, CI/CD. ## Quick Start diff --git a/README_zh.md b/README_zh.md index 1ad0167..5cba7f7 100644 --- a/README_zh.md +++ b/README_zh.md @@ -16,6 +16,8 @@ - **多智能体团队** — 定义不同角色、工具甚至不同模型的智能体。它们通过消息总线和共享内存协作。 - **任务 DAG 调度** — 任务之间存在依赖关系。框架进行拓扑排序——有依赖的任务等待,无依赖的任务并行执行。 - **模型无关** — Claude、GPT、Gemma 4 和本地模型(Ollama、vLLM、LM Studio)可以在同一个团队中使用。通过 `baseURL` 即可接入任何 OpenAI 兼容服务。 +- **结构化输出** — 为任意智能体添加 `outputSchema`(Zod),输出自动解析为 JSON 并校验,校验失败自动重试一次。通过 `result.structured` 获取类型化结果。 +- **任务重试** — 为任务设置 `maxRetries`,失败时自动指数退避重试。所有尝试的 token 用量累计,确保计费准确。 - **进程内执行** — 没有子进程开销。所有内容在一个 Node.js 进程中运行。可部署到 Serverless、Docker、CI/CD。 ## 快速开始 From 42f3717115ea4a1d187f5388c88302b9af92bd7f Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 14:13:33 +0800 Subject: [PATCH 30/38] chore: bump version to 0.2.0 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index b185a8c..fc54d44 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": 
"@jackchen_me/open-multi-agent", - "version": "0.1.0", + "version": "0.2.0", "description": "Production-grade multi-agent orchestration framework. Model-agnostic, supports team collaboration, task scheduling, and inter-agent communication.", "type": "module", "main": "dist/index.js", From d9b20c0cf6b583210412757f8465d38de4100b49 Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 14:14:34 +0800 Subject: [PATCH 31/38] fix: guard retry fields against Infinity/NaN Use Number.isFinite() to sanitize maxRetries, retryDelayMs, and retryBackoff before entering the retry loop. Prevents unbounded retries from Infinity or broken loop bounds from NaN. --- src/orchestrator/orchestrator.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/orchestrator/orchestrator.ts b/src/orchestrator/orchestrator.ts index 9d6d857..848bfde 100644 --- a/src/orchestrator/orchestrator.ts +++ b/src/orchestrator/orchestrator.ts @@ -128,9 +128,10 @@ export async function executeWithRetry( onRetry?: (data: { attempt: number; maxAttempts: number; error: string; nextDelayMs: number }) => void, delayFn: (ms: number) => Promise = sleep, ): Promise { - const maxAttempts = Math.max(0, task.maxRetries ?? 0) + 1 - const baseDelay = Math.max(0, task.retryDelayMs ?? 1000) - const backoff = Math.max(1, task.retryBackoff ?? 2) + const rawRetries = Number.isFinite(task.maxRetries) ? task.maxRetries! : 0 + const maxAttempts = Math.max(0, rawRetries) + 1 + const baseDelay = Math.max(0, Number.isFinite(task.retryDelayMs) ? task.retryDelayMs! : 1000) + const backoff = Math.max(1, Number.isFinite(task.retryBackoff) ? task.retryBackoff! 
: 2) let lastError: string = '' // Accumulate token usage across all attempts so billing/observability From 043deaf562c1f6c370ebccda4cadabb1f39ca96b Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 14:16:44 +0800 Subject: [PATCH 32/38] docs: update CLAUDE.md with structured output and task retry --- CLAUDE.md | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..6cbeb45 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,80 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Commands + +```bash +npm run build # Compile TypeScript (src/ → dist/) +npm run dev # Watch mode compilation +npm run lint # Type-check only (tsc --noEmit) +npm test # Run all tests (vitest run) +npm run test:watch # Vitest watch mode +``` + +Tests live in `tests/` (vitest). Examples in `examples/` are standalone scripts requiring API keys (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`). + +## Architecture + +ES module TypeScript framework for multi-agent orchestration. Three runtime dependencies: `@anthropic-ai/sdk`, `openai`, `zod`. + +### Core Execution Flow + +**`OpenMultiAgent`** (`src/orchestrator/orchestrator.ts`) is the top-level public API with three execution modes: + +1. **`runAgent(config, prompt)`** — single agent, one-shot +2. **`runTeam(team, goal)`** — automatic orchestration: a temporary "coordinator" agent decomposes the goal into a task DAG via LLM call, then tasks execute in dependency order +3. **`runTasks(team, tasks)`** — explicit task pipeline with user-defined dependencies + +### The Coordinator Pattern (runTeam) + +This is the framework's key feature. When `runTeam()` is called: +1. A coordinator agent receives the goal + agent roster and produces a JSON task array (title, description, assignee, dependsOn) +2. 
`TaskQueue` resolves dependencies topologically — independent tasks run in parallel, dependent tasks wait +3. `Scheduler` auto-assigns any unassigned tasks (strategies: `dependency-first` default, `round-robin`, `least-busy`, `capability-match`) +4. Each task result is written to `SharedMemory` so subsequent agents see prior results +5. The coordinator synthesizes all task results into a final output + +### Layer Map + +| Layer | Files | Responsibility | +|-------|-------|----------------| +| Orchestrator | `orchestrator/orchestrator.ts`, `orchestrator/scheduler.ts` | Top-level API, task decomposition, coordinator pattern | +| Team | `team/team.ts`, `team/messaging.ts` | Agent roster, MessageBus (point-to-point + broadcast), SharedMemory binding | +| Agent | `agent/agent.ts`, `agent/runner.ts`, `agent/pool.ts`, `agent/structured-output.ts` | Agent lifecycle (idle→running→completed/error), conversation loop, concurrency pool with Semaphore, structured output validation | +| Task | `task/queue.ts`, `task/task.ts` | Dependency-aware queue, auto-unblock on completion, cascade failure to dependents | +| Tool | `tool/framework.ts`, `tool/executor.ts`, `tool/built-in/` | `defineTool()` with Zod schemas, ToolRegistry, parallel batch execution with concurrency semaphore | +| LLM | `llm/adapter.ts`, `llm/anthropic.ts`, `llm/openai.ts` | `LLMAdapter` interface (`chat` + `stream`), factory `createAdapter()` | +| Memory | `memory/shared.ts`, `memory/store.ts` | Namespaced key-value store (`agentName/key`), markdown summary injection into prompts | +| Types | `types.ts` | All interfaces in one file to avoid circular deps | +| Exports | `index.ts` | Public API surface | + +### Agent Conversation Loop (AgentRunner) + +`AgentRunner.run()`: send messages → extract tool-use blocks → execute tools in parallel batch → append results → loop until `end_turn` or `maxTurns` exhausted. Accumulates `TokenUsage` across all turns. 
+ +### Concurrency Control + +Two independent semaphores: `AgentPool` (max concurrent agent runs, default 5) and `ToolExecutor` (max concurrent tool calls, default 4). + +### Structured Output + +Optional `outputSchema` (Zod) on `AgentConfig`. When set, the agent's final output is parsed as JSON and validated. On validation failure, one retry with error feedback is attempted. Validated data is available via `result.structured`. Logic lives in `agent/structured-output.ts`, wired into `Agent.executeRun()`. + +### Task Retry + +Optional `maxRetries`, `retryDelayMs`, `retryBackoff` on task config (used via `runTasks()`). `executeWithRetry()` in `orchestrator.ts` handles the retry loop with exponential backoff (capped at 30s). Token usage is accumulated across all attempts. Emits `task_retry` event via `onProgress`. + +### Error Handling + +- Tool errors → caught, returned as `ToolResult(isError: true)`, never thrown +- Task failures → retry if `maxRetries > 0`, then cascade to all dependents; independent tasks continue +- LLM API errors → propagate to caller + +### Built-in Tools + +`bash`, `file_read`, `file_write`, `file_edit`, `grep` — registered via `registerBuiltInTools(registry)`. + +### Adding an LLM Adapter + +Implement `LLMAdapter` interface with `chat(messages, options)` and `stream(messages, options)`, then register in `createAdapter()` factory in `src/llm/adapter.ts`. 
From 27c0103736d91616e8ec66042c738a9a21e73a37 Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 14:23:22 +0800 Subject: [PATCH 33/38] docs: add examples for structured output and task retry --- README.md | 2 + examples/10-structured-output.ts | 73 +++++++++++++++++ examples/11-task-retry.ts | 132 +++++++++++++++++++++++++++++++ 3 files changed, 207 insertions(+) create mode 100644 examples/10-structured-output.ts create mode 100644 examples/11-task-retry.ts diff --git a/README.md b/README.md index 6fee192..b8ca34f 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,8 @@ npx tsx examples/01-single-agent.ts | [07 — Fan-Out / Aggregate](examples/07-fan-out-aggregate.ts) | `runParallel()` MapReduce — 3 analysts in parallel, then synthesize | | [08 — Gemma 4 Local](examples/08-gemma4-local.ts) | Pure-local Gemma 4 agent team with tool-calling — zero API cost | | [09 — Gemma 4 Auto-Orchestration](examples/09-gemma4-auto-orchestration.ts) | `runTeam()` with Gemma 4 as coordinator — auto task decomposition, fully local | +| [10 — Structured Output](examples/10-structured-output.ts) | `outputSchema` (Zod) on AgentConfig — validated JSON via `result.structured` | +| [11 — Task Retry](examples/11-task-retry.ts) | `maxRetries` / `retryDelayMs` / `retryBackoff` with `task_retry` progress events | ## Architecture diff --git a/examples/10-structured-output.ts b/examples/10-structured-output.ts new file mode 100644 index 0000000..ef20872 --- /dev/null +++ b/examples/10-structured-output.ts @@ -0,0 +1,73 @@ +/** + * Example 10 — Structured Output + * + * Demonstrates `outputSchema` on AgentConfig. The agent's response is + * automatically parsed as JSON and validated against a Zod schema. + * On validation failure, the framework retries once with error feedback. + * + * The validated result is available via `result.structured`. + * + * Run: + * npx tsx examples/10-structured-output.ts + * + * Prerequisites: + * ANTHROPIC_API_KEY env var must be set. 
+ */ + +import { z } from 'zod' +import { OpenMultiAgent } from '../src/index.js' +import type { AgentConfig } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Define a Zod schema for the expected output +// --------------------------------------------------------------------------- + +const ReviewAnalysis = z.object({ + summary: z.string().describe('One-sentence summary of the review'), + sentiment: z.enum(['positive', 'negative', 'neutral']), + confidence: z.number().min(0).max(1).describe('How confident the analysis is'), + keyTopics: z.array(z.string()).describe('Main topics mentioned in the review'), +}) + +type ReviewAnalysis = z.infer + +// --------------------------------------------------------------------------- +// Agent with outputSchema +// --------------------------------------------------------------------------- + +const analyst: AgentConfig = { + name: 'analyst', + model: 'claude-sonnet-4-6', + systemPrompt: 'You are a product review analyst. Analyze the given review and extract structured insights.', + outputSchema: ReviewAnalysis, +} + +// --------------------------------------------------------------------------- +// Run +// --------------------------------------------------------------------------- + +const orchestrator = new OpenMultiAgent({ defaultModel: 'claude-sonnet-4-6' }) + +const reviews = [ + 'This keyboard is amazing! The mechanical switches feel incredible and the RGB lighting is stunning. Build quality is top-notch. Only downside is the price.', + 'Terrible experience. The product arrived broken, customer support was unhelpful, and the return process took 3 weeks.', + 'It works fine. Nothing special, nothing bad. 
Does what it says on the box.', +] + +console.log('Analyzing product reviews with structured output...\n') + +for (const review of reviews) { + const result = await orchestrator.runAgent(analyst, `Analyze this review: "${review}"`) + + if (result.structured) { + const data = result.structured as ReviewAnalysis + console.log(`Sentiment: ${data.sentiment} (confidence: ${data.confidence})`) + console.log(`Summary: ${data.summary}`) + console.log(`Topics: ${data.keyTopics.join(', ')}`) + } else { + console.log(`Validation failed. Raw output: ${result.output.slice(0, 100)}`) + } + + console.log(`Tokens: ${result.tokenUsage.input_tokens} in / ${result.tokenUsage.output_tokens} out`) + console.log('---') +} diff --git a/examples/11-task-retry.ts b/examples/11-task-retry.ts new file mode 100644 index 0000000..eafd274 --- /dev/null +++ b/examples/11-task-retry.ts @@ -0,0 +1,132 @@ +/** + * Example 11 — Task Retry with Exponential Backoff + * + * Demonstrates `maxRetries`, `retryDelayMs`, and `retryBackoff` on task config. + * When a task fails, the framework automatically retries with exponential + * backoff. The `onProgress` callback receives `task_retry` events so you can + * log retry attempts in real time. + * + * Scenario: a two-step pipeline where the first task (data fetch) is configured + * to retry on failure, and the second task (analysis) depends on it. + * + * Run: + * npx tsx examples/11-task-retry.ts + * + * Prerequisites: + * ANTHROPIC_API_KEY env var must be set. + */ + +import { OpenMultiAgent } from '../src/index.js' +import type { AgentConfig, OrchestratorEvent } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Agents +// --------------------------------------------------------------------------- + +const fetcher: AgentConfig = { + name: 'fetcher', + model: 'claude-sonnet-4-6', + systemPrompt: `You are a data-fetching agent. When given a topic, produce a short +JSON summary with 3-5 key facts. 
Output ONLY valid JSON, no markdown fences. +Example: {"topic":"...", "facts":["fact1","fact2","fact3"]}`, + maxTurns: 2, +} + +const analyst: AgentConfig = { + name: 'analyst', + model: 'claude-sonnet-4-6', + systemPrompt: `You are a data analyst. Read the fetched data from shared memory +and produce a brief analysis (3-4 sentences) highlighting trends or insights.`, + maxTurns: 2, +} + +// --------------------------------------------------------------------------- +// Progress handler — watch for task_retry events +// --------------------------------------------------------------------------- + +function handleProgress(event: OrchestratorEvent): void { + const ts = new Date().toISOString().slice(11, 23) + + switch (event.type) { + case 'task_start': + console.log(`[${ts}] TASK START "${event.task}" (agent: ${event.agent})`) + break + case 'task_complete': + console.log(`[${ts}] TASK DONE "${event.task}"`) + break + case 'task_retry': { + const d = event.data as { attempt: number; maxAttempts: number; error: string; nextDelayMs: number } + console.log(`[${ts}] TASK RETRY "${event.task}" — attempt ${d.attempt}/${d.maxAttempts}, next in ${d.nextDelayMs}ms`) + console.log(` error: ${d.error.slice(0, 120)}`) + break + } + case 'error': + console.log(`[${ts}] ERROR "${event.task}" agent=${event.agent}`) + break + } +} + +// --------------------------------------------------------------------------- +// Orchestrator + team +// --------------------------------------------------------------------------- + +const orchestrator = new OpenMultiAgent({ + defaultModel: 'claude-sonnet-4-6', + onProgress: handleProgress, +}) + +const team = orchestrator.createTeam('retry-demo', { + name: 'retry-demo', + agents: [fetcher, analyst], + sharedMemory: true, +}) + +// --------------------------------------------------------------------------- +// Tasks — fetcher has retry config, analyst depends on it +// --------------------------------------------------------------------------- + 
+const tasks = [ + { + title: 'Fetch data', + description: 'Fetch key facts about the adoption of TypeScript in open-source projects as of 2024. Output a JSON object with a "topic" and "facts" array.', + assignee: 'fetcher', + // Retry config: up to 2 retries, 500ms base delay, 2x backoff (500ms, 1000ms) + maxRetries: 2, + retryDelayMs: 500, + retryBackoff: 2, + }, + { + title: 'Analyze data', + description: 'Read the fetched data from shared memory and produce a 3-4 sentence analysis of TypeScript adoption trends.', + assignee: 'analyst', + dependsOn: ['Fetch data'], + // No retry — if analysis fails, just report the error + }, +] + +// --------------------------------------------------------------------------- +// Run +// --------------------------------------------------------------------------- + +console.log('Task Retry Example') +console.log('='.repeat(60)) +console.log('Pipeline: fetch (with retry) → analyze') +console.log(`Retry config: maxRetries=2, delay=500ms, backoff=2x`) +console.log('='.repeat(60)) +console.log() + +const result = await orchestrator.runTasks(team, tasks) + +// --------------------------------------------------------------------------- +// Summary +// --------------------------------------------------------------------------- + +console.log('\n' + '='.repeat(60)) +console.log(`Overall success: ${result.success}`) +console.log(`Tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`) + +for (const [name, r] of result.agentResults) { + const icon = r.success ? 
'OK ' : 'FAIL' + console.log(` [${icon}] ${name}`) + console.log(` ${r.output.slice(0, 200)}`) +} From 17546fd93e51695833f47e41c646272d5f248a3a Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 14:32:48 +0800 Subject: [PATCH 34/38] docs: merge Gemma 4 examples, reorder README sections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Merge examples 08 (runTasks) and 09 (runTeam) into a single Gemma 4 example - Renumber: structured output → 09, task retry → 10 - Move Author and Contributors sections to bottom in both READMEs - Add Author section to English README --- README.md | 23 +-- README_zh.md | 25 +-- examples/08-gemma4-local.ts | 169 ++++++++---------- examples/09-gemma4-auto-orchestration.ts | 162 ----------------- ...ured-output.ts => 09-structured-output.ts} | 4 +- .../{11-task-retry.ts => 10-task-retry.ts} | 4 +- 6 files changed, 109 insertions(+), 278 deletions(-) delete mode 100644 examples/09-gemma4-auto-orchestration.ts rename examples/{10-structured-output.ts => 09-structured-output.ts} (97%) rename examples/{11-task-retry.ts => 10-task-retry.ts} (98%) diff --git a/README.md b/README.md index b8ca34f..df5a920 100644 --- a/README.md +++ b/README.md @@ -100,12 +100,6 @@ Tokens: 12847 output tokens | Auto-orchestrated team | `runTeam()` | Give a goal, framework plans and executes | | Explicit pipeline | `runTasks()` | You define the task graph and assignments | -## Contributors - - - - - ## Examples All examples are runnable scripts in [`examples/`](./examples/). Run any of them with `npx tsx`: @@ -123,10 +117,9 @@ npx tsx examples/01-single-agent.ts | [05 — Copilot](examples/05-copilot-test.ts) | GitHub Copilot as an LLM provider | | [06 — Local Model](examples/06-local-model.ts) | Ollama + Claude in one pipeline via `baseURL` (works with vLLM, LM Studio, etc.) 
| | [07 — Fan-Out / Aggregate](examples/07-fan-out-aggregate.ts) | `runParallel()` MapReduce — 3 analysts in parallel, then synthesize | -| [08 — Gemma 4 Local](examples/08-gemma4-local.ts) | Pure-local Gemma 4 agent team with tool-calling — zero API cost | -| [09 — Gemma 4 Auto-Orchestration](examples/09-gemma4-auto-orchestration.ts) | `runTeam()` with Gemma 4 as coordinator — auto task decomposition, fully local | -| [10 — Structured Output](examples/10-structured-output.ts) | `outputSchema` (Zod) on AgentConfig — validated JSON via `result.structured` | -| [11 — Task Retry](examples/11-task-retry.ts) | `maxRetries` / `retryDelayMs` / `retryBackoff` with `task_retry` progress events | +| [08 — Gemma 4 Local](examples/08-gemma4-local.ts) | `runTasks()` + `runTeam()` with local Gemma 4 via Ollama — zero API cost | +| [09 — Structured Output](examples/09-structured-output.ts) | `outputSchema` (Zod) on AgentConfig — validated JSON via `result.structured` | +| [10 — Task Retry](examples/10-task-retry.ts) | `maxRetries` / `retryDelayMs` / `retryBackoff` with `task_retry` progress events | ## Architecture @@ -200,6 +193,16 @@ Issues, feature requests, and PRs are welcome. Some areas where contributions wo - **Examples** — Real-world workflows and use cases. - **Documentation** — Guides, tutorials, and API docs. +## Author + +> JackChen — Ex PM (¥100M+ revenue), now indie builder. Follow on [X](https://x.com/JackChen_x) for AI Agent insights. 
+ +## Contributors + + + + + ## Star History diff --git a/README_zh.md b/README_zh.md index 5cba7f7..c9f7ac9 100644 --- a/README_zh.md +++ b/README_zh.md @@ -92,10 +92,6 @@ Success: true Tokens: 12847 output tokens ``` -## 作者 - -> JackChen — 前 WPS 产品经理,现独立创业者。关注小红书[「杰克西|硅基杠杆」](https://www.xiaohongshu.com/user/profile/5a1bdc1e4eacab4aa39ea6d6),持续获取我的 AI Agent 观点和思考。 - ## 三种运行模式 | 模式 | 方法 | 适用场景 | @@ -104,12 +100,6 @@ Tokens: 12847 output tokens | 自动编排团队 | `runTeam()` | 给一个目标,框架自动规划和执行 | | 显式任务管线 | `runTasks()` | 你自己定义任务图和分配 | -## 贡献者 - - - - - ## 示例 所有示例都是可运行脚本,位于 [`examples/`](./examples/) 目录。使用 `npx tsx` 运行: @@ -127,8 +117,9 @@ npx tsx examples/01-single-agent.ts | [05 — Copilot](examples/05-copilot-test.ts) | GitHub Copilot 作为 LLM 提供者 | | [06 — 本地模型](examples/06-local-model.ts) | Ollama + Claude 混合流水线,通过 `baseURL` 接入(兼容 vLLM、LM Studio 等) | | [07 — 扇出聚合](examples/07-fan-out-aggregate.ts) | `runParallel()` MapReduce — 3 个分析师并行,然后综合 | -| [08 — Gemma 4 本地](examples/08-gemma4-local.ts) | 纯本地 Gemma 4 智能体团队 + tool-calling — 零 API 费用 | -| [09 — Gemma 4 自动编排](examples/09-gemma4-auto-orchestration.ts) | `runTeam()` 用 Gemma 4 当 coordinator — 自动任务拆解,完全本地 | +| [08 — Gemma 4 本地](examples/08-gemma4-local.ts) | `runTasks()` + `runTeam()` 本地 Gemma 4 via Ollama — 零 API 费用 | +| [09 — 结构化输出](examples/09-structured-output.ts) | `outputSchema`(Zod)— 校验 JSON 输出,通过 `result.structured` 获取 | +| [10 — 任务重试](examples/10-task-retry.ts) | `maxRetries` / `retryDelayMs` / `retryBackoff` + `task_retry` 进度事件 | ## 架构 @@ -202,6 +193,16 @@ npx tsx examples/01-single-agent.ts - **示例** — 真实场景的工作流和用例。 - **文档** — 指南、教程和 API 文档。 +## 作者 + +> JackChen — 前 WPS 产品经理,现独立创业者。关注小红书[「杰克西|硅基杠杆」](https://www.xiaohongshu.com/user/profile/5a1bdc1e4eacab4aa39ea6d6),持续获取我的 AI Agent 观点和思考。 + +## 贡献者 + + + + + ## Star 趋势 diff --git a/examples/08-gemma4-local.ts b/examples/08-gemma4-local.ts index 0dd8087..0d31853 100644 --- a/examples/08-gemma4-local.ts +++ b/examples/08-gemma4-local.ts @@ -1,15 +1,16 @@ /** - * Example 
08 — Gemma 4 Local Agent Team (100% Local, Zero API Cost) + * Example 08 — Gemma 4 Local (100% Local, Zero API Cost) * - * Demonstrates a fully local multi-agent team using Google's Gemma 4 via + * Demonstrates both execution modes with a fully local Gemma 4 model via * Ollama. No cloud API keys needed — everything runs on your machine. * - * Two agents collaborate through a task pipeline: - * - researcher: uses bash + file_write to gather system info and write a report - * - summarizer: uses file_read to read the report and produce a concise summary + * Part 1 — runTasks(): explicit task pipeline (researcher → summarizer) + * Part 2 — runTeam(): auto-orchestration where Gemma 4 acts as coordinator, + * decomposes the goal into tasks, and synthesises the final result * - * This pattern works with any Ollama model that supports tool-calling. - * Gemma 4 (released 2026-04-02) has native tool-calling support. + * This is the hardest test for a local model — runTeam() requires it to + * produce valid JSON for task decomposition AND do tool-calling for execution. + * Gemma 4 e2b (5.1B params) handles both reliably. * * Run: * no_proxy=localhost npx tsx examples/08-gemma4-local.ts @@ -38,46 +39,31 @@ const OLLAMA_BASE_URL = 'http://localhost:11434/v1' const OUTPUT_DIR = '/tmp/gemma4-demo' // --------------------------------------------------------------------------- -// Agents — both use Gemma 4 locally +// Agents // --------------------------------------------------------------------------- -/** - * Researcher — gathers system information using shell commands. - */ const researcher: AgentConfig = { name: 'researcher', model: OLLAMA_MODEL, provider: 'openai', baseURL: OLLAMA_BASE_URL, apiKey: 'ollama', // placeholder — Ollama ignores this, but the OpenAI SDK requires a non-empty value - systemPrompt: `You are a system researcher. Your job is to gather information -about the current machine using shell commands and write a structured report. 
- -Use the bash tool to run commands like: uname -a, df -h, uptime, and similar -non-destructive read-only commands. -On macOS you can also use: sw_vers, sysctl -n hw.memsize. -On Linux you can also use: cat /etc/os-release, free -h. - -Then use file_write to save a Markdown report to ${OUTPUT_DIR}/system-report.md. -The report should have sections: OS, Hardware, Disk, and Uptime. -Be concise — one or two lines per section is enough.`, + systemPrompt: `You are a system researcher. Use bash to run non-destructive, +read-only commands (uname -a, sw_vers, df -h, uptime, etc.) and report results. +Use file_write to save reports when asked.`, tools: ['bash', 'file_write'], maxTurns: 8, } -/** - * Summarizer — reads the report and writes a one-paragraph executive summary. - */ const summarizer: AgentConfig = { name: 'summarizer', model: OLLAMA_MODEL, provider: 'openai', baseURL: OLLAMA_BASE_URL, apiKey: 'ollama', - systemPrompt: `You are a technical writer. Read the system report file provided, -then produce a concise one-paragraph executive summary (3-5 sentences). -Focus on the key highlights: what OS, how much RAM, disk status, and uptime.`, - tools: ['file_read'], + systemPrompt: `You are a technical writer. Read files and produce concise, +structured Markdown summaries. Use file_write to save reports when asked.`, + tools: ['file_read', 'file_write'], maxTurns: 4, } @@ -85,23 +71,17 @@ Focus on the key highlights: what OS, how much RAM, disk status, and uptime.`, // Progress handler // --------------------------------------------------------------------------- -const taskTimes = new Map() - function handleProgress(event: OrchestratorEvent): void { const ts = new Date().toISOString().slice(11, 23) - switch (event.type) { case 'task_start': { - taskTimes.set(event.task ?? '', Date.now()) const task = event.data as Task | undefined console.log(`[${ts}] TASK START "${task?.title ?? event.task}" → ${task?.assignee ?? 
'?'}`) break } - case 'task_complete': { - const elapsed = Date.now() - (taskTimes.get(event.task ?? '') ?? Date.now()) - console.log(`[${ts}] TASK DONE "${event.task}" in ${(elapsed / 1000).toFixed(1)}s`) + case 'task_complete': + console.log(`[${ts}] TASK DONE "${event.task}"`) break - } case 'agent_start': console.log(`[${ts}] AGENT START ${event.agent}`) break @@ -114,32 +94,29 @@ function handleProgress(event: OrchestratorEvent): void { } } -// --------------------------------------------------------------------------- -// Orchestrator + Team -// --------------------------------------------------------------------------- +// ═══════════════════════════════════════════════════════════════════════════ +// Part 1: runTasks() — Explicit task pipeline +// ═══════════════════════════════════════════════════════════════════════════ -const orchestrator = new OpenMultiAgent({ +console.log('Part 1: runTasks() — Explicit Pipeline') +console.log('='.repeat(60)) +console.log(` model → ${OLLAMA_MODEL} via Ollama`) +console.log(` pipeline → researcher gathers info → summarizer writes summary`) +console.log() + +const orchestrator1 = new OpenMultiAgent({ defaultModel: OLLAMA_MODEL, - maxConcurrency: 1, // run agents sequentially — local model can only serve one at a time + maxConcurrency: 1, // local model serves one request at a time onProgress: handleProgress, }) -const team = orchestrator.createTeam('gemma4-team', { - name: 'gemma4-team', +const team1 = orchestrator1.createTeam('explicit', { + name: 'explicit', agents: [researcher, summarizer], sharedMemory: true, }) -// --------------------------------------------------------------------------- -// Task pipeline: research → summarize -// --------------------------------------------------------------------------- - -const tasks: Array<{ - title: string - description: string - assignee?: string - dependsOn?: string[] -}> = [ +const tasks = [ { title: 'Gather system information', description: `Use bash to run system info 
commands (uname -a, sw_vers, sysctl, df -h, uptime). @@ -156,48 +133,60 @@ Produce a concise one-paragraph executive summary of the system information.`, }, ] -// --------------------------------------------------------------------------- -// Run -// --------------------------------------------------------------------------- +const start1 = Date.now() +const result1 = await orchestrator1.runTasks(team1, tasks) -console.log('Gemma 4 Local Agent Team — Zero API Cost') -console.log('='.repeat(60)) -console.log(` model → ${OLLAMA_MODEL} via Ollama`) -console.log(` researcher → bash + file_write`) -console.log(` summarizer → file_read`) -console.log(` output dir → ${OUTPUT_DIR}`) -console.log() -console.log('Pipeline: researcher gathers info → summarizer writes summary') -console.log('='.repeat(60)) +console.log(`\nSuccess: ${result1.success} Time: ${((Date.now() - start1) / 1000).toFixed(1)}s`) +console.log(`Tokens — input: ${result1.totalTokenUsage.input_tokens}, output: ${result1.totalTokenUsage.output_tokens}`) -const start = Date.now() -const result = await orchestrator.runTasks(team, tasks) -const totalTime = Date.now() - start - -// --------------------------------------------------------------------------- -// Summary -// --------------------------------------------------------------------------- - -console.log('\n' + '='.repeat(60)) -console.log('Pipeline complete.\n') -console.log(`Overall success: ${result.success}`) -console.log(`Total time: ${(totalTime / 1000).toFixed(1)}s`) -console.log(`Tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`) - -console.log('\nPer-agent results:') -for (const [name, r] of result.agentResults) { - const icon = r.success ? 
'OK ' : 'FAIL' - const tools = r.toolCalls.map(c => c.toolName).join(', ') - console.log(` [${icon}] ${name.padEnd(12)} tools: ${tools || '(none)'}`) -} - -// Print the summarizer's output -const summary = result.agentResults.get('summarizer') +const summary = result1.agentResults.get('summarizer') if (summary?.success) { - console.log('\nExecutive Summary (from local Gemma 4):') + console.log('\nSummary (from local Gemma 4):') console.log('-'.repeat(60)) console.log(summary.output) console.log('-'.repeat(60)) } +// ═══════════════════════════════════════════════════════════════════════════ +// Part 2: runTeam() — Auto-orchestration (Gemma 4 as coordinator) +// ═══════════════════════════════════════════════════════════════════════════ + +console.log('\n\nPart 2: runTeam() — Auto-Orchestration') +console.log('='.repeat(60)) +console.log(` coordinator → auto-created by runTeam(), also Gemma 4`) +console.log(` goal → given in natural language, framework plans everything`) +console.log() + +const orchestrator2 = new OpenMultiAgent({ + defaultModel: OLLAMA_MODEL, + defaultProvider: 'openai', + defaultBaseURL: OLLAMA_BASE_URL, + defaultApiKey: 'ollama', + maxConcurrency: 1, + onProgress: handleProgress, +}) + +const team2 = orchestrator2.createTeam('auto', { + name: 'auto', + agents: [researcher, summarizer], + sharedMemory: true, +}) + +const goal = `Check this machine's Node.js version, npm version, and OS info, +then write a short Markdown summary report to /tmp/gemma4-auto/report.md` + +const start2 = Date.now() +const result2 = await orchestrator2.runTeam(team2, goal) + +console.log(`\nSuccess: ${result2.success} Time: ${((Date.now() - start2) / 1000).toFixed(1)}s`) +console.log(`Tokens — input: ${result2.totalTokenUsage.input_tokens}, output: ${result2.totalTokenUsage.output_tokens}`) + +const coordResult = result2.agentResults.get('coordinator') +if (coordResult?.success) { + console.log('\nFinal synthesis (from local Gemma 4 coordinator):') + 
console.log('-'.repeat(60)) + console.log(coordResult.output) + console.log('-'.repeat(60)) +} + console.log('\nAll processing done locally. $0 API cost.') diff --git a/examples/09-gemma4-auto-orchestration.ts b/examples/09-gemma4-auto-orchestration.ts deleted file mode 100644 index 185ede7..0000000 --- a/examples/09-gemma4-auto-orchestration.ts +++ /dev/null @@ -1,162 +0,0 @@ -/** - * Example 09 — Gemma 4 Auto-Orchestration (runTeam, 100% Local) - * - * Demonstrates the framework's key feature — automatic task decomposition — - * powered entirely by a local Gemma 4 model. No cloud API needed. - * - * What happens: - * 1. A Gemma 4 "coordinator" receives the goal + agent roster - * 2. It outputs a structured JSON task array (title, description, assignee, dependsOn) - * 3. The framework resolves dependencies, schedules tasks, and runs agents - * 4. The coordinator synthesises all task results into a final answer - * - * This is the hardest test for a local model — it must produce valid JSON - * for task decomposition AND do tool-calling for actual task execution. - * Gemma 4 e2b (5.1B params) handles both reliably. - * - * Run: - * no_proxy=localhost npx tsx examples/09-gemma4-auto-orchestration.ts - * - * Prerequisites: - * 1. Ollama >= 0.20.0 installed and running: https://ollama.com - * 2. Pull the model: ollama pull gemma4:e2b - * 3. No API keys needed! - * - * Note: The no_proxy=localhost prefix is needed if you have an HTTP proxy - * configured, since the OpenAI SDK would otherwise route Ollama requests - * through the proxy. 
- */ - -import { OpenMultiAgent } from '../src/index.js' -import type { AgentConfig, OrchestratorEvent, Task } from '../src/types.js' - -// --------------------------------------------------------------------------- -// Configuration -// --------------------------------------------------------------------------- - -// See available tags at https://ollama.com/library/gemma4 -const OLLAMA_MODEL = 'gemma4:e2b' // or 'gemma4:e4b', 'gemma4:26b' -const OLLAMA_BASE_URL = 'http://localhost:11434/v1' - -// --------------------------------------------------------------------------- -// Agents — the coordinator is created automatically by runTeam() -// --------------------------------------------------------------------------- - -const researcher: AgentConfig = { - name: 'researcher', - model: OLLAMA_MODEL, - provider: 'openai', - baseURL: OLLAMA_BASE_URL, - apiKey: 'ollama', - systemPrompt: `You are a system researcher. Use bash to run non-destructive, -read-only commands and report the results concisely.`, - tools: ['bash'], - maxTurns: 4, -} - -const writer: AgentConfig = { - name: 'writer', - model: OLLAMA_MODEL, - provider: 'openai', - baseURL: OLLAMA_BASE_URL, - apiKey: 'ollama', - systemPrompt: `You are a technical writer. Use file_write to create clear, -structured Markdown reports based on the information provided.`, - tools: ['file_write'], - maxTurns: 4, -} - -// --------------------------------------------------------------------------- -// Progress handler -// --------------------------------------------------------------------------- - -function handleProgress(event: OrchestratorEvent): void { - const ts = new Date().toISOString().slice(11, 23) - switch (event.type) { - case 'task_start': { - const task = event.data as Task | undefined - console.log(`[${ts}] TASK START "${task?.title ?? event.task}" → ${task?.assignee ?? 
'?'}`) - break - } - case 'task_complete': - console.log(`[${ts}] TASK DONE "${event.task}"`) - break - case 'agent_start': - console.log(`[${ts}] AGENT START ${event.agent}`) - break - case 'agent_complete': - console.log(`[${ts}] AGENT DONE ${event.agent}`) - break - case 'error': - console.error(`[${ts}] ERROR ${event.agent ?? ''} task=${event.task ?? '?'}`) - break - } -} - -// --------------------------------------------------------------------------- -// Orchestrator — defaultModel is used for the coordinator agent -// --------------------------------------------------------------------------- - -const orchestrator = new OpenMultiAgent({ - defaultModel: OLLAMA_MODEL, - defaultProvider: 'openai', - defaultBaseURL: OLLAMA_BASE_URL, - defaultApiKey: 'ollama', - maxConcurrency: 1, // local model serves one request at a time - onProgress: handleProgress, -}) - -const team = orchestrator.createTeam('gemma4-auto', { - name: 'gemma4-auto', - agents: [researcher, writer], - sharedMemory: true, -}) - -// --------------------------------------------------------------------------- -// Give a goal — the framework handles the rest -// --------------------------------------------------------------------------- - -const goal = `Check this machine's Node.js version, npm version, and OS info, -then write a short Markdown summary report to /tmp/gemma4-auto/report.md` - -console.log('Gemma 4 Auto-Orchestration — Zero API Cost') -console.log('='.repeat(60)) -console.log(` model → ${OLLAMA_MODEL} via Ollama (all agents + coordinator)`) -console.log(` researcher → bash`) -console.log(` writer → file_write`) -console.log(` coordinator → auto-created by runTeam()`) -console.log() -console.log(`Goal: ${goal.replace(/\n/g, ' ').trim()}`) -console.log('='.repeat(60)) - -const start = Date.now() -const result = await orchestrator.runTeam(team, goal) -const totalTime = Date.now() - start - -// --------------------------------------------------------------------------- -// Results -// 
--------------------------------------------------------------------------- - -console.log('\n' + '='.repeat(60)) -console.log('Pipeline complete.\n') -console.log(`Overall success: ${result.success}`) -console.log(`Total time: ${(totalTime / 1000).toFixed(1)}s`) -console.log(`Tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`) - -console.log('\nPer-agent results:') -for (const [name, r] of result.agentResults) { - const icon = r.success ? 'OK ' : 'FAIL' - const tools = r.toolCalls.length > 0 ? r.toolCalls.map(c => c.toolName).join(', ') : '(none)' - console.log(` [${icon}] ${name.padEnd(24)} tools: ${tools}`) -} - -// Print the coordinator's final synthesis -const coordResult = result.agentResults.get('coordinator') -if (coordResult?.success) { - console.log('\nFinal synthesis (from local Gemma 4 coordinator):') - console.log('-'.repeat(60)) - console.log(coordResult.output) - console.log('-'.repeat(60)) -} - -console.log('\nAll processing done locally. $0 API cost.') diff --git a/examples/10-structured-output.ts b/examples/09-structured-output.ts similarity index 97% rename from examples/10-structured-output.ts rename to examples/09-structured-output.ts index ef20872..2ffc29e 100644 --- a/examples/10-structured-output.ts +++ b/examples/09-structured-output.ts @@ -1,5 +1,5 @@ /** - * Example 10 — Structured Output + * Example 09 — Structured Output * * Demonstrates `outputSchema` on AgentConfig. The agent's response is * automatically parsed as JSON and validated against a Zod schema. @@ -8,7 +8,7 @@ * The validated result is available via `result.structured`. * * Run: - * npx tsx examples/10-structured-output.ts + * npx tsx examples/09-structured-output.ts * * Prerequisites: * ANTHROPIC_API_KEY env var must be set. 
diff --git a/examples/11-task-retry.ts b/examples/10-task-retry.ts similarity index 98% rename from examples/11-task-retry.ts rename to examples/10-task-retry.ts index eafd274..5f53e5e 100644 --- a/examples/11-task-retry.ts +++ b/examples/10-task-retry.ts @@ -1,5 +1,5 @@ /** - * Example 11 — Task Retry with Exponential Backoff + * Example 10 — Task Retry with Exponential Backoff * * Demonstrates `maxRetries`, `retryDelayMs`, and `retryBackoff` on task config. * When a task fails, the framework automatically retries with exponential @@ -10,7 +10,7 @@ * to retry on failure, and the second task (analysis) depends on it. * * Run: - * npx tsx examples/11-task-retry.ts + * npx tsx examples/10-task-retry.ts * * Prerequisites: * ANTHROPIC_API_KEY env var must be set. From 0111876264152316d335e033f4511ddc29a24172 Mon Sep 17 00:00:00 2001 From: JackChen <26346076+JackChen-me@users.noreply.github.com> Date: Fri, 3 Apr 2026 15:28:59 +0800 Subject: [PATCH 35/38] feat: add onTrace observability callback (#18) Add lightweight onTrace callback to OrchestratorConfig that emits structured span events (llm_call, tool_call, task, agent) with timing, token usage, and runId correlation. Zero overhead when not subscribed. Closes #18 --- README.md | 2 + README_zh.md | 2 + examples/11-trace-observability.ts | 133 +++++++++ src/agent/agent.ts | 60 +++- src/agent/pool.ts | 11 +- src/agent/runner.ts | 43 ++- src/index.ts | 11 + src/orchestrator/orchestrator.ts | 56 +++- src/types.ts | 62 +++- src/utils/trace.ts | 34 +++ tests/trace.test.ts | 453 +++++++++++++++++++++++++++++ 11 files changed, 848 insertions(+), 19 deletions(-) create mode 100644 examples/11-trace-observability.ts create mode 100644 src/utils/trace.ts create mode 100644 tests/trace.test.ts diff --git a/README.md b/README.md index df5a920..7c36d83 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ Build AI agent teams that decompose goals into tasks automatically. 
Define agent - **Model Agnostic** — Claude, GPT, Gemma 4, and local models (Ollama, vLLM, LM Studio) in the same team. Swap models per agent via `baseURL`. - **Structured Output** — Add `outputSchema` (Zod) to any agent. Output is parsed as JSON, validated, and auto-retried once on failure. Access typed results via `result.structured`. - **Task Retry** — Set `maxRetries` on tasks for automatic retry with exponential backoff. Failed attempts accumulate token usage for accurate billing. +- **Observability** — Optional `onTrace` callback emits structured spans for every LLM call, tool execution, task, and agent run — with timing, token usage, and a shared `runId` for correlation. Zero overhead when not subscribed, zero extra dependencies. - **In-Process Execution** — No subprocess overhead. Everything runs in one Node.js process. Deploy to serverless, Docker, CI/CD. ## Quick Start @@ -120,6 +121,7 @@ npx tsx examples/01-single-agent.ts | [08 — Gemma 4 Local](examples/08-gemma4-local.ts) | `runTasks()` + `runTeam()` with local Gemma 4 via Ollama — zero API cost | | [09 — Structured Output](examples/09-structured-output.ts) | `outputSchema` (Zod) on AgentConfig — validated JSON via `result.structured` | | [10 — Task Retry](examples/10-task-retry.ts) | `maxRetries` / `retryDelayMs` / `retryBackoff` with `task_retry` progress events | +| [11 — Trace Observability](examples/11-trace-observability.ts) | `onTrace` callback — structured spans for LLM calls, tools, tasks, and agents | ## Architecture diff --git a/README_zh.md b/README_zh.md index c9f7ac9..458d6de 100644 --- a/README_zh.md +++ b/README_zh.md @@ -18,6 +18,7 @@ - **模型无关** — Claude、GPT、Gemma 4 和本地模型(Ollama、vLLM、LM Studio)可以在同一个团队中使用。通过 `baseURL` 即可接入任何 OpenAI 兼容服务。 - **结构化输出** — 为任意智能体添加 `outputSchema`(Zod),输出自动解析为 JSON 并校验,校验失败自动重试一次。通过 `result.structured` 获取类型化结果。 - **任务重试** — 为任务设置 `maxRetries`,失败时自动指数退避重试。所有尝试的 token 用量累计,确保计费准确。 +- **可观测性** — 可选的 `onTrace` 回调为每次 LLM 调用、工具执行、任务和智能体运行发出结构化 span 事件——包含耗时、token 
用量和共享的 `runId` 用于关联追踪。未订阅时零开销,零额外依赖。 - **进程内执行** — 没有子进程开销。所有内容在一个 Node.js 进程中运行。可部署到 Serverless、Docker、CI/CD。 ## 快速开始 @@ -120,6 +121,7 @@ npx tsx examples/01-single-agent.ts | [08 — Gemma 4 本地](examples/08-gemma4-local.ts) | `runTasks()` + `runTeam()` 本地 Gemma 4 via Ollama — 零 API 费用 | | [09 — 结构化输出](examples/09-structured-output.ts) | `outputSchema`(Zod)— 校验 JSON 输出,通过 `result.structured` 获取 | | [10 — 任务重试](examples/10-task-retry.ts) | `maxRetries` / `retryDelayMs` / `retryBackoff` + `task_retry` 进度事件 | +| [11 — 可观测性](examples/11-trace-observability.ts) | `onTrace` 回调 — LLM 调用、工具、任务、智能体的结构化 span 事件 | ## 架构 diff --git a/examples/11-trace-observability.ts b/examples/11-trace-observability.ts new file mode 100644 index 0000000..20b463e --- /dev/null +++ b/examples/11-trace-observability.ts @@ -0,0 +1,133 @@ +/** + * Example 11 — Trace Observability + * + * Demonstrates the `onTrace` callback for lightweight observability. Every LLM + * call, tool execution, task lifecycle, and agent run emits a structured trace + * event with timing data and token usage — giving you full visibility into + * what's happening inside a multi-agent run. + * + * Trace events share a `runId` for correlation, so you can reconstruct the + * full execution timeline. Pipe them into your own logging, OpenTelemetry, or + * dashboard. + * + * Run: + * npx tsx examples/11-trace-observability.ts + * + * Prerequisites: + * ANTHROPIC_API_KEY env var must be set. + */ + +import { OpenMultiAgent } from '../src/index.js' +import type { AgentConfig, TraceEvent } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Agents +// --------------------------------------------------------------------------- + +const researcher: AgentConfig = { + name: 'researcher', + model: 'claude-sonnet-4-6', + systemPrompt: 'You are a research assistant. 
Provide concise, factual answers.', + maxTurns: 2, +} + +const writer: AgentConfig = { + name: 'writer', + model: 'claude-sonnet-4-6', + systemPrompt: 'You are a technical writer. Summarize research into clear prose.', + maxTurns: 2, +} + +// --------------------------------------------------------------------------- +// Trace handler — log every span with timing +// --------------------------------------------------------------------------- + +function handleTrace(event: TraceEvent): void { + const dur = `${event.durationMs}ms`.padStart(7) + + switch (event.type) { + case 'llm_call': + console.log( + ` [LLM] ${dur} agent=${event.agent} model=${event.model} turn=${event.turn}` + + ` tokens=${event.tokens.input_tokens}in/${event.tokens.output_tokens}out`, + ) + break + case 'tool_call': + console.log( + ` [TOOL] ${dur} agent=${event.agent} tool=${event.tool}` + + ` error=${event.isError}`, + ) + break + case 'task': + console.log( + ` [TASK] ${dur} task="${event.taskTitle}" agent=${event.agent}` + + ` success=${event.success} retries=${event.retries}`, + ) + break + case 'agent': + console.log( + ` [AGENT] ${dur} agent=${event.agent} turns=${event.turns}` + + ` tools=${event.toolCalls} tokens=${event.tokens.input_tokens}in/${event.tokens.output_tokens}out`, + ) + break + } +} + +// --------------------------------------------------------------------------- +// Orchestrator + team +// --------------------------------------------------------------------------- + +const orchestrator = new OpenMultiAgent({ + defaultModel: 'claude-sonnet-4-6', + onTrace: handleTrace, +}) + +const team = orchestrator.createTeam('trace-demo', { + name: 'trace-demo', + agents: [researcher, writer], + sharedMemory: true, +}) + +// --------------------------------------------------------------------------- +// Tasks — researcher first, then writer summarizes +// --------------------------------------------------------------------------- + +const tasks = [ + { + title: 'Research topic', + 
description: 'List 5 key benefits of TypeScript for large codebases. Be concise.', + assignee: 'researcher', + }, + { + title: 'Write summary', + description: 'Read the research from shared memory and write a 3-sentence summary.', + assignee: 'writer', + dependsOn: ['Research topic'], + }, +] + +// --------------------------------------------------------------------------- +// Run +// --------------------------------------------------------------------------- + +console.log('Trace Observability Example') +console.log('='.repeat(60)) +console.log('Pipeline: research → write (with full trace output)') +console.log('='.repeat(60)) +console.log() + +const result = await orchestrator.runTasks(team, tasks) + +// --------------------------------------------------------------------------- +// Summary +// --------------------------------------------------------------------------- + +console.log('\n' + '='.repeat(60)) +console.log(`Overall success: ${result.success}`) +console.log(`Tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`) + +for (const [name, r] of result.agentResults) { + const icon = r.success ? 'OK ' : 'FAIL' + console.log(` [${icon}] ${name}`) + console.log(` ${r.output.slice(0, 200)}`) +} diff --git a/src/agent/agent.ts b/src/agent/agent.ts index df6b7df..58a1df3 100644 --- a/src/agent/agent.ts +++ b/src/agent/agent.ts @@ -32,6 +32,7 @@ import type { TokenUsage, ToolUseContext, } from '../types.js' +import { emitTrace, generateRunId } from '../utils/trace.js' import type { ToolDefinition as FrameworkToolDefinition, ToolRegistry } from '../tool/framework.js' import type { ToolExecutor } from '../tool/executor.js' import { createAdapter } from '../llm/adapter.js' @@ -158,12 +159,12 @@ export class Agent { * * Use this for one-shot queries where past context is irrelevant. 
*/ - async run(prompt: string): Promise { + async run(prompt: string, runOptions?: Partial): Promise { const messages: LLMMessage[] = [ { role: 'user', content: [{ type: 'text', text: prompt }] }, ] - return this.executeRun(messages) + return this.executeRun(messages, runOptions) } /** @@ -174,6 +175,7 @@ export class Agent { * * Use this for multi-turn interactions. */ + // TODO(#18): accept optional RunOptions to forward trace context async prompt(message: string): Promise { const userMessage: LLMMessage = { role: 'user', @@ -197,6 +199,7 @@ export class Agent { * * Like {@link run}, this does not use or update the persistent history. */ + // TODO(#18): accept optional RunOptions to forward trace context async *stream(prompt: string): AsyncGenerator { const messages: LLMMessage[] = [ { role: 'user', content: [{ type: 'text', text: prompt }] }, @@ -266,15 +269,26 @@ export class Agent { * Shared execution path used by both `run` and `prompt`. * Handles state transitions and error wrapping. */ - private async executeRun(messages: LLMMessage[]): Promise { + private async executeRun( + messages: LLMMessage[], + callerOptions?: Partial, + ): Promise { this.transitionTo('running') + const agentStartMs = Date.now() + try { const runner = await this.getRunner() + const internalOnMessage = (msg: LLMMessage) => { + this.state.messages.push(msg) + callerOptions?.onMessage?.(msg) + } + // Auto-generate runId when onTrace is provided but runId is missing + const needsRunId = callerOptions?.onTrace && !callerOptions.runId const runOptions: RunOptions = { - onMessage: msg => { - this.state.messages.push(msg) - }, + ...callerOptions, + onMessage: internalOnMessage, + ...(needsRunId ? 
{ runId: generateRunId() } : undefined), } const result = await runner.run(messages, runOptions) @@ -282,21 +296,25 @@ export class Agent { // --- Structured output validation --- if (this.config.outputSchema) { - return this.validateStructuredOutput( + const validated = await this.validateStructuredOutput( messages, result, runner, runOptions, ) + this.emitAgentTrace(callerOptions, agentStartMs, validated) + return validated } this.transitionTo('completed') - return this.toAgentRunResult(result, true) + const agentResult = this.toAgentRunResult(result, true) + this.emitAgentTrace(callerOptions, agentStartMs, agentResult) + return agentResult } catch (err) { const error = err instanceof Error ? err : new Error(String(err)) this.transitionToError(error) - return { + const errorResult: AgentRunResult = { success: false, output: error.message, messages: [], @@ -304,9 +322,33 @@ export class Agent { toolCalls: [], structured: undefined, } + this.emitAgentTrace(callerOptions, agentStartMs, errorResult) + return errorResult } } + /** Emit an `agent` trace event if `onTrace` is provided. */ + private emitAgentTrace( + options: Partial | undefined, + startMs: number, + result: AgentRunResult, + ): void { + if (!options?.onTrace) return + const endMs = Date.now() + emitTrace(options.onTrace, { + type: 'agent', + runId: options.runId ?? '', + taskId: options.taskId, + agent: options.traceAgent ?? this.name, + turns: result.messages.filter(m => m.role === 'assistant').length, + tokens: result.tokenUsage, + toolCalls: result.toolCalls.length, + startMs, + endMs, + durationMs: endMs - startMs, + }) + } + /** * Validate agent output against the configured `outputSchema`. * On first validation failure, retry once with error feedback. 
diff --git a/src/agent/pool.ts b/src/agent/pool.ts index 915f361..aba0eb8 100644 --- a/src/agent/pool.ts +++ b/src/agent/pool.ts @@ -21,6 +21,7 @@ */ import type { AgentRunResult } from '../types.js' +import type { RunOptions } from './runner.js' import type { Agent } from './agent.js' import { Semaphore } from '../utils/semaphore.js' @@ -123,12 +124,16 @@ export class AgentPool { * * @throws {Error} If the agent name is not found. */ - async run(agentName: string, prompt: string): Promise { + async run( + agentName: string, + prompt: string, + runOptions?: Partial, + ): Promise { const agent = this.requireAgent(agentName) await this.semaphore.acquire() try { - return await agent.run(prompt) + return await agent.run(prompt, runOptions) } finally { this.semaphore.release() } @@ -144,6 +149,7 @@ export class AgentPool { * * @param tasks - Array of `{ agent, prompt }` descriptors. */ + // TODO(#18): accept RunOptions per task to forward trace context async runParallel( tasks: ReadonlyArray<{ readonly agent: string; readonly prompt: string }>, ): Promise> { @@ -182,6 +188,7 @@ export class AgentPool { * * @throws {Error} If the pool is empty. */ + // TODO(#18): accept RunOptions to forward trace context async runAny(prompt: string): Promise { const allAgents = this.list() if (allAgents.length === 0) { diff --git a/src/agent/runner.ts b/src/agent/runner.ts index 13667db..113f93c 100644 --- a/src/agent/runner.ts +++ b/src/agent/runner.ts @@ -25,7 +25,9 @@ import type { ToolUseContext, LLMAdapter, LLMChatOptions, + TraceEvent, } from '../types.js' +import { emitTrace } from '../utils/trace.js' import type { ToolRegistry } from '../tool/framework.js' import type { ToolExecutor } from '../tool/executor.js' @@ -76,6 +78,14 @@ export interface RunOptions { readonly onToolResult?: (name: string, result: ToolResult) => void /** Fired after each complete {@link LLMMessage} is appended. 
*/ readonly onMessage?: (message: LLMMessage) => void + /** Trace callback for observability spans. Async callbacks are safe. */ + readonly onTrace?: (event: TraceEvent) => void | Promise + /** Run ID for trace correlation. */ + readonly runId?: string + /** Task ID for trace correlation. */ + readonly taskId?: string + /** Agent name for trace correlation (overrides RunnerOptions.agentName). */ + readonly traceAgent?: string } /** The aggregated result returned when a full run completes. */ @@ -254,7 +264,23 @@ export class AgentRunner { // ------------------------------------------------------------------ // Step 1: Call the LLM and collect the full response for this turn. // ------------------------------------------------------------------ + const llmStartMs = Date.now() const response = await this.adapter.chat(conversationMessages, baseChatOptions) + if (options.onTrace) { + const llmEndMs = Date.now() + emitTrace(options.onTrace, { + type: 'llm_call', + runId: options.runId ?? '', + taskId: options.taskId, + agent: options.traceAgent ?? this.options.agentName ?? 'unknown', + model: this.options.model, + turn: turns, + tokens: response.usage, + startMs: llmStartMs, + endMs: llmEndMs, + durationMs: llmEndMs - llmStartMs, + }) + } totalUsage = addTokenUsage(totalUsage, response.usage) @@ -319,10 +345,25 @@ export class AgentRunner { result = { data: message, isError: true } } - const duration = Date.now() - startTime + const endTime = Date.now() + const duration = endTime - startTime options.onToolResult?.(block.name, result) + if (options.onTrace) { + emitTrace(options.onTrace, { + type: 'tool_call', + runId: options.runId ?? '', + taskId: options.taskId, + agent: options.traceAgent ?? this.options.agentName ?? 'unknown', + tool: block.name, + isError: result.isError ?? 
false, + startMs: startTime, + endMs: endTime, + durationMs: duration, + }) + } + const record: ToolCallRecord = { toolName: block.name, input: block.input, diff --git a/src/index.ts b/src/index.ts index f624707..312f852 100644 --- a/src/index.ts +++ b/src/index.ts @@ -161,7 +161,18 @@ export type { OrchestratorConfig, OrchestratorEvent, + // Trace + TraceEventType, + TraceEventBase, + TraceEvent, + LLMCallTrace, + ToolCallTrace, + TaskTrace, + AgentTrace, + // Memory MemoryEntry, MemoryStore, } from './types.js' + +export { generateRunId } from './utils/trace.js' diff --git a/src/orchestrator/orchestrator.ts b/src/orchestrator/orchestrator.ts index 848bfde..86f16c0 100644 --- a/src/orchestrator/orchestrator.ts +++ b/src/orchestrator/orchestrator.ts @@ -52,8 +52,10 @@ import type { TeamRunResult, TokenUsage, } from '../types.js' +import type { RunOptions } from '../agent/runner.js' import { Agent } from '../agent/agent.js' import { AgentPool } from '../agent/pool.js' +import { emitTrace, generateRunId } from '../utils/trace.js' import { ToolRegistry } from '../tool/framework.js' import { ToolExecutor } from '../tool/executor.js' import { registerBuiltInTools } from '../tool/built-in/index.js' @@ -260,6 +262,8 @@ interface RunContext { readonly scheduler: Scheduler readonly agentResults: Map readonly config: OrchestratorConfig + /** Trace run ID, present when `onTrace` is configured. */ + readonly runId?: string } /** @@ -338,10 +342,19 @@ async function executeQueue( // Build the prompt: inject shared memory context + task description const prompt = await buildTaskPrompt(task, team) + // Build trace context for this task's agent run + const traceOptions: Partial | undefined = config.onTrace + ? { onTrace: config.onTrace, runId: ctx.runId ?? '', taskId: task.id, traceAgent: assignee } + : undefined + + const taskStartMs = config.onTrace ? 
Date.now() : 0 + let retryCount = 0 + const result = await executeWithRetry( - () => pool.run(assignee, prompt), + () => pool.run(assignee, prompt, traceOptions), task, (retryData) => { + retryCount++ config.onProgress?.({ type: 'task_retry', task: task.id, @@ -351,6 +364,23 @@ async function executeQueue( }, ) + // Emit task trace + if (config.onTrace) { + const taskEndMs = Date.now() + emitTrace(config.onTrace, { + type: 'task', + runId: ctx.runId ?? '', + taskId: task.id, + taskTitle: task.title, + agent: assignee, + success: result.success, + retries: retryCount, + startMs: taskStartMs, + endMs: taskEndMs, + durationMs: taskEndMs - taskStartMs, + }) + } + ctx.agentResults.set(`${assignee}:${task.id}`, result) if (result.success) { @@ -441,8 +471,8 @@ async function buildTaskPrompt(task: Task, team: Team): Promise { */ export class OpenMultiAgent { private readonly config: Required< - Omit - > & Pick + Omit + > & Pick private readonly teams: Map = new Map() private completedTaskCount = 0 @@ -463,6 +493,7 @@ export class OpenMultiAgent { defaultBaseURL: config.defaultBaseURL, defaultApiKey: config.defaultApiKey, onProgress: config.onProgress, + onTrace: config.onTrace, } } @@ -520,7 +551,11 @@ export class OpenMultiAgent { data: { prompt }, }) - const result = await agent.run(prompt) + const traceOptions: Partial | undefined = this.config.onTrace + ? { onTrace: this.config.onTrace, runId: generateRunId(), traceAgent: config.name } + : undefined + + const result = await agent.run(prompt, traceOptions) this.config.onProgress?.({ type: 'agent_complete', @@ -578,6 +613,7 @@ export class OpenMultiAgent { const decompositionPrompt = this.buildDecompositionPrompt(goal, agentConfigs) const coordinatorAgent = buildAgent(coordinatorConfig) + const runId = this.config.onTrace ? 
generateRunId() : undefined this.config.onProgress?.({ type: 'agent_start', @@ -585,7 +621,10 @@ export class OpenMultiAgent { data: { phase: 'decomposition', goal }, }) - const decompositionResult = await coordinatorAgent.run(decompositionPrompt) + const decompTraceOptions: Partial | undefined = this.config.onTrace + ? { onTrace: this.config.onTrace, runId: runId ?? '', traceAgent: 'coordinator' } + : undefined + const decompositionResult = await coordinatorAgent.run(decompositionPrompt, decompTraceOptions) const agentResults = new Map() agentResults.set('coordinator:decompose', decompositionResult) @@ -629,6 +668,7 @@ export class OpenMultiAgent { scheduler, agentResults, config: this.config, + runId, } await executeQueue(queue, ctx) @@ -637,7 +677,10 @@ export class OpenMultiAgent { // Step 5: Coordinator synthesises final result // ------------------------------------------------------------------ const synthesisPrompt = await this.buildSynthesisPrompt(goal, queue.list(), team) - const synthesisResult = await coordinatorAgent.run(synthesisPrompt) + const synthTraceOptions: Partial | undefined = this.config.onTrace + ? { onTrace: this.config.onTrace, runId: runId ?? '', traceAgent: 'coordinator' } + : undefined + const synthesisResult = await coordinatorAgent.run(synthesisPrompt, synthTraceOptions) agentResults.set('coordinator', synthesisResult) this.config.onProgress?.({ @@ -707,6 +750,7 @@ export class OpenMultiAgent { scheduler, agentResults, config: this.config, + runId: this.config.onTrace ? 
generateRunId() : undefined, } await executeQueue(queue, ctx) diff --git a/src/types.ts b/src/types.ts index bd2ce64..418d54e 100644 --- a/src/types.ts +++ b/src/types.ts @@ -315,9 +315,69 @@ export interface OrchestratorConfig { readonly defaultProvider?: 'anthropic' | 'copilot' | 'openai' readonly defaultBaseURL?: string readonly defaultApiKey?: string - onProgress?: (event: OrchestratorEvent) => void + readonly onProgress?: (event: OrchestratorEvent) => void + readonly onTrace?: (event: TraceEvent) => void | Promise } +// --------------------------------------------------------------------------- +// Trace events — lightweight observability spans +// --------------------------------------------------------------------------- + +/** Trace event type discriminants. */ +export type TraceEventType = 'llm_call' | 'tool_call' | 'task' | 'agent' + +/** Shared fields present on every trace event. */ +export interface TraceEventBase { + /** Unique identifier for the entire run (runTeam / runTasks / runAgent call). */ + readonly runId: string + readonly type: TraceEventType + /** Unix epoch ms when the span started. */ + readonly startMs: number + /** Unix epoch ms when the span ended. */ + readonly endMs: number + /** Wall-clock duration in milliseconds (`endMs - startMs`). */ + readonly durationMs: number + /** Agent name associated with this span. */ + readonly agent: string + /** Task ID associated with this span. */ + readonly taskId?: string +} + +/** Emitted for each LLM API call (one per agent turn). */ +export interface LLMCallTrace extends TraceEventBase { + readonly type: 'llm_call' + readonly model: string + readonly turn: number + readonly tokens: TokenUsage +} + +/** Emitted for each tool execution. */ +export interface ToolCallTrace extends TraceEventBase { + readonly type: 'tool_call' + readonly tool: string + readonly isError: boolean +} + +/** Emitted when a task completes (wraps the full retry sequence). 
*/ +export interface TaskTrace extends TraceEventBase { + readonly type: 'task' + readonly taskId: string + readonly taskTitle: string + readonly success: boolean + readonly retries: number +} + +/** Emitted when an agent run completes (wraps the full conversation loop). */ +export interface AgentTrace extends TraceEventBase { + readonly type: 'agent' + readonly turns: number + readonly tokens: TokenUsage + readonly toolCalls: number +} + +/** Discriminated union of all trace event types. */ +export type TraceEvent = LLMCallTrace | ToolCallTrace | TaskTrace | AgentTrace + // --------------------------------------------------------------------------- // Memory // --------------------------------------------------------------------------- diff --git a/src/utils/trace.ts b/src/utils/trace.ts new file mode 100644 index 0000000..4f01f5f --- /dev/null +++ b/src/utils/trace.ts @@ -0,0 +1,34 @@ +/** + * @fileoverview Trace emission utilities for the observability layer. + */ + +import { randomUUID } from 'node:crypto' +import type { TraceEvent } from '../types.js' + +/** + * Safely emit a trace event. Swallows callback errors so a broken + * subscriber never crashes agent execution. + */ +export function emitTrace( + fn: ((event: TraceEvent) => void | Promise) | undefined, + event: TraceEvent, +): void { + if (!fn) return + try { + // Guard async callbacks: if fn returns a Promise, swallow its rejection + // so an async onTrace never produces an unhandled promise rejection. + const result = fn(event) as unknown + if (result && typeof (result as Promise).catch === 'function') { + ;(result as Promise).catch(noop) + } + } catch { + // Intentionally swallowed — observability must never break execution. + } +} + +function noop() {} + +/** Generate a unique run ID for trace correlation. 
*/ +export function generateRunId(): string { + return randomUUID() +} diff --git a/tests/trace.test.ts b/tests/trace.test.ts new file mode 100644 index 0000000..fbeb78c --- /dev/null +++ b/tests/trace.test.ts @@ -0,0 +1,453 @@ +import { describe, it, expect, vi } from 'vitest' +import { z } from 'zod' +import { Agent } from '../src/agent/agent.js' +import { AgentRunner, type RunOptions } from '../src/agent/runner.js' +import { ToolRegistry, defineTool } from '../src/tool/framework.js' +import { ToolExecutor } from '../src/tool/executor.js' +import { executeWithRetry } from '../src/orchestrator/orchestrator.js' +import { emitTrace, generateRunId } from '../src/utils/trace.js' +import { createTask } from '../src/task/task.js' +import type { + AgentConfig, + AgentRunResult, + LLMAdapter, + LLMResponse, + TraceEvent, +} from '../src/types.js' + +// --------------------------------------------------------------------------- +// Mock adapters +// --------------------------------------------------------------------------- + +function mockAdapter(responses: LLMResponse[]): LLMAdapter { + let callIndex = 0 + return { + name: 'mock', + async chat() { + return responses[callIndex++]! 
+ }, + async *stream() { + /* unused */ + }, + } +} + +function textResponse(text: string): LLMResponse { + return { + id: `resp-${Math.random().toString(36).slice(2)}`, + content: [{ type: 'text' as const, text }], + model: 'mock-model', + stop_reason: 'end_turn', + usage: { input_tokens: 10, output_tokens: 20 }, + } +} + +function toolUseResponse(toolName: string, input: Record): LLMResponse { + return { + id: `resp-${Math.random().toString(36).slice(2)}`, + content: [ + { + type: 'tool_use' as const, + id: `tu-${Math.random().toString(36).slice(2)}`, + name: toolName, + input, + }, + ], + model: 'mock-model', + stop_reason: 'tool_use', + usage: { input_tokens: 15, output_tokens: 25 }, + } +} + +function buildMockAgent( + config: AgentConfig, + responses: LLMResponse[], + registry?: ToolRegistry, + executor?: ToolExecutor, +): Agent { + const reg = registry ?? new ToolRegistry() + const exec = executor ?? new ToolExecutor(reg) + const adapter = mockAdapter(responses) + const agent = new Agent(config, reg, exec) + + const runner = new AgentRunner(adapter, reg, exec, { + model: config.model, + systemPrompt: config.systemPrompt, + maxTurns: config.maxTurns, + maxTokens: config.maxTokens, + temperature: config.temperature, + agentName: config.name, + }) + ;(agent as any).runner = runner + + return agent +} + +// --------------------------------------------------------------------------- +// emitTrace helper +// --------------------------------------------------------------------------- + +describe('emitTrace', () => { + it('does nothing when fn is undefined', () => { + // Should not throw + emitTrace(undefined, { + type: 'agent', + runId: 'r1', + agent: 'a', + turns: 1, + tokens: { input_tokens: 0, output_tokens: 0 }, + toolCalls: 0, + startMs: 0, + endMs: 0, + durationMs: 0, + }) + }) + + it('calls fn with the event', () => { + const fn = vi.fn() + const event: TraceEvent = { + type: 'agent', + runId: 'r1', + agent: 'a', + turns: 1, + tokens: { input_tokens: 0, 
output_tokens: 0 }, + toolCalls: 0, + startMs: 0, + endMs: 0, + durationMs: 0, + } + emitTrace(fn, event) + expect(fn).toHaveBeenCalledWith(event) + }) + + it('swallows errors thrown by callback', () => { + const fn = () => { throw new Error('boom') } + expect(() => + emitTrace(fn, { + type: 'agent', + runId: 'r1', + agent: 'a', + turns: 1, + tokens: { input_tokens: 0, output_tokens: 0 }, + toolCalls: 0, + startMs: 0, + endMs: 0, + durationMs: 0, + }), + ).not.toThrow() + }) + + it('swallows rejected promises from async callbacks', async () => { + // An async onTrace that rejects should not produce unhandled rejection + const fn = async () => { throw new Error('async boom') } + emitTrace(fn as unknown as (event: TraceEvent) => void, { + type: 'agent', + runId: 'r1', + agent: 'a', + turns: 1, + tokens: { input_tokens: 0, output_tokens: 0 }, + toolCalls: 0, + startMs: 0, + endMs: 0, + durationMs: 0, + }) + // If the rejection is not caught, vitest will fail with unhandled rejection. + // Give the microtask queue a tick to surface any unhandled rejection. 
+ await new Promise(resolve => setTimeout(resolve, 10)) + }) +}) + +describe('generateRunId', () => { + it('returns a UUID string', () => { + const id = generateRunId() + expect(id).toMatch(/^[0-9a-f-]{36}$/) + }) + + it('returns unique IDs', () => { + const ids = new Set(Array.from({ length: 100 }, generateRunId)) + expect(ids.size).toBe(100) + }) +}) + +// --------------------------------------------------------------------------- +// AgentRunner trace events +// --------------------------------------------------------------------------- + +describe('AgentRunner trace events', () => { + it('emits llm_call trace for each LLM turn', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + const executor = new ToolExecutor(registry) + const adapter = mockAdapter([textResponse('Hello!')]) + + const runner = new AgentRunner(adapter, registry, executor, { + model: 'test-model', + agentName: 'test-agent', + }) + + const runOptions: RunOptions = { + onTrace: (e) => traces.push(e), + runId: 'run-1', + traceAgent: 'test-agent', + } + + await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'hi' }] }], + runOptions, + ) + + const llmTraces = traces.filter(t => t.type === 'llm_call') + expect(llmTraces).toHaveLength(1) + + const llm = llmTraces[0]! 
+ expect(llm.type).toBe('llm_call') + expect(llm.runId).toBe('run-1') + expect(llm.agent).toBe('test-agent') + expect(llm.model).toBe('test-model') + expect(llm.turn).toBe(1) + expect(llm.tokens).toEqual({ input_tokens: 10, output_tokens: 20 }) + expect(llm.durationMs).toBeGreaterThanOrEqual(0) + expect(llm.startMs).toBeLessThanOrEqual(llm.endMs) + }) + + it('emits tool_call trace with correct fields', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + registry.register( + defineTool({ + name: 'echo', + description: 'echoes', + inputSchema: z.object({ msg: z.string() }), + execute: async ({ msg }) => ({ data: msg }), + }), + ) + const executor = new ToolExecutor(registry) + const adapter = mockAdapter([ + toolUseResponse('echo', { msg: 'hello' }), + textResponse('Done'), + ]) + + const runner = new AgentRunner(adapter, registry, executor, { + model: 'test-model', + agentName: 'tooler', + }) + + await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'test' }] }], + { onTrace: (e) => traces.push(e), runId: 'run-2', traceAgent: 'tooler' }, + ) + + const toolTraces = traces.filter(t => t.type === 'tool_call') + expect(toolTraces).toHaveLength(1) + + const tool = toolTraces[0]! 
+ expect(tool.type).toBe('tool_call') + expect(tool.runId).toBe('run-2') + expect(tool.agent).toBe('tooler') + expect(tool.tool).toBe('echo') + expect(tool.isError).toBe(false) + expect(tool.durationMs).toBeGreaterThanOrEqual(0) + }) + + it('tool_call trace has isError: true on tool failure', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + registry.register( + defineTool({ + name: 'boom', + description: 'fails', + inputSchema: z.object({}), + execute: async () => { throw new Error('fail') }, + }), + ) + const executor = new ToolExecutor(registry) + const adapter = mockAdapter([ + toolUseResponse('boom', {}), + textResponse('Handled'), + ]) + + const runner = new AgentRunner(adapter, registry, executor, { + model: 'test-model', + agentName: 'err-agent', + }) + + await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'test' }] }], + { onTrace: (e) => traces.push(e), runId: 'run-3', traceAgent: 'err-agent' }, + ) + + const toolTraces = traces.filter(t => t.type === 'tool_call') + expect(toolTraces).toHaveLength(1) + expect(toolTraces[0]!.isError).toBe(true) + }) + + it('does not call Date.now for LLM timing when onTrace is absent', async () => { + // This test just verifies no errors occur when onTrace is not provided + const registry = new ToolRegistry() + const executor = new ToolExecutor(registry) + const adapter = mockAdapter([textResponse('hi')]) + + const runner = new AgentRunner(adapter, registry, executor, { + model: 'test-model', + }) + + const result = await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'test' }] }], + {}, + ) + + expect(result.output).toBe('hi') + }) +}) + +// --------------------------------------------------------------------------- +// Agent-level trace events +// --------------------------------------------------------------------------- + +describe('Agent trace events', () => { + it('emits agent trace with turns, tokens, and toolCalls', async () => { + const traces: 
TraceEvent[] = [] + const config: AgentConfig = { + name: 'my-agent', + model: 'mock-model', + systemPrompt: 'You are a test.', + } + + const agent = buildMockAgent(config, [textResponse('Hello world')]) + + const runOptions: Partial = { + onTrace: (e) => traces.push(e), + runId: 'run-agent-1', + traceAgent: 'my-agent', + } + + const result = await agent.run('Say hello', runOptions) + expect(result.success).toBe(true) + + const agentTraces = traces.filter(t => t.type === 'agent') + expect(agentTraces).toHaveLength(1) + + const at = agentTraces[0]! + expect(at.type).toBe('agent') + expect(at.runId).toBe('run-agent-1') + expect(at.agent).toBe('my-agent') + expect(at.turns).toBe(1) // one assistant message + expect(at.tokens).toEqual({ input_tokens: 10, output_tokens: 20 }) + expect(at.toolCalls).toBe(0) + expect(at.durationMs).toBeGreaterThanOrEqual(0) + }) + + it('all traces share the same runId', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + registry.register( + defineTool({ + name: 'greet', + description: 'greets', + inputSchema: z.object({ name: z.string() }), + execute: async ({ name }) => ({ data: `Hi ${name}` }), + }), + ) + const executor = new ToolExecutor(registry) + const config: AgentConfig = { + name: 'multi-trace-agent', + model: 'mock-model', + tools: ['greet'], + } + + const agent = buildMockAgent( + config, + [ + toolUseResponse('greet', { name: 'world' }), + textResponse('Done'), + ], + registry, + executor, + ) + + const runId = 'shared-run-id' + await agent.run('test', { + onTrace: (e) => traces.push(e), + runId, + traceAgent: 'multi-trace-agent', + }) + + // Should have: 2 llm_call, 1 tool_call, 1 agent + expect(traces.length).toBeGreaterThanOrEqual(4) + + for (const trace of traces) { + expect(trace.runId).toBe(runId) + } + }) + + it('onTrace error does not break agent execution', async () => { + const config: AgentConfig = { + name: 'resilient-agent', + model: 'mock-model', + } + + const agent = 
buildMockAgent(config, [textResponse('OK')]) + + const result = await agent.run('test', { + onTrace: () => { throw new Error('callback exploded') }, + runId: 'run-err', + traceAgent: 'resilient-agent', + }) + + // The run should still succeed despite the broken callback + expect(result.success).toBe(true) + expect(result.output).toBe('OK') + }) + + it('per-turn token usage in llm_call traces', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + registry.register( + defineTool({ + name: 'noop', + description: 'noop', + inputSchema: z.object({}), + execute: async () => ({ data: 'ok' }), + }), + ) + const executor = new ToolExecutor(registry) + + // Two LLM calls: first triggers a tool, second is the final response + const resp1: LLMResponse = { + id: 'r1', + content: [{ type: 'tool_use', id: 'tu1', name: 'noop', input: {} }], + model: 'mock-model', + stop_reason: 'tool_use', + usage: { input_tokens: 100, output_tokens: 50 }, + } + const resp2: LLMResponse = { + id: 'r2', + content: [{ type: 'text', text: 'Final answer' }], + model: 'mock-model', + stop_reason: 'end_turn', + usage: { input_tokens: 200, output_tokens: 100 }, + } + + const adapter = mockAdapter([resp1, resp2]) + const runner = new AgentRunner(adapter, registry, executor, { + model: 'mock-model', + agentName: 'token-agent', + }) + + await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'go' }] }], + { onTrace: (e) => traces.push(e), runId: 'run-tok', traceAgent: 'token-agent' }, + ) + + const llmTraces = traces.filter(t => t.type === 'llm_call') + expect(llmTraces).toHaveLength(2) + + // Each trace carries its own turn's token usage, not the aggregate + expect(llmTraces[0]!.tokens).toEqual({ input_tokens: 100, output_tokens: 50 }) + expect(llmTraces[1]!.tokens).toEqual({ input_tokens: 200, output_tokens: 100 }) + + // Turn numbers should be sequential + expect(llmTraces[0]!.turn).toBe(1) + expect(llmTraces[1]!.turn).toBe(2) + }) +}) From 
2253122515f72cdd2fed91b0e5bcf4fa08e51fec Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 17:32:17 +0800 Subject: [PATCH 36/38] docs: reposition README around TypeScript-native, goal-to-result, lightweight differentiators --- README.md | 12 ++++++------ README_zh.md | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 7c36d83..45d8db6 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # Open Multi-Agent -Build AI agent teams that decompose goals into tasks automatically. Define agents with roles and tools, describe a goal — the framework plans the task graph, schedules dependencies, and runs everything in parallel. +TypeScript framework for multi-agent orchestration. One `runTeam()` call from goal to result — the framework decomposes it into tasks, resolves dependencies, and runs agents in parallel. -3 runtime dependencies. 27 source files. One `runTeam()` call from goal to result. +3 runtime dependencies · 27 source files · Deploys anywhere Node.js runs [![GitHub stars](https://img.shields.io/github/stars/JackChen-me/open-multi-agent)](https://github.com/JackChen-me/open-multi-agent/stargazers) [![license](https://img.shields.io/github/license/JackChen-me/open-multi-agent)](./LICENSE) @@ -12,14 +12,14 @@ Build AI agent teams that decompose goals into tasks automatically. Define agent ## Why Open Multi-Agent? -- **Auto Task Decomposition** — Describe a goal in plain text. A built-in coordinator agent breaks it into a task DAG with dependencies and assignees — no manual orchestration needed. -- **Multi-Agent Teams** — Define agents with different roles, tools, and even different models. They collaborate through a message bus and shared memory. -- **Task DAG Scheduling** — Tasks have dependencies. The framework resolves them topologically — dependent tasks wait, independent tasks run in parallel. +- **Goal In, Result Out** — `runTeam(team, "Build a REST API")`. 
A coordinator agent auto-decomposes the goal into a task DAG with dependencies and assignees, runs independent tasks in parallel, and synthesizes the final output. No manual task definitions or graph wiring required. +- **TypeScript-Native** — Built for the Node.js ecosystem. `npm install`, import, run. No Python runtime, no subprocess bridge, no sidecar services. Embed in Express, Next.js, serverless functions, or CI/CD pipelines. +- **Auditable and Lightweight** — 3 runtime dependencies (`@anthropic-ai/sdk`, `openai`, `zod`). 27 source files. The entire codebase is readable in an afternoon. - **Model Agnostic** — Claude, GPT, Gemma 4, and local models (Ollama, vLLM, LM Studio) in the same team. Swap models per agent via `baseURL`. +- **Multi-Agent Collaboration** — Agents with different roles, tools, and models collaborate through a message bus and shared memory. - **Structured Output** — Add `outputSchema` (Zod) to any agent. Output is parsed as JSON, validated, and auto-retried once on failure. Access typed results via `result.structured`. - **Task Retry** — Set `maxRetries` on tasks for automatic retry with exponential backoff. Failed attempts accumulate token usage for accurate billing. - **Observability** — Optional `onTrace` callback emits structured spans for every LLM call, tool execution, task, and agent run — with timing, token usage, and a shared `runId` for correlation. Zero overhead when not subscribed, zero extra dependencies. -- **In-Process Execution** — No subprocess overhead. Everything runs in one Node.js process. Deploy to serverless, Docker, CI/CD. 
## Quick Start diff --git a/README_zh.md b/README_zh.md index 458d6de..a419674 100644 --- a/README_zh.md +++ b/README_zh.md @@ -1,8 +1,8 @@ # Open Multi-Agent -构建能自动拆解目标的 AI 智能体团队。定义智能体的角色和工具,描述一个目标——框架自动规划任务图、调度依赖、并行执行。 +TypeScript 多智能体编排框架。一次 `runTeam()` 调用从目标到结果——框架自动拆解任务、解析依赖、并行执行。 -3 个运行时依赖,27 个源文件,一次 `runTeam()` 调用从目标到结果。 +3 个运行时依赖 · 27 个源文件 · Node.js 能跑的地方都能部署 [![GitHub stars](https://img.shields.io/github/stars/JackChen-me/open-multi-agent)](https://github.com/JackChen-me/open-multi-agent/stargazers) [![license](https://img.shields.io/github/license/JackChen-me/open-multi-agent)](./LICENSE) @@ -12,14 +12,14 @@ ## 为什么选择 Open Multi-Agent? -- **自动任务拆解** — 用自然语言描述目标,内置的协调者智能体自动将其拆解为带依赖关系和分配的任务图——无需手动编排。 -- **多智能体团队** — 定义不同角色、工具甚至不同模型的智能体。它们通过消息总线和共享内存协作。 -- **任务 DAG 调度** — 任务之间存在依赖关系。框架进行拓扑排序——有依赖的任务等待,无依赖的任务并行执行。 +- **目标进,结果出** — `runTeam(team, "构建一个 REST API")`。协调者智能体自动将目标拆解为带依赖关系的任务图,分配给对应智能体,独立任务并行执行,最终合成输出。无需手动定义任务或编排流程图。 +- **TypeScript 原生** — 为 Node.js 生态而生。`npm install` 即用,无需 Python 运行时、无子进程桥接、无额外基础设施。可嵌入 Express、Next.js、Serverless 函数或 CI/CD 流水线。 +- **可审计、极轻量** — 3 个运行时依赖(`@anthropic-ai/sdk`、`openai`、`zod`),27 个源文件。一个下午就能读完全部源码。 - **模型无关** — Claude、GPT、Gemma 4 和本地模型(Ollama、vLLM、LM Studio)可以在同一个团队中使用。通过 `baseURL` 即可接入任何 OpenAI 兼容服务。 +- **多智能体协作** — 定义不同角色、工具和模型的智能体,通过消息总线和共享内存协作。 - **结构化输出** — 为任意智能体添加 `outputSchema`(Zod),输出自动解析为 JSON 并校验,校验失败自动重试一次。通过 `result.structured` 获取类型化结果。 - **任务重试** — 为任务设置 `maxRetries`,失败时自动指数退避重试。所有尝试的 token 用量累计,确保计费准确。 - **可观测性** — 可选的 `onTrace` 回调为每次 LLM 调用、工具执行、任务和智能体运行发出结构化 span 事件——包含耗时、token 用量和共享的 `runId` 用于关联追踪。未订阅时零开销,零额外依赖。 -- **进程内执行** — 没有子进程开销。所有内容在一个 Node.js 进程中运行。可部署到 Serverless、Docker、CI/CD。 ## 快速开始 From fea9df3f7acaa460c80802991dca4a0f7680bf90 Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 17:38:41 +0800 Subject: [PATCH 37/38] chore: add coverage/ to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 523e756..f321a49 100644 --- 
a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ node_modules/ dist/ +coverage/ *.tgz .DS_Store promo-*.md From 071d5dce61d48ba27b3f3d3efafe7761422830c7 Mon Sep 17 00:00:00 2001 From: JackChen Date: Fri, 3 Apr 2026 19:18:52 +0800 Subject: [PATCH 38/38] docs: add Latent Space mention to README --- README.md | 2 +- README_zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 45d8db6..d9b5d39 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ TypeScript framework for multi-agent orchestration. One `runTeam()` call from goal to result — the framework decomposes it into tasks, resolves dependencies, and runs agents in parallel. -3 runtime dependencies · 27 source files · Deploys anywhere Node.js runs +3 runtime dependencies · 27 source files · Deploys anywhere Node.js runs · Mentioned in [Latent Space](https://www.latent.space/p/ainews-a-quiet-april-fools) AI News [![GitHub stars](https://img.shields.io/github/stars/JackChen-me/open-multi-agent)](https://github.com/JackChen-me/open-multi-agent/stargazers) [![license](https://img.shields.io/github/license/JackChen-me/open-multi-agent)](./LICENSE) diff --git a/README_zh.md b/README_zh.md index a419674..a8b680c 100644 --- a/README_zh.md +++ b/README_zh.md @@ -2,7 +2,7 @@ TypeScript 多智能体编排框架。一次 `runTeam()` 调用从目标到结果——框架自动拆解任务、解析依赖、并行执行。 -3 个运行时依赖 · 27 个源文件 · Node.js 能跑的地方都能部署 +3 个运行时依赖 · 27 个源文件 · Node.js 能跑的地方都能部署 · 被 [Latent Space](https://www.latent.space/p/ainews-a-quiet-april-fools) AI News 提及(AI 工程领域头部 Newsletter,17 万+订阅者) [![GitHub stars](https://img.shields.io/github/stars/JackChen-me/open-multi-agent)](https://github.com/JackChen-me/open-multi-agent/stargazers) [![license](https://img.shields.io/github/license/JackChen-me/open-multi-agent)](./LICENSE)