diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..8f43f71 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,40 @@ +--- +name: Bug Report +about: Report a bug to help us improve +title: "[Bug] " +labels: bug +assignees: '' +--- + +## Describe the bug + +A clear and concise description of what the bug is. + +## To Reproduce + +Steps to reproduce the behavior: + +1. Configure agent with '...' +2. Call `runTeam(...)` with '...' +3. See error + +## Expected behavior + +A clear description of what you expected to happen. + +## Error output + +``` +Paste any error messages or logs here +``` + +## Environment + +- OS: [e.g. macOS 14, Ubuntu 22.04] +- Node.js version: [e.g. 20.11] +- Package version: [e.g. 0.1.0] +- LLM provider: [e.g. Anthropic, OpenAI] + +## Additional context + +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..c31759e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,23 @@ +--- +name: Feature Request +about: Suggest an idea for this project +title: "[Feature] " +labels: enhancement +assignees: '' +--- + +## Problem + +A clear description of the problem or limitation you're experiencing. + +## Proposed Solution + +Describe what you'd like to happen. + +## Alternatives Considered + +Any alternative solutions or features you've considered. + +## Additional context + +Add any other context, code examples, or screenshots about the feature request here. 
diff --git a/.gitignore b/.gitignore index 523e756..f321a49 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ node_modules/ dist/ +coverage/ *.tgz .DS_Store promo-*.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..6cbeb45 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,80 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Commands + +```bash +npm run build # Compile TypeScript (src/ → dist/) +npm run dev # Watch mode compilation +npm run lint # Type-check only (tsc --noEmit) +npm test # Run all tests (vitest run) +npm run test:watch # Vitest watch mode +``` + +Tests live in `tests/` (vitest). Examples in `examples/` are standalone scripts requiring API keys (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`). + +## Architecture + +ES module TypeScript framework for multi-agent orchestration. Three runtime dependencies: `@anthropic-ai/sdk`, `openai`, `zod`. + +### Core Execution Flow + +**`OpenMultiAgent`** (`src/orchestrator/orchestrator.ts`) is the top-level public API with three execution modes: + +1. **`runAgent(config, prompt)`** — single agent, one-shot +2. **`runTeam(team, goal)`** — automatic orchestration: a temporary "coordinator" agent decomposes the goal into a task DAG via LLM call, then tasks execute in dependency order +3. **`runTasks(team, tasks)`** — explicit task pipeline with user-defined dependencies + +### The Coordinator Pattern (runTeam) + +This is the framework's key feature. When `runTeam()` is called: +1. A coordinator agent receives the goal + agent roster and produces a JSON task array (title, description, assignee, dependsOn) +2. `TaskQueue` resolves dependencies topologically — independent tasks run in parallel, dependent tasks wait +3. `Scheduler` auto-assigns any unassigned tasks (strategies: `dependency-first` default, `round-robin`, `least-busy`, `capability-match`) +4. 
Each task result is written to `SharedMemory` so subsequent agents see prior results +5. The coordinator synthesizes all task results into a final output + +### Layer Map + +| Layer | Files | Responsibility | +|-------|-------|----------------| +| Orchestrator | `orchestrator/orchestrator.ts`, `orchestrator/scheduler.ts` | Top-level API, task decomposition, coordinator pattern | +| Team | `team/team.ts`, `team/messaging.ts` | Agent roster, MessageBus (point-to-point + broadcast), SharedMemory binding | +| Agent | `agent/agent.ts`, `agent/runner.ts`, `agent/pool.ts`, `agent/structured-output.ts` | Agent lifecycle (idle→running→completed/error), conversation loop, concurrency pool with Semaphore, structured output validation | +| Task | `task/queue.ts`, `task/task.ts` | Dependency-aware queue, auto-unblock on completion, cascade failure to dependents | +| Tool | `tool/framework.ts`, `tool/executor.ts`, `tool/built-in/` | `defineTool()` with Zod schemas, ToolRegistry, parallel batch execution with concurrency semaphore | +| LLM | `llm/adapter.ts`, `llm/anthropic.ts`, `llm/openai.ts` | `LLMAdapter` interface (`chat` + `stream`), factory `createAdapter()` | +| Memory | `memory/shared.ts`, `memory/store.ts` | Namespaced key-value store (`agentName/key`), markdown summary injection into prompts | +| Types | `types.ts` | All interfaces in one file to avoid circular deps | +| Exports | `index.ts` | Public API surface | + +### Agent Conversation Loop (AgentRunner) + +`AgentRunner.run()`: send messages → extract tool-use blocks → execute tools in parallel batch → append results → loop until `end_turn` or `maxTurns` exhausted. Accumulates `TokenUsage` across all turns. + +### Concurrency Control + +Two independent semaphores: `AgentPool` (max concurrent agent runs, default 5) and `ToolExecutor` (max concurrent tool calls, default 4). + +### Structured Output + +Optional `outputSchema` (Zod) on `AgentConfig`. When set, the agent's final output is parsed as JSON and validated. 
On validation failure, one retry with error feedback is attempted. Validated data is available via `result.structured`. Logic lives in `agent/structured-output.ts`, wired into `Agent.executeRun()`. + +### Task Retry + +Optional `maxRetries`, `retryDelayMs`, `retryBackoff` on task config (used via `runTasks()`). `executeWithRetry()` in `orchestrator.ts` handles the retry loop with exponential backoff (capped at 30s). Token usage is accumulated across all attempts. Emits `task_retry` event via `onProgress`. + +### Error Handling + +- Tool errors → caught, returned as `ToolResult(isError: true)`, never thrown +- Task failures → retry if `maxRetries > 0`, then cascade to all dependents; independent tasks continue +- LLM API errors → propagate to caller + +### Built-in Tools + +`bash`, `file_read`, `file_write`, `file_edit`, `grep` — registered via `registerBuiltInTools(registry)`. + +### Adding an LLM Adapter + +Implement `LLMAdapter` interface with `chat(messages, options)` and `stream(messages, options)`, then register in `createAdapter()` factory in `src/llm/adapter.ts`. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..1036d4e --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,48 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a positive experience for everyone, regardless of background or +identity. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment: + +- Using welcoming and inclusive language +- Being respectful of differing viewpoints and experiences +- Gracefully accepting constructive feedback +- Focusing on what is best for the community +- Showing empathy towards other community members + +Examples of unacceptable behavior: + +- Trolling, insulting or derogatory comments, and personal attacks +- Public or private unwelcome conduct +- Publishing others' private information without explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate or harmful. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. + +## Enforcement + +Instances of unacceptable behavior may be reported to the community leaders +responsible for enforcement at **jack@yuanasi.com**. All complaints will be +reviewed and investigated promptly and fairly. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org), +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html). diff --git a/DECISIONS.md b/DECISIONS.md new file mode 100644 index 0000000..a16151f --- /dev/null +++ b/DECISIONS.md @@ -0,0 +1,43 @@ +# Architecture Decisions + +This document records deliberate "won't do" decisions for the project. 
These are features we evaluated and chose NOT to implement — not because they're bad ideas, but because they conflict with our positioning as the **simplest multi-agent framework**. + +If you're considering a PR in any of these areas, please open a discussion first. + +## Won't Do + +### 1. Agent Handoffs + +**What**: Agent A transfers an in-progress conversation to Agent B (like OpenAI Agents SDK `handoff()`). + +**Why not**: Handoffs are a different paradigm from our task-based model. Our tasks have clear boundaries — one agent, one task, one result. Handoffs blur those boundaries and add state-transfer complexity. Users who need handoffs likely need a different framework (OpenAI Agents SDK is purpose-built for this). + +### 2. State Persistence / Checkpointing + +**What**: Save workflow state to a database so long-running workflows can resume after crashes (like LangGraph checkpointing). + +**Why not**: Requires a storage backend (SQLite, Redis, Postgres), schema migrations, and serialization logic. This is enterprise infrastructure — it triples the complexity surface. Our target users run workflows that complete in seconds to minutes, not hours. If you need checkpointing, LangGraph is the right tool. + +**Related**: Closing #20 with this rationale. + +### 3. A2A Protocol (Agent-to-Agent) + +**What**: Google's open protocol for agents on different servers to discover and communicate with each other. + +**Why not**: Too early — the spec is still evolving and adoption is minimal. Our users run agents in a single process, not across distributed services. If A2A matures and there's real demand, we can revisit. Today it would add complexity for zero practical benefit. + +### 4. MCP Integration (Model Context Protocol) + +**What**: Anthropic's protocol for connecting LLMs to external tools and data sources. + +**Why not**: MCP is valuable but targets a different layer. Our `defineTool()` API already lets users wrap any external service as a tool in ~10 lines of code. 
Adding MCP would mean maintaining protocol compatibility, transport layers, and tool discovery — complexity that serves tool platform builders, not our target users who just want to run agent teams. + +### 5. Dashboard / Visualization + +**What**: Built-in web UI to visualize task DAGs, agent activity, and token usage. + +**Why not**: We expose data, we don't build UI. The `onProgress` callback and upcoming `onTrace` (#18) give users all the raw data. They can pipe it into Grafana, build a custom dashboard, or use console logs. Shipping a web UI means owning a frontend stack, which is outside our scope. + +--- + +*Last updated: 2026-04-03* diff --git a/README.md b/README.md index e3e4bef..35d8715 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # Open Multi-Agent -Build AI agent teams that decompose goals into tasks automatically. Define agents with roles and tools, describe a goal — the framework plans the task graph, schedules dependencies, and runs everything in parallel. +TypeScript framework for multi-agent orchestration. One `runTeam()` call from goal to result — the framework decomposes it into tasks, resolves dependencies, and runs agents in parallel. -3 runtime dependencies. 27 source files. One `runTeam()` call from goal to result. +3 runtime dependencies · 27 source files · Deploys anywhere Node.js runs · Mentioned in [Latent Space](https://www.latent.space/p/ainews-a-quiet-april-fools) AI News [![GitHub stars](https://img.shields.io/github/stars/JackChen-me/open-multi-agent)](https://github.com/JackChen-me/open-multi-agent/stargazers) [![license](https://img.shields.io/github/license/JackChen-me/open-multi-agent)](./LICENSE) @@ -12,11 +12,14 @@ Build AI agent teams that decompose goals into tasks automatically. Define agent ## Why Open Multi-Agent? -- **Auto Task Decomposition** — Describe a goal in plain text. A built-in coordinator agent breaks it into a task DAG with dependencies and assignees — no manual orchestration needed. 
-- **Multi-Agent Teams** — Define agents with different roles, tools, and even different models. They collaborate through a message bus and shared memory. -- **Task DAG Scheduling** — Tasks have dependencies. The framework resolves them topologically — dependent tasks wait, independent tasks run in parallel. -- **Model Agnostic** — Claude, GPT, Gemini, and local models (Ollama, vLLM, LM Studio) in the same team. Swap models per agent via `baseURL`. -- **In-Process Execution** — No subprocess overhead. Everything runs in one Node.js process. Deploy to serverless, Docker, CI/CD. +- **Goal In, Result Out** — `runTeam(team, "Build a REST API")`. A coordinator agent auto-decomposes the goal into a task DAG with dependencies and assignees, runs independent tasks in parallel, and synthesizes the final output. No manual task definitions or graph wiring required. +- **TypeScript-Native** — Built for the Node.js ecosystem. `npm install`, import, run. No Python runtime, no subprocess bridge, no sidecar services. Embed in Express, Next.js, serverless functions, or CI/CD pipelines. +- **Auditable and Lightweight** — 3 runtime dependencies (`@anthropic-ai/sdk`, `openai`, `zod`). 27 source files. The entire codebase is readable in an afternoon. +- **Model Agnostic** — Claude, GPT, Gemma 4, and local models (Ollama, vLLM, LM Studio) in the same team. Swap models per agent via `baseURL`. +- **Multi-Agent Collaboration** — Agents with different roles, tools, and models collaborate through a message bus and shared memory. +- **Structured Output** — Add `outputSchema` (Zod) to any agent. Output is parsed as JSON, validated, and auto-retried once on failure. Access typed results via `result.structured`. +- **Task Retry** — Set `maxRetries` on tasks for automatic retry with exponential backoff. Failed attempts accumulate token usage for accurate billing. 
+- **Observability** — Optional `onTrace` callback emits structured spans for every LLM call, tool execution, task, and agent run — with timing, token usage, and a shared `runId` for correlation. Zero overhead when not subscribed, zero extra dependencies. ## Quick Start @@ -103,12 +106,6 @@ Tokens: 12847 output tokens | Auto-orchestrated team | `runTeam()` | Give a goal, framework plans and executes | | Explicit pipeline | `runTasks()` | You define the task graph and assignments | -## Contributors - - - - - ## Examples All examples are runnable scripts in [`examples/`](./examples/). Run any of them with `npx tsx`: @@ -126,6 +123,11 @@ npx tsx examples/01-single-agent.ts | [05 — Copilot](examples/05-copilot-test.ts) | GitHub Copilot as an LLM provider | | [06 — Local Model](examples/06-local-model.ts) | Ollama + Claude in one pipeline via `baseURL` (works with vLLM, LM Studio, etc.) | | [07 — Fan-Out / Aggregate](examples/07-fan-out-aggregate.ts) | `runParallel()` MapReduce — 3 analysts in parallel, then synthesize | +| [08 — Gemma 4 Local](examples/08-gemma4-local.ts) | `runTasks()` + `runTeam()` with local Gemma 4 via Ollama — zero API cost | +| [09 — Structured Output](examples/09-structured-output.ts) | `outputSchema` (Zod) on AgentConfig — validated JSON via `result.structured` | +| [10 — Task Retry](examples/10-task-retry.ts) | `maxRetries` / `retryDelayMs` / `retryBackoff` with `task_retry` progress events | +| [11 — Trace Observability](examples/11-trace-observability.ts) | `onTrace` callback — structured spans for LLM calls, tools, tasks, and agents | +| [12 — Grok](examples/12-grok.ts) | Same as example 02 (`runTeam()` collaboration) with Grok (`XAI_API_KEY`) | ## Architecture @@ -185,11 +187,27 @@ npx tsx examples/01-single-agent.ts |----------|--------|---------|--------| | Anthropic (Claude) | `provider: 'anthropic'` | `ANTHROPIC_API_KEY` | Verified | | OpenAI (GPT) | `provider: 'openai'` | `OPENAI_API_KEY` | Verified | +| Grok (xAI) | `provider: 
'grok'` | `XAI_API_KEY` | Verified | | GitHub Copilot | `provider: 'copilot'` | `GITHUB_TOKEN` | Verified | | Gemini | `provider: 'gemini'` | `GEMINI_API_KEY` | Verified | | Ollama / vLLM / LM Studio | `provider: 'openai'` + `baseURL` | — | Verified | -Any OpenAI-compatible API should work via `provider: 'openai'` + `baseURL` (DeepSeek, Groq, Mistral, Qwen, MiniMax, etc.). These providers have not been fully verified yet — contributions welcome via [#25](https://github.com/JackChen-me/open-multi-agent/issues/25). +Verified local models with tool-calling: **Gemma 4** (see [example 08](examples/08-gemma4-local.ts)). + +Any OpenAI-compatible API should work via `provider: 'openai'` + `baseURL` (DeepSeek, Groq, Mistral, Qwen, MiniMax, etc.). **Grok now has first-class support** via `provider: 'grok'`. + +### LLM Configuration Examples + +```typescript +const grokAgent: AgentConfig = { + name: 'grok-agent', + provider: 'grok', + model: 'grok-4', + systemPrompt: 'You are a helpful assistant.', +} +``` + +(Set your `XAI_API_KEY` environment variable — no `baseURL` needed anymore.) ## Contributing @@ -199,9 +217,25 @@ Issues, feature requests, and PRs are welcome. Some areas where contributions wo - **Examples** — Real-world workflows and use cases. - **Documentation** — Guides, tutorials, and API docs. +## Author + +> JackChen — Ex PM (¥100M+ revenue), now indie builder. Follow on [X](https://x.com/JackChen_x) for AI Agent insights. 
+ +## Contributors + + + + + ## Star History -[![Star History Chart](https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260402b)](https://star-history.com/#JackChen-me/open-multi-agent&Date) + + + + + Star History Chart + + ## License diff --git a/README_zh.md b/README_zh.md index 6a0b46d..7a4acc1 100644 --- a/README_zh.md +++ b/README_zh.md @@ -1,8 +1,8 @@ # Open Multi-Agent -构建能自动拆解目标的 AI 智能体团队。定义智能体的角色和工具,描述一个目标——框架自动规划任务图、调度依赖、并行执行。 +TypeScript 多智能体编排框架。一次 `runTeam()` 调用从目标到结果——框架自动拆解任务、解析依赖、并行执行。 -3 个运行时依赖,27 个源文件,一次 `runTeam()` 调用从目标到结果。 +3 个运行时依赖 · 27 个源文件 · Node.js 能跑的地方都能部署 · 被 [Latent Space](https://www.latent.space/p/ainews-a-quiet-april-fools) AI News 提及(AI 工程领域头部 Newsletter,17 万+订阅者) [![GitHub stars](https://img.shields.io/github/stars/JackChen-me/open-multi-agent)](https://github.com/JackChen-me/open-multi-agent/stargazers) [![license](https://img.shields.io/github/license/JackChen-me/open-multi-agent)](./LICENSE) @@ -12,11 +12,14 @@ ## 为什么选择 Open Multi-Agent? 
-- **自动任务拆解** — 用自然语言描述目标,内置的协调者智能体自动将其拆解为带依赖关系和分配的任务图——无需手动编排。 -- **多智能体团队** — 定义不同角色、工具甚至不同模型的智能体。它们通过消息总线和共享内存协作。 -- **任务 DAG 调度** — 任务之间存在依赖关系。框架进行拓扑排序——有依赖的任务等待,无依赖的任务并行执行。 -- **模型无关** — Claude、GPT、Gemini 和本地模型(Ollama、vLLM、LM Studio)可以在同一个团队中使用。通过 `baseURL` 即可接入任何 OpenAI 兼容服务。 -- **进程内执行** — 没有子进程开销。所有内容在一个 Node.js 进程中运行。可部署到 Serverless、Docker、CI/CD。 +- **目标进,结果出** — `runTeam(team, "构建一个 REST API")`。协调者智能体自动将目标拆解为带依赖关系的任务图,分配给对应智能体,独立任务并行执行,最终合成输出。无需手动定义任务或编排流程图。 +- **TypeScript 原生** — 为 Node.js 生态而生。`npm install` 即用,无需 Python 运行时、无子进程桥接、无额外基础设施。可嵌入 Express、Next.js、Serverless 函数或 CI/CD 流水线。 +- **可审计、极轻量** — 3 个运行时依赖(`@anthropic-ai/sdk`、`openai`、`zod`),27 个源文件。一个下午就能读完全部源码。 +- **模型无关** — Claude、GPT、Gemma 4 和本地模型(Ollama、vLLM、LM Studio)可以在同一个团队中使用。通过 `baseURL` 即可接入任何 OpenAI 兼容服务。 +- **多智能体协作** — 定义不同角色、工具和模型的智能体,通过消息总线和共享内存协作。 +- **结构化输出** — 为任意智能体添加 `outputSchema`(Zod),输出自动解析为 JSON 并校验,校验失败自动重试一次。通过 `result.structured` 获取类型化结果。 +- **任务重试** — 为任务设置 `maxRetries`,失败时自动指数退避重试。所有尝试的 token 用量累计,确保计费准确。 +- **可观测性** — 可选的 `onTrace` 回调为每次 LLM 调用、工具执行、任务和智能体运行发出结构化 span 事件——包含耗时、token 用量和共享的 `runId` 用于关联追踪。未订阅时零开销,零额外依赖。 ## 快速开始 @@ -90,10 +93,6 @@ Success: true Tokens: 12847 output tokens ``` -## 作者 - -> JackChen — 前 WPS 产品经理,现独立创业者。关注小红书[「杰克西|硅基杠杆」](https://www.xiaohongshu.com/user/profile/5a1bdc1e4eacab4aa39ea6d6),持续获取我的 AI Agent 观点和思考。 - ## 三种运行模式 | 模式 | 方法 | 适用场景 | @@ -102,12 +101,6 @@ Tokens: 12847 output tokens | 自动编排团队 | `runTeam()` | 给一个目标,框架自动规划和执行 | | 显式任务管线 | `runTasks()` | 你自己定义任务图和分配 | -## 贡献者 - - - - - ## 示例 所有示例都是可运行脚本,位于 [`examples/`](./examples/) 目录。使用 `npx tsx` 运行: @@ -125,6 +118,10 @@ npx tsx examples/01-single-agent.ts | [05 — Copilot](examples/05-copilot-test.ts) | GitHub Copilot 作为 LLM 提供者 | | [06 — 本地模型](examples/06-local-model.ts) | Ollama + Claude 混合流水线,通过 `baseURL` 接入(兼容 vLLM、LM Studio 等) | | [07 — 扇出聚合](examples/07-fan-out-aggregate.ts) | `runParallel()` MapReduce — 3 个分析师并行,然后综合 | +| [08 — Gemma 4 本地](examples/08-gemma4-local.ts) | `runTasks()` 
+ `runTeam()` 本地 Gemma 4 via Ollama — 零 API 费用 | +| [09 — 结构化输出](examples/09-structured-output.ts) | `outputSchema`(Zod)— 校验 JSON 输出,通过 `result.structured` 获取 | +| [10 — 任务重试](examples/10-task-retry.ts) | `maxRetries` / `retryDelayMs` / `retryBackoff` + `task_retry` 进度事件 | +| [11 — 可观测性](examples/11-trace-observability.ts) | `onTrace` 回调 — LLM 调用、工具、任务、智能体的结构化 span 事件 | ## 架构 @@ -188,6 +185,8 @@ npx tsx examples/01-single-agent.ts | Gemini | `provider: 'gemini'` | `GEMINI_API_KEY` | 已验证 | | Ollama / vLLM / LM Studio | `provider: 'openai'` + `baseURL` | — | 已验证 | +已验证支持 tool-calling 的本地模型:**Gemma 4**(见[示例 08](examples/08-gemma4-local.ts))。 + 任何 OpenAI 兼容 API 均可通过 `provider: 'openai'` + `baseURL` 接入(DeepSeek、Groq、Mistral、Qwen、MiniMax 等)。这些 Provider 尚未完整验证——欢迎通过 [#25](https://github.com/JackChen-me/open-multi-agent/issues/25) 贡献验证。 ## 参与贡献 @@ -198,9 +197,25 @@ npx tsx examples/01-single-agent.ts - **示例** — 真实场景的工作流和用例。 - **文档** — 指南、教程和 API 文档。 +## 作者 + +> JackChen — 前 WPS 产品经理,现独立创业者。关注小红书[「杰克西|硅基杠杆」](https://www.xiaohongshu.com/user/profile/5a1bdc1e4eacab4aa39ea6d6),持续获取我的 AI Agent 观点和思考。 + +## 贡献者 + + + + + ## Star 趋势 -[![Star History Chart](https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260402b)](https://star-history.com/#JackChen-me/open-multi-agent&Date) + + + + + Star History Chart + + ## 许可证 diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..235d6d9 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,17 @@ +# Security Policy + +## Supported Versions + +| Version | Supported | +|---------|-----------| +| latest | Yes | + +## Reporting a Vulnerability + +If you discover a security vulnerability, please report it responsibly via email: + +**jack@yuanasi.com** + +Please do **not** open a public GitHub issue for security vulnerabilities. + +We will acknowledge receipt within 48 hours and aim to provide a fix or mitigation plan within 7 days. 
diff --git a/examples/08-gemma4-local.ts b/examples/08-gemma4-local.ts new file mode 100644 index 0000000..0d31853 --- /dev/null +++ b/examples/08-gemma4-local.ts @@ -0,0 +1,192 @@ +/** + * Example 08 — Gemma 4 Local (100% Local, Zero API Cost) + * + * Demonstrates both execution modes with a fully local Gemma 4 model via + * Ollama. No cloud API keys needed — everything runs on your machine. + * + * Part 1 — runTasks(): explicit task pipeline (researcher → summarizer) + * Part 2 — runTeam(): auto-orchestration where Gemma 4 acts as coordinator, + * decomposes the goal into tasks, and synthesises the final result + * + * This is the hardest test for a local model — runTeam() requires it to + * produce valid JSON for task decomposition AND do tool-calling for execution. + * Gemma 4 e2b (5.1B params) handles both reliably. + * + * Run: + * no_proxy=localhost npx tsx examples/08-gemma4-local.ts + * + * Prerequisites: + * 1. Ollama >= 0.20.0 installed and running: https://ollama.com + * 2. Pull the model: ollama pull gemma4:e2b + * (or gemma4:e4b for better quality on machines with more RAM) + * 3. No API keys needed! + * + * Note: The no_proxy=localhost prefix is needed if you have an HTTP proxy + * configured, since the OpenAI SDK would otherwise route Ollama requests + * through the proxy. 
+ */ + +import { OpenMultiAgent } from '../src/index.js' +import type { AgentConfig, OrchestratorEvent, Task } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Configuration — change this to match your Ollama setup +// --------------------------------------------------------------------------- + +// See available tags at https://ollama.com/library/gemma4 +const OLLAMA_MODEL = 'gemma4:e2b' // or 'gemma4:e4b', 'gemma4:26b' +const OLLAMA_BASE_URL = 'http://localhost:11434/v1' +const OUTPUT_DIR = '/tmp/gemma4-demo' + +// --------------------------------------------------------------------------- +// Agents +// --------------------------------------------------------------------------- + +const researcher: AgentConfig = { + name: 'researcher', + model: OLLAMA_MODEL, + provider: 'openai', + baseURL: OLLAMA_BASE_URL, + apiKey: 'ollama', // placeholder — Ollama ignores this, but the OpenAI SDK requires a non-empty value + systemPrompt: `You are a system researcher. Use bash to run non-destructive, +read-only commands (uname -a, sw_vers, df -h, uptime, etc.) and report results. +Use file_write to save reports when asked.`, + tools: ['bash', 'file_write'], + maxTurns: 8, +} + +const summarizer: AgentConfig = { + name: 'summarizer', + model: OLLAMA_MODEL, + provider: 'openai', + baseURL: OLLAMA_BASE_URL, + apiKey: 'ollama', + systemPrompt: `You are a technical writer. Read files and produce concise, +structured Markdown summaries. 
Use file_write to save reports when asked.`, + tools: ['file_read', 'file_write'], + maxTurns: 4, +} + +// --------------------------------------------------------------------------- +// Progress handler +// --------------------------------------------------------------------------- + +function handleProgress(event: OrchestratorEvent): void { + const ts = new Date().toISOString().slice(11, 23) + switch (event.type) { + case 'task_start': { + const task = event.data as Task | undefined + console.log(`[${ts}] TASK START "${task?.title ?? event.task}" → ${task?.assignee ?? '?'}`) + break + } + case 'task_complete': + console.log(`[${ts}] TASK DONE "${event.task}"`) + break + case 'agent_start': + console.log(`[${ts}] AGENT START ${event.agent}`) + break + case 'agent_complete': + console.log(`[${ts}] AGENT DONE ${event.agent}`) + break + case 'error': + console.error(`[${ts}] ERROR ${event.agent ?? ''} task=${event.task ?? '?'}`) + break + } +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Part 1: runTasks() — Explicit task pipeline +// ═══════════════════════════════════════════════════════════════════════════ + +console.log('Part 1: runTasks() — Explicit Pipeline') +console.log('='.repeat(60)) +console.log(` model → ${OLLAMA_MODEL} via Ollama`) +console.log(` pipeline → researcher gathers info → summarizer writes summary`) +console.log() + +const orchestrator1 = new OpenMultiAgent({ + defaultModel: OLLAMA_MODEL, + maxConcurrency: 1, // local model serves one request at a time + onProgress: handleProgress, +}) + +const team1 = orchestrator1.createTeam('explicit', { + name: 'explicit', + agents: [researcher, summarizer], + sharedMemory: true, +}) + +const tasks = [ + { + title: 'Gather system information', + description: `Use bash to run system info commands (uname -a, sw_vers, sysctl, df -h, uptime). 
+Then write a structured Markdown report to ${OUTPUT_DIR}/system-report.md with sections: +OS, Hardware, Disk, and Uptime.`, + assignee: 'researcher', + }, + { + title: 'Summarize the report', + description: `Read the file at ${OUTPUT_DIR}/system-report.md. +Produce a concise one-paragraph executive summary of the system information.`, + assignee: 'summarizer', + dependsOn: ['Gather system information'], + }, +] + +const start1 = Date.now() +const result1 = await orchestrator1.runTasks(team1, tasks) + +console.log(`\nSuccess: ${result1.success} Time: ${((Date.now() - start1) / 1000).toFixed(1)}s`) +console.log(`Tokens — input: ${result1.totalTokenUsage.input_tokens}, output: ${result1.totalTokenUsage.output_tokens}`) + +const summary = result1.agentResults.get('summarizer') +if (summary?.success) { + console.log('\nSummary (from local Gemma 4):') + console.log('-'.repeat(60)) + console.log(summary.output) + console.log('-'.repeat(60)) +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Part 2: runTeam() — Auto-orchestration (Gemma 4 as coordinator) +// ═══════════════════════════════════════════════════════════════════════════ + +console.log('\n\nPart 2: runTeam() — Auto-Orchestration') +console.log('='.repeat(60)) +console.log(` coordinator → auto-created by runTeam(), also Gemma 4`) +console.log(` goal → given in natural language, framework plans everything`) +console.log() + +const orchestrator2 = new OpenMultiAgent({ + defaultModel: OLLAMA_MODEL, + defaultProvider: 'openai', + defaultBaseURL: OLLAMA_BASE_URL, + defaultApiKey: 'ollama', + maxConcurrency: 1, + onProgress: handleProgress, +}) + +const team2 = orchestrator2.createTeam('auto', { + name: 'auto', + agents: [researcher, summarizer], + sharedMemory: true, +}) + +const goal = `Check this machine's Node.js version, npm version, and OS info, +then write a short Markdown summary report to /tmp/gemma4-auto/report.md` + +const start2 = Date.now() +const result2 = await 
orchestrator2.runTeam(team2, goal) + +console.log(`\nSuccess: ${result2.success} Time: ${((Date.now() - start2) / 1000).toFixed(1)}s`) +console.log(`Tokens — input: ${result2.totalTokenUsage.input_tokens}, output: ${result2.totalTokenUsage.output_tokens}`) + +const coordResult = result2.agentResults.get('coordinator') +if (coordResult?.success) { + console.log('\nFinal synthesis (from local Gemma 4 coordinator):') + console.log('-'.repeat(60)) + console.log(coordResult.output) + console.log('-'.repeat(60)) +} + +console.log('\nAll processing done locally. $0 API cost.') diff --git a/examples/09-structured-output.ts b/examples/09-structured-output.ts new file mode 100644 index 0000000..2ffc29e --- /dev/null +++ b/examples/09-structured-output.ts @@ -0,0 +1,73 @@ +/** + * Example 09 — Structured Output + * + * Demonstrates `outputSchema` on AgentConfig. The agent's response is + * automatically parsed as JSON and validated against a Zod schema. + * On validation failure, the framework retries once with error feedback. + * + * The validated result is available via `result.structured`. + * + * Run: + * npx tsx examples/09-structured-output.ts + * + * Prerequisites: + * ANTHROPIC_API_KEY env var must be set. 
 + */ + +import { z } from 'zod' +import { OpenMultiAgent } from '../src/index.js' +import type { AgentConfig } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Define a Zod schema for the expected output +// --------------------------------------------------------------------------- + +const ReviewAnalysis = z.object({ + summary: z.string().describe('One-sentence summary of the review'), + sentiment: z.enum(['positive', 'negative', 'neutral']), + confidence: z.number().min(0).max(1).describe('How confident the analysis is'), + keyTopics: z.array(z.string()).describe('Main topics mentioned in the review'), +}) + +type ReviewAnalysis = z.infer<typeof ReviewAnalysis> + +// --------------------------------------------------------------------------- +// Agent with outputSchema +// --------------------------------------------------------------------------- + +const analyst: AgentConfig = { + name: 'analyst', + model: 'claude-sonnet-4-6', + systemPrompt: 'You are a product review analyst. Analyze the given review and extract structured insights.', + outputSchema: ReviewAnalysis, +} + +// --------------------------------------------------------------------------- +// Run +// --------------------------------------------------------------------------- + +const orchestrator = new OpenMultiAgent({ defaultModel: 'claude-sonnet-4-6' }) + +const reviews = [ + 'This keyboard is amazing! The mechanical switches feel incredible and the RGB lighting is stunning. Build quality is top-notch. Only downside is the price.', + 'Terrible experience. The product arrived broken, customer support was unhelpful, and the return process took 3 weeks.', + 'It works fine. Nothing special, nothing bad. 
Does what it says on the box.', +] + +console.log('Analyzing product reviews with structured output...\n') + +for (const review of reviews) { + const result = await orchestrator.runAgent(analyst, `Analyze this review: "${review}"`) + + if (result.structured) { + const data = result.structured as ReviewAnalysis + console.log(`Sentiment: ${data.sentiment} (confidence: ${data.confidence})`) + console.log(`Summary: ${data.summary}`) + console.log(`Topics: ${data.keyTopics.join(', ')}`) + } else { + console.log(`Validation failed. Raw output: ${result.output.slice(0, 100)}`) + } + + console.log(`Tokens: ${result.tokenUsage.input_tokens} in / ${result.tokenUsage.output_tokens} out`) + console.log('---') +} diff --git a/examples/10-task-retry.ts b/examples/10-task-retry.ts new file mode 100644 index 0000000..5f53e5e --- /dev/null +++ b/examples/10-task-retry.ts @@ -0,0 +1,132 @@ +/** + * Example 10 — Task Retry with Exponential Backoff + * + * Demonstrates `maxRetries`, `retryDelayMs`, and `retryBackoff` on task config. + * When a task fails, the framework automatically retries with exponential + * backoff. The `onProgress` callback receives `task_retry` events so you can + * log retry attempts in real time. + * + * Scenario: a two-step pipeline where the first task (data fetch) is configured + * to retry on failure, and the second task (analysis) depends on it. + * + * Run: + * npx tsx examples/10-task-retry.ts + * + * Prerequisites: + * ANTHROPIC_API_KEY env var must be set. + */ + +import { OpenMultiAgent } from '../src/index.js' +import type { AgentConfig, OrchestratorEvent } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Agents +// --------------------------------------------------------------------------- + +const fetcher: AgentConfig = { + name: 'fetcher', + model: 'claude-sonnet-4-6', + systemPrompt: `You are a data-fetching agent. When given a topic, produce a short +JSON summary with 3-5 key facts. 
Output ONLY valid JSON, no markdown fences. +Example: {"topic":"...", "facts":["fact1","fact2","fact3"]}`, + maxTurns: 2, +} + +const analyst: AgentConfig = { + name: 'analyst', + model: 'claude-sonnet-4-6', + systemPrompt: `You are a data analyst. Read the fetched data from shared memory +and produce a brief analysis (3-4 sentences) highlighting trends or insights.`, + maxTurns: 2, +} + +// --------------------------------------------------------------------------- +// Progress handler — watch for task_retry events +// --------------------------------------------------------------------------- + +function handleProgress(event: OrchestratorEvent): void { + const ts = new Date().toISOString().slice(11, 23) + + switch (event.type) { + case 'task_start': + console.log(`[${ts}] TASK START "${event.task}" (agent: ${event.agent})`) + break + case 'task_complete': + console.log(`[${ts}] TASK DONE "${event.task}"`) + break + case 'task_retry': { + const d = event.data as { attempt: number; maxAttempts: number; error: string; nextDelayMs: number } + console.log(`[${ts}] TASK RETRY "${event.task}" — attempt ${d.attempt}/${d.maxAttempts}, next in ${d.nextDelayMs}ms`) + console.log(` error: ${d.error.slice(0, 120)}`) + break + } + case 'error': + console.log(`[${ts}] ERROR "${event.task}" agent=${event.agent}`) + break + } +} + +// --------------------------------------------------------------------------- +// Orchestrator + team +// --------------------------------------------------------------------------- + +const orchestrator = new OpenMultiAgent({ + defaultModel: 'claude-sonnet-4-6', + onProgress: handleProgress, +}) + +const team = orchestrator.createTeam('retry-demo', { + name: 'retry-demo', + agents: [fetcher, analyst], + sharedMemory: true, +}) + +// --------------------------------------------------------------------------- +// Tasks — fetcher has retry config, analyst depends on it +// --------------------------------------------------------------------------- + 
+const tasks = [ + { + title: 'Fetch data', + description: 'Fetch key facts about the adoption of TypeScript in open-source projects as of 2024. Output a JSON object with a "topic" and "facts" array.', + assignee: 'fetcher', + // Retry config: up to 2 retries, 500ms base delay, 2x backoff (500ms, 1000ms) + maxRetries: 2, + retryDelayMs: 500, + retryBackoff: 2, + }, + { + title: 'Analyze data', + description: 'Read the fetched data from shared memory and produce a 3-4 sentence analysis of TypeScript adoption trends.', + assignee: 'analyst', + dependsOn: ['Fetch data'], + // No retry — if analysis fails, just report the error + }, +] + +// --------------------------------------------------------------------------- +// Run +// --------------------------------------------------------------------------- + +console.log('Task Retry Example') +console.log('='.repeat(60)) +console.log('Pipeline: fetch (with retry) → analyze') +console.log(`Retry config: maxRetries=2, delay=500ms, backoff=2x`) +console.log('='.repeat(60)) +console.log() + +const result = await orchestrator.runTasks(team, tasks) + +// --------------------------------------------------------------------------- +// Summary +// --------------------------------------------------------------------------- + +console.log('\n' + '='.repeat(60)) +console.log(`Overall success: ${result.success}`) +console.log(`Tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`) + +for (const [name, r] of result.agentResults) { + const icon = r.success ? 'OK ' : 'FAIL' + console.log(` [${icon}] ${name}`) + console.log(` ${r.output.slice(0, 200)}`) +} diff --git a/examples/11-trace-observability.ts b/examples/11-trace-observability.ts new file mode 100644 index 0000000..20b463e --- /dev/null +++ b/examples/11-trace-observability.ts @@ -0,0 +1,133 @@ +/** + * Example 11 — Trace Observability + * + * Demonstrates the `onTrace` callback for lightweight observability. 
Every LLM + * call, tool execution, task lifecycle, and agent run emits a structured trace + * event with timing data and token usage — giving you full visibility into + * what's happening inside a multi-agent run. + * + * Trace events share a `runId` for correlation, so you can reconstruct the + * full execution timeline. Pipe them into your own logging, OpenTelemetry, or + * dashboard. + * + * Run: + * npx tsx examples/11-trace-observability.ts + * + * Prerequisites: + * ANTHROPIC_API_KEY env var must be set. + */ + +import { OpenMultiAgent } from '../src/index.js' +import type { AgentConfig, TraceEvent } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Agents +// --------------------------------------------------------------------------- + +const researcher: AgentConfig = { + name: 'researcher', + model: 'claude-sonnet-4-6', + systemPrompt: 'You are a research assistant. Provide concise, factual answers.', + maxTurns: 2, +} + +const writer: AgentConfig = { + name: 'writer', + model: 'claude-sonnet-4-6', + systemPrompt: 'You are a technical writer. 
Summarize research into clear prose.', + maxTurns: 2, +} + +// --------------------------------------------------------------------------- +// Trace handler — log every span with timing +// --------------------------------------------------------------------------- + +function handleTrace(event: TraceEvent): void { + const dur = `${event.durationMs}ms`.padStart(7) + + switch (event.type) { + case 'llm_call': + console.log( + ` [LLM] ${dur} agent=${event.agent} model=${event.model} turn=${event.turn}` + + ` tokens=${event.tokens.input_tokens}in/${event.tokens.output_tokens}out`, + ) + break + case 'tool_call': + console.log( + ` [TOOL] ${dur} agent=${event.agent} tool=${event.tool}` + + ` error=${event.isError}`, + ) + break + case 'task': + console.log( + ` [TASK] ${dur} task="${event.taskTitle}" agent=${event.agent}` + + ` success=${event.success} retries=${event.retries}`, + ) + break + case 'agent': + console.log( + ` [AGENT] ${dur} agent=${event.agent} turns=${event.turns}` + + ` tools=${event.toolCalls} tokens=${event.tokens.input_tokens}in/${event.tokens.output_tokens}out`, + ) + break + } +} + +// --------------------------------------------------------------------------- +// Orchestrator + team +// --------------------------------------------------------------------------- + +const orchestrator = new OpenMultiAgent({ + defaultModel: 'claude-sonnet-4-6', + onTrace: handleTrace, +}) + +const team = orchestrator.createTeam('trace-demo', { + name: 'trace-demo', + agents: [researcher, writer], + sharedMemory: true, +}) + +// --------------------------------------------------------------------------- +// Tasks — researcher first, then writer summarizes +// --------------------------------------------------------------------------- + +const tasks = [ + { + title: 'Research topic', + description: 'List 5 key benefits of TypeScript for large codebases. 
Be concise.', + assignee: 'researcher', + }, + { + title: 'Write summary', + description: 'Read the research from shared memory and write a 3-sentence summary.', + assignee: 'writer', + dependsOn: ['Research topic'], + }, +] + +// --------------------------------------------------------------------------- +// Run +// --------------------------------------------------------------------------- + +console.log('Trace Observability Example') +console.log('='.repeat(60)) +console.log('Pipeline: research → write (with full trace output)') +console.log('='.repeat(60)) +console.log() + +const result = await orchestrator.runTasks(team, tasks) + +// --------------------------------------------------------------------------- +// Summary +// --------------------------------------------------------------------------- + +console.log('\n' + '='.repeat(60)) +console.log(`Overall success: ${result.success}`) +console.log(`Tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`) + +for (const [name, r] of result.agentResults) { + const icon = r.success ? 'OK ' : 'FAIL' + console.log(` [${icon}] ${name}`) + console.log(` ${r.output.slice(0, 200)}`) +} diff --git a/examples/12-grok.ts b/examples/12-grok.ts new file mode 100644 index 0000000..d4ed08b --- /dev/null +++ b/examples/12-grok.ts @@ -0,0 +1,154 @@ +/** + * Example 12 — Multi-Agent Team Collaboration with Grok (xAI) + * + * Three specialized agents (architect, developer, reviewer) collaborate via `runTeam()` + * to build a minimal Express.js REST API. Every agent uses Grok's coding-optimized model. + * + * Run: + * npx tsx examples/12-grok.ts + * + * Prerequisites: + * XAI_API_KEY environment variable must be set. 
+ */ + +import { OpenMultiAgent } from '../src/index.js' +import type { AgentConfig, OrchestratorEvent } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Agent definitions (all using grok-code-fast-1) +// --------------------------------------------------------------------------- +const architect: AgentConfig = { + name: 'architect', + model: 'grok-code-fast-1', + provider: 'grok', + systemPrompt: `You are a software architect with deep experience in Node.js and REST API design. +Your job is to design clear, production-quality API contracts and file/directory structures. +Output concise plans in markdown — no unnecessary prose.`, + tools: ['bash', 'file_write'], + maxTurns: 5, + temperature: 0.2, +} + +const developer: AgentConfig = { + name: 'developer', + model: 'grok-code-fast-1', + provider: 'grok', + systemPrompt: `You are a TypeScript/Node.js developer. You implement what the architect specifies. +Write clean, runnable code with proper error handling. Use the tools to write files and run tests.`, + tools: ['bash', 'file_read', 'file_write', 'file_edit'], + maxTurns: 12, + temperature: 0.1, +} + +const reviewer: AgentConfig = { + name: 'reviewer', + model: 'grok-code-fast-1', + provider: 'grok', + systemPrompt: `You are a senior code reviewer. Review code for correctness, security, and clarity. +Provide a structured review with: LGTM items, suggestions, and any blocking issues. 
+Read files using the tools before reviewing.`, + tools: ['bash', 'file_read', 'grep'], + maxTurns: 5, + temperature: 0.3, +} + +// --------------------------------------------------------------------------- +// Progress tracking +// --------------------------------------------------------------------------- +const startTimes = new Map() + +function handleProgress(event: OrchestratorEvent): void { + const ts = new Date().toISOString().slice(11, 23) // HH:MM:SS.mmm + switch (event.type) { + case 'agent_start': + startTimes.set(event.agent ?? '', Date.now()) + console.log(`[${ts}] AGENT START → ${event.agent}`) + break + case 'agent_complete': { + const elapsed = Date.now() - (startTimes.get(event.agent ?? '') ?? Date.now()) + console.log(`[${ts}] AGENT DONE ← ${event.agent} (${elapsed}ms)`) + break + } + case 'task_start': + console.log(`[${ts}] TASK START ↓ ${event.task}`) + break + case 'task_complete': + console.log(`[${ts}] TASK DONE ↑ ${event.task}`) + break + case 'message': + console.log(`[${ts}] MESSAGE • ${event.agent} → (team)`) + break + case 'error': + console.error(`[${ts}] ERROR ✗ agent=${event.agent} task=${event.task}`) + if (event.data instanceof Error) console.error(` ${event.data.message}`) + break + } +} + +// --------------------------------------------------------------------------- +// Orchestrate +// --------------------------------------------------------------------------- +const orchestrator = new OpenMultiAgent({ + defaultModel: 'grok-code-fast-1', + defaultProvider: 'grok', + maxConcurrency: 1, // sequential for readable output + onProgress: handleProgress, +}) + +const team = orchestrator.createTeam('api-team', { + name: 'api-team', + agents: [architect, developer, reviewer], + sharedMemory: true, + maxConcurrency: 1, +}) + +console.log(`Team "${team.name}" created with agents: ${team.getAgents().map(a => a.name).join(', ')}`) +console.log('\nStarting team run...\n') +console.log('='.repeat(60)) + +const goal = `Create a minimal 
Express.js REST API in /tmp/express-api/ with: +- GET /health → { status: "ok" } +- GET /users → returns a hardcoded array of 2 user objects +- POST /users → accepts { name, email } body, logs it, returns 201 +- Proper error handling middleware +- The server should listen on port 3001 +- Include a package.json with the required dependencies` + +const result = await orchestrator.runTeam(team, goal) + +console.log('\n' + '='.repeat(60)) + +// --------------------------------------------------------------------------- +// Results +// --------------------------------------------------------------------------- +console.log('\nTeam run complete.') +console.log(`Success: ${result.success}`) +console.log(`Total tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`) + +console.log('\nPer-agent results:') +for (const [agentName, agentResult] of result.agentResults) { + const status = agentResult.success ? 'OK' : 'FAILED' + const tools = agentResult.toolCalls.length + console.log(` ${agentName.padEnd(12)} [${status}] tool_calls=${tools}`) + if (!agentResult.success) { + console.log(` Error: ${agentResult.output.slice(0, 120)}`) + } +} + +// Sample outputs +const developerResult = result.agentResults.get('developer') +if (developerResult?.success) { + console.log('\nDeveloper output (last 600 chars):') + console.log('─'.repeat(60)) + const out = developerResult.output + console.log(out.length > 600 ? '...' 
+ out.slice(-600) : out) + console.log('─'.repeat(60)) +} + +const reviewerResult = result.agentResults.get('reviewer') +if (reviewerResult?.success) { + console.log('\nReviewer output:') + console.log('─'.repeat(60)) + console.log(reviewerResult.output) + console.log('─'.repeat(60)) +} \ No newline at end of file diff --git a/package.json b/package.json index ddd6a11..287dba9 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@jackchen_me/open-multi-agent", - "version": "0.1.0", + "version": "0.2.0", "description": "Production-grade multi-agent orchestration framework. Model-agnostic, supports team collaboration, task scheduling, and inter-agent communication.", "type": "module", "main": "dist/index.js", diff --git a/src/agent/agent.ts b/src/agent/agent.ts index 4ef392e..58a1df3 100644 --- a/src/agent/agent.ts +++ b/src/agent/agent.ts @@ -32,10 +32,16 @@ import type { TokenUsage, ToolUseContext, } from '../types.js' +import { emitTrace, generateRunId } from '../utils/trace.js' import type { ToolDefinition as FrameworkToolDefinition, ToolRegistry } from '../tool/framework.js' import type { ToolExecutor } from '../tool/executor.js' import { createAdapter } from '../llm/adapter.js' -import { AgentRunner, type RunnerOptions, type RunOptions } from './runner.js' +import { AgentRunner, type RunnerOptions, type RunOptions, type RunResult } from './runner.js' +import { + buildStructuredOutputInstruction, + extractJSON, + validateOutput, +} from './structured-output.js' // --------------------------------------------------------------------------- // Internal helpers @@ -111,9 +117,18 @@ export class Agent { const provider = this.config.provider ?? 'anthropic' const adapter = await createAdapter(provider, this.config.apiKey, this.config.baseURL) + // Append structured-output instructions when an outputSchema is configured. 
+ let effectiveSystemPrompt = this.config.systemPrompt + if (this.config.outputSchema) { + const instruction = buildStructuredOutputInstruction(this.config.outputSchema) + effectiveSystemPrompt = effectiveSystemPrompt + ? effectiveSystemPrompt + '\n' + instruction + : instruction + } + const runnerOptions: RunnerOptions = { model: this.config.model, - systemPrompt: this.config.systemPrompt, + systemPrompt: effectiveSystemPrompt, maxTurns: this.config.maxTurns, maxTokens: this.config.maxTokens, temperature: this.config.temperature, @@ -144,12 +159,12 @@ export class Agent { * * Use this for one-shot queries where past context is irrelevant. */ - async run(prompt: string): Promise { + async run(prompt: string, runOptions?: Partial): Promise { const messages: LLMMessage[] = [ { role: 'user', content: [{ type: 'text', text: prompt }] }, ] - return this.executeRun(messages) + return this.executeRun(messages, runOptions) } /** @@ -160,6 +175,7 @@ export class Agent { * * Use this for multi-turn interactions. */ + // TODO(#18): accept optional RunOptions to forward trace context async prompt(message: string): Promise { const userMessage: LLMMessage = { role: 'user', @@ -183,6 +199,7 @@ export class Agent { * * Like {@link run}, this does not use or update the persistent history. */ + // TODO(#18): accept optional RunOptions to forward trace context async *stream(prompt: string): AsyncGenerator { const messages: LLMMessage[] = [ { role: 'user', content: [{ type: 'text', text: prompt }] }, @@ -252,33 +269,165 @@ export class Agent { * Shared execution path used by both `run` and `prompt`. * Handles state transitions and error wrapping. 
*/ - private async executeRun(messages: LLMMessage[]): Promise { + private async executeRun( + messages: LLMMessage[], + callerOptions?: Partial, + ): Promise { this.transitionTo('running') + const agentStartMs = Date.now() + try { const runner = await this.getRunner() + const internalOnMessage = (msg: LLMMessage) => { + this.state.messages.push(msg) + callerOptions?.onMessage?.(msg) + } + // Auto-generate runId when onTrace is provided but runId is missing + const needsRunId = callerOptions?.onTrace && !callerOptions.runId const runOptions: RunOptions = { - onMessage: msg => { - this.state.messages.push(msg) - }, + ...callerOptions, + onMessage: internalOnMessage, + ...(needsRunId ? { runId: generateRunId() } : undefined), } const result = await runner.run(messages, runOptions) - this.state.tokenUsage = addUsage(this.state.tokenUsage, result.tokenUsage) - this.transitionTo('completed') - return this.toAgentRunResult(result, true) + // --- Structured output validation --- + if (this.config.outputSchema) { + const validated = await this.validateStructuredOutput( + messages, + result, + runner, + runOptions, + ) + this.emitAgentTrace(callerOptions, agentStartMs, validated) + return validated + } + + this.transitionTo('completed') + const agentResult = this.toAgentRunResult(result, true) + this.emitAgentTrace(callerOptions, agentStartMs, agentResult) + return agentResult } catch (err) { const error = err instanceof Error ? err : new Error(String(err)) this.transitionToError(error) - return { + const errorResult: AgentRunResult = { success: false, output: error.message, messages: [], tokenUsage: ZERO_USAGE, toolCalls: [], + structured: undefined, + } + this.emitAgentTrace(callerOptions, agentStartMs, errorResult) + return errorResult + } + } + + /** Emit an `agent` trace event if `onTrace` is provided. 
*/ + private emitAgentTrace( + options: Partial | undefined, + startMs: number, + result: AgentRunResult, + ): void { + if (!options?.onTrace) return + const endMs = Date.now() + emitTrace(options.onTrace, { + type: 'agent', + runId: options.runId ?? '', + taskId: options.taskId, + agent: options.traceAgent ?? this.name, + turns: result.messages.filter(m => m.role === 'assistant').length, + tokens: result.tokenUsage, + toolCalls: result.toolCalls.length, + startMs, + endMs, + durationMs: endMs - startMs, + }) + } + + /** + * Validate agent output against the configured `outputSchema`. + * On first validation failure, retry once with error feedback. + */ + private async validateStructuredOutput( + originalMessages: LLMMessage[], + result: RunResult, + runner: AgentRunner, + runOptions: RunOptions, + ): Promise { + const schema = this.config.outputSchema! + + // First attempt + let firstAttemptError: unknown + try { + const parsed = extractJSON(result.output) + const validated = validateOutput(schema, parsed) + this.transitionTo('completed') + return this.toAgentRunResult(result, true, validated) + } catch (e) { + firstAttemptError = e + } + + // Retry: send full context + error feedback + const errorMsg = firstAttemptError instanceof Error + ? firstAttemptError.message + : String(firstAttemptError) + + const errorFeedbackMessage: LLMMessage = { + role: 'user' as const, + content: [{ + type: 'text' as const, + text: [ + 'Your previous response did not produce valid JSON matching the required schema.', + '', + `Error: ${errorMsg}`, + '', + 'Please try again. 
Respond with ONLY valid JSON, no other text.', + ].join('\n'), + }], + } + + const retryMessages: LLMMessage[] = [ + ...originalMessages, + ...result.messages, + errorFeedbackMessage, + ] + + const retryResult = await runner.run(retryMessages, runOptions) + this.state.tokenUsage = addUsage(this.state.tokenUsage, retryResult.tokenUsage) + + const mergedTokenUsage = addUsage(result.tokenUsage, retryResult.tokenUsage) + // Include the error feedback turn to maintain alternating user/assistant roles, + // which is required by Anthropic's API for subsequent prompt() calls. + const mergedMessages = [...result.messages, errorFeedbackMessage, ...retryResult.messages] + const mergedToolCalls = [...result.toolCalls, ...retryResult.toolCalls] + + try { + const parsed = extractJSON(retryResult.output) + const validated = validateOutput(schema, parsed) + this.transitionTo('completed') + return { + success: true, + output: retryResult.output, + messages: mergedMessages, + tokenUsage: mergedTokenUsage, + toolCalls: mergedToolCalls, + structured: validated, + } + } catch { + // Retry also failed + this.transitionTo('completed') + return { + success: false, + output: retryResult.output, + messages: mergedMessages, + tokenUsage: mergedTokenUsage, + toolCalls: mergedToolCalls, + structured: undefined, } } } @@ -331,8 +480,9 @@ export class Agent { // ------------------------------------------------------------------------- private toAgentRunResult( - result: import('./runner.js').RunResult, + result: RunResult, success: boolean, + structured?: unknown, ): AgentRunResult { return { success, @@ -340,6 +490,7 @@ export class Agent { messages: result.messages, tokenUsage: result.tokenUsage, toolCalls: result.toolCalls, + structured, } } diff --git a/src/agent/pool.ts b/src/agent/pool.ts index 915f361..aba0eb8 100644 --- a/src/agent/pool.ts +++ b/src/agent/pool.ts @@ -21,6 +21,7 @@ */ import type { AgentRunResult } from '../types.js' +import type { RunOptions } from './runner.js' import 
type { Agent } from './agent.js' import { Semaphore } from '../utils/semaphore.js' @@ -123,12 +124,16 @@ export class AgentPool { * * @throws {Error} If the agent name is not found. */ - async run(agentName: string, prompt: string): Promise { + async run( + agentName: string, + prompt: string, + runOptions?: Partial, + ): Promise { const agent = this.requireAgent(agentName) await this.semaphore.acquire() try { - return await agent.run(prompt) + return await agent.run(prompt, runOptions) } finally { this.semaphore.release() } @@ -144,6 +149,7 @@ export class AgentPool { * * @param tasks - Array of `{ agent, prompt }` descriptors. */ + // TODO(#18): accept RunOptions per task to forward trace context async runParallel( tasks: ReadonlyArray<{ readonly agent: string; readonly prompt: string }>, ): Promise> { @@ -182,6 +188,7 @@ export class AgentPool { * * @throws {Error} If the pool is empty. */ + // TODO(#18): accept RunOptions to forward trace context async runAny(prompt: string): Promise { const allAgents = this.list() if (allAgents.length === 0) { diff --git a/src/agent/runner.ts b/src/agent/runner.ts index 13667db..113f93c 100644 --- a/src/agent/runner.ts +++ b/src/agent/runner.ts @@ -25,7 +25,9 @@ import type { ToolUseContext, LLMAdapter, LLMChatOptions, + TraceEvent, } from '../types.js' +import { emitTrace } from '../utils/trace.js' import type { ToolRegistry } from '../tool/framework.js' import type { ToolExecutor } from '../tool/executor.js' @@ -76,6 +78,14 @@ export interface RunOptions { readonly onToolResult?: (name: string, result: ToolResult) => void /** Fired after each complete {@link LLMMessage} is appended. */ readonly onMessage?: (message: LLMMessage) => void + /** Trace callback for observability spans. Async callbacks are safe. */ + readonly onTrace?: (event: TraceEvent) => void | Promise + /** Run ID for trace correlation. */ + readonly runId?: string + /** Task ID for trace correlation. 
*/ + readonly taskId?: string + /** Agent name for trace correlation (overrides RunnerOptions.agentName). */ + readonly traceAgent?: string } /** The aggregated result returned when a full run completes. */ @@ -254,7 +264,23 @@ export class AgentRunner { // ------------------------------------------------------------------ // Step 1: Call the LLM and collect the full response for this turn. // ------------------------------------------------------------------ + const llmStartMs = Date.now() const response = await this.adapter.chat(conversationMessages, baseChatOptions) + if (options.onTrace) { + const llmEndMs = Date.now() + emitTrace(options.onTrace, { + type: 'llm_call', + runId: options.runId ?? '', + taskId: options.taskId, + agent: options.traceAgent ?? this.options.agentName ?? 'unknown', + model: this.options.model, + turn: turns, + tokens: response.usage, + startMs: llmStartMs, + endMs: llmEndMs, + durationMs: llmEndMs - llmStartMs, + }) + } totalUsage = addTokenUsage(totalUsage, response.usage) @@ -319,10 +345,25 @@ export class AgentRunner { result = { data: message, isError: true } } - const duration = Date.now() - startTime + const endTime = Date.now() + const duration = endTime - startTime options.onToolResult?.(block.name, result) + if (options.onTrace) { + emitTrace(options.onTrace, { + type: 'tool_call', + runId: options.runId ?? '', + taskId: options.taskId, + agent: options.traceAgent ?? this.options.agentName ?? 'unknown', + tool: block.name, + isError: result.isError ?? false, + startMs: startTime, + endMs: endTime, + durationMs: duration, + }) + } + const record: ToolCallRecord = { toolName: block.name, input: block.input, diff --git a/src/agent/structured-output.ts b/src/agent/structured-output.ts new file mode 100644 index 0000000..3da0f06 --- /dev/null +++ b/src/agent/structured-output.ts @@ -0,0 +1,126 @@ +/** + * @fileoverview Structured output utilities for agent responses. 
+ * + * Provides JSON extraction, Zod validation, and system-prompt injection so + * that agents can return typed, schema-validated output. + */ + +import { type ZodSchema } from 'zod' +import { zodToJsonSchema } from '../tool/framework.js' + +// --------------------------------------------------------------------------- +// System-prompt instruction builder +// --------------------------------------------------------------------------- + +/** + * Build a JSON-mode instruction block to append to the agent's system prompt. + * + * Converts the Zod schema to JSON Schema and formats it as a clear directive + * for the LLM to respond with valid JSON matching the schema. + */ +export function buildStructuredOutputInstruction(schema: ZodSchema): string { + const jsonSchema = zodToJsonSchema(schema) + return [ + '', + '## Output Format (REQUIRED)', + 'You MUST respond with ONLY valid JSON that conforms to the following JSON Schema.', + 'Do NOT include any text, markdown fences, or explanation outside the JSON object.', + 'Do NOT wrap the JSON in ```json code fences.', + '', + '```', + JSON.stringify(jsonSchema, null, 2), + '```', + ].join('\n') +} + +// --------------------------------------------------------------------------- +// JSON extraction +// --------------------------------------------------------------------------- + +/** + * Attempt to extract and parse JSON from the agent's raw text output. + * + * Handles three cases in order: + * 1. The output is already valid JSON (ideal case) + * 2. The output contains a ` ```json ` fenced block + * 3. 
The output contains a bare JSON object/array (first `{`/`[` to last `}`/`]`) + * + * @throws {Error} when no valid JSON can be extracted + */ +export function extractJSON(raw: string): unknown { + const trimmed = raw.trim() + + // Case 1: Direct parse + try { + return JSON.parse(trimmed) + } catch { + // Continue to fallback strategies + } + + // Case 2a: Prefer ```json tagged fence + const jsonFenceMatch = trimmed.match(/```json\s*([\s\S]*?)```/) + if (jsonFenceMatch?.[1]) { + try { + return JSON.parse(jsonFenceMatch[1].trim()) + } catch { + // Continue + } + } + + // Case 2b: Fall back to bare ``` fence + const bareFenceMatch = trimmed.match(/```\s*([\s\S]*?)```/) + if (bareFenceMatch?.[1]) { + try { + return JSON.parse(bareFenceMatch[1].trim()) + } catch { + // Continue + } + } + + // Case 3: Find first { to last } (object) + const objStart = trimmed.indexOf('{') + const objEnd = trimmed.lastIndexOf('}') + if (objStart !== -1 && objEnd > objStart) { + try { + return JSON.parse(trimmed.slice(objStart, objEnd + 1)) + } catch { + // Fall through + } + } + + // Case 3b: Find first [ to last ] (array) + const arrStart = trimmed.indexOf('[') + const arrEnd = trimmed.lastIndexOf(']') + if (arrStart !== -1 && arrEnd > arrStart) { + try { + return JSON.parse(trimmed.slice(arrStart, arrEnd + 1)) + } catch { + // Fall through + } + } + + throw new Error( + `Failed to extract JSON from output. Raw output begins with: "${trimmed.slice(0, 100)}"`, + ) +} + +// --------------------------------------------------------------------------- +// Zod validation +// --------------------------------------------------------------------------- + +/** + * Validate a parsed JSON value against a Zod schema. + * + * @returns The validated (and potentially transformed) value on success. + * @throws {Error} with a human-readable Zod error message on failure. 
+ */ +export function validateOutput(schema: ZodSchema, data: unknown): unknown { + const result = schema.safeParse(data) + if (result.success) { + return result.data + } + const issues = result.error.issues + .map(issue => ` - ${issue.path.length > 0 ? issue.path.join('.') : '(root)'}: ${issue.message}`) + .join('\n') + throw new Error(`Output validation failed:\n${issues}`) +} diff --git a/src/index.ts b/src/index.ts index 814996f..312f852 100644 --- a/src/index.ts +++ b/src/index.ts @@ -54,7 +54,7 @@ // Orchestrator (primary entry point) // --------------------------------------------------------------------------- -export { OpenMultiAgent } from './orchestrator/orchestrator.js' +export { OpenMultiAgent, executeWithRetry, computeRetryDelay } from './orchestrator/orchestrator.js' export { Scheduler } from './orchestrator/scheduler.js' export type { SchedulingStrategy } from './orchestrator/scheduler.js' @@ -63,6 +63,7 @@ export type { SchedulingStrategy } from './orchestrator/scheduler.js' // --------------------------------------------------------------------------- export { Agent } from './agent/agent.js' +export { buildStructuredOutputInstruction, extractJSON, validateOutput } from './agent/structured-output.js' export { AgentPool, Semaphore } from './agent/pool.js' export type { PoolStatus } from './agent/pool.js' @@ -160,7 +161,18 @@ export type { OrchestratorConfig, OrchestratorEvent, + // Trace + TraceEventType, + TraceEventBase, + TraceEvent, + LLMCallTrace, + ToolCallTrace, + TaskTrace, + AgentTrace, + // Memory MemoryEntry, MemoryStore, } from './types.js' + +export { generateRunId } from './utils/trace.js' diff --git a/src/llm/adapter.ts b/src/llm/adapter.ts index 1283d90..dc4fe82 100644 --- a/src/llm/adapter.ts +++ b/src/llm/adapter.ts @@ -38,7 +38,7 @@ import type { LLMAdapter } from '../types.js' * Additional providers can be integrated by implementing {@link LLMAdapter} * directly and bypassing this factory. 
*/ -export type SupportedProvider = 'anthropic' | 'copilot' | 'gemini' | 'openai' +export type SupportedProvider = 'anthropic' | 'copilot' | 'grok' | 'openai' | 'gemini' /** * Instantiate the appropriate {@link LLMAdapter} for the given provider. @@ -48,6 +48,7 @@ export type SupportedProvider = 'anthropic' | 'copilot' | 'gemini' | 'openai' * - `anthropic` → `ANTHROPIC_API_KEY` * - `openai` → `OPENAI_API_KEY` * - `gemini` → `GEMINI_API_KEY` / `GOOGLE_API_KEY` + * - `grok` → `XAI_API_KEY` * - `copilot` → `GITHUB_COPILOT_TOKEN` / `GITHUB_TOKEN`, or interactive * OAuth2 device flow if neither is set * @@ -84,6 +85,10 @@ export async function createAdapter( const { OpenAIAdapter } = await import('./openai.js') return new OpenAIAdapter(apiKey, baseURL) } + case 'grok': { + const { GrokAdapter } = await import('./grok.js') + return new GrokAdapter(apiKey, baseURL) + } default: { // The `never` cast here makes TypeScript enforce exhaustiveness. const _exhaustive: never = provider diff --git a/src/llm/grok.ts b/src/llm/grok.ts new file mode 100644 index 0000000..31ef49c --- /dev/null +++ b/src/llm/grok.ts @@ -0,0 +1,29 @@ +/** + * @fileoverview Grok (xAI) adapter. + * + * Thin wrapper around OpenAIAdapter that hard-codes the official xAI endpoint + * and XAI_API_KEY environment variable fallback. + */ + +import { OpenAIAdapter } from './openai.js' + +/** + * LLM adapter for Grok models (grok-4 series and future models). + * + * Thread-safe. Can be shared across agents. + * + * Usage: + * provider: 'grok' + * model: 'grok-4' (or any current Grok model name) + */ +export class GrokAdapter extends OpenAIAdapter { + readonly name = 'grok' + + constructor(apiKey?: string, baseURL?: string) { + // Allow override of baseURL (for proxies or future changes) but default to official xAI endpoint. + super( + apiKey ?? process.env['XAI_API_KEY'], + baseURL ?? 
'https://api.x.ai/v1' + ) + } +} diff --git a/src/llm/openai.ts b/src/llm/openai.ts index 568f94e..e3f166f 100644 --- a/src/llm/openai.ts +++ b/src/llm/openai.ts @@ -65,7 +65,7 @@ import { * Thread-safe — a single instance may be shared across concurrent agent runs. */ export class OpenAIAdapter implements LLMAdapter { - readonly name = 'openai' + readonly name: string = 'openai' readonly #client: OpenAI diff --git a/src/orchestrator/orchestrator.ts b/src/orchestrator/orchestrator.ts index 1da8fb5..86f16c0 100644 --- a/src/orchestrator/orchestrator.ts +++ b/src/orchestrator/orchestrator.ts @@ -52,8 +52,10 @@ import type { TeamRunResult, TokenUsage, } from '../types.js' +import type { RunOptions } from '../agent/runner.js' import { Agent } from '../agent/agent.js' import { AgentPool } from '../agent/pool.js' +import { emitTrace, generateRunId } from '../utils/trace.js' import { ToolRegistry } from '../tool/framework.js' import { ToolExecutor } from '../tool/executor.js' import { registerBuiltInTools } from '../tool/built-in/index.js' @@ -92,6 +94,105 @@ function buildAgent(config: AgentConfig): Agent { return new Agent(config, registry, executor) } +/** Promise-based delay. */ +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)) +} + +/** Maximum delay cap to prevent runaway exponential backoff (30 seconds). */ +const MAX_RETRY_DELAY_MS = 30_000 + +/** + * Compute the retry delay for a given attempt, capped at {@link MAX_RETRY_DELAY_MS}. + */ +export function computeRetryDelay( + baseDelay: number, + backoff: number, + attempt: number, +): number { + return Math.min(baseDelay * backoff ** (attempt - 1), MAX_RETRY_DELAY_MS) +} + +/** + * Execute an agent task with optional retry and exponential backoff. + * + * Exported for testability — called internally by {@link executeQueue}. + * + * @param run - The function that executes the task (typically `pool.run`). 
+ * @param task - The task to execute (retry config read from its fields). + * @param onRetry - Called before each retry sleep with event data. + * @param delayFn - Injectable delay function (defaults to real `sleep`). + * @returns The final {@link AgentRunResult} from the last attempt. + */ +export async function executeWithRetry( + run: () => Promise, + task: Task, + onRetry?: (data: { attempt: number; maxAttempts: number; error: string; nextDelayMs: number }) => void, + delayFn: (ms: number) => Promise = sleep, +): Promise { + const rawRetries = Number.isFinite(task.maxRetries) ? task.maxRetries! : 0 + const maxAttempts = Math.max(0, rawRetries) + 1 + const baseDelay = Math.max(0, Number.isFinite(task.retryDelayMs) ? task.retryDelayMs! : 1000) + const backoff = Math.max(1, Number.isFinite(task.retryBackoff) ? task.retryBackoff! : 2) + + let lastError: string = '' + // Accumulate token usage across all attempts so billing/observability + // reflects the true cost of retries. + let totalUsage: TokenUsage = { input_tokens: 0, output_tokens: 0 } + + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + try { + const result = await run() + totalUsage = { + input_tokens: totalUsage.input_tokens + result.tokenUsage.input_tokens, + output_tokens: totalUsage.output_tokens + result.tokenUsage.output_tokens, + } + + if (result.success) { + return { ...result, tokenUsage: totalUsage } + } + lastError = result.output + + // Failure — retry or give up + if (attempt < maxAttempts) { + const delay = computeRetryDelay(baseDelay, backoff, attempt) + onRetry?.({ attempt, maxAttempts, error: lastError, nextDelayMs: delay }) + await delayFn(delay) + continue + } + + return { ...result, tokenUsage: totalUsage } + } catch (err) { + lastError = err instanceof Error ? 
err.message : String(err) + + if (attempt < maxAttempts) { + const delay = computeRetryDelay(baseDelay, backoff, attempt) + onRetry?.({ attempt, maxAttempts, error: lastError, nextDelayMs: delay }) + await delayFn(delay) + continue + } + + // All retries exhausted — return a failure result + return { + success: false, + output: lastError, + messages: [], + tokenUsage: totalUsage, + toolCalls: [], + } + } + } + + // Should not be reached, but TypeScript needs a return + return { + success: false, + output: lastError, + messages: [], + tokenUsage: totalUsage, + toolCalls: [], + } +} + // --------------------------------------------------------------------------- // Parsed task spec (result of coordinator decomposition) // --------------------------------------------------------------------------- @@ -161,6 +262,8 @@ interface RunContext { readonly scheduler: Scheduler readonly agentResults: Map readonly config: OrchestratorConfig + /** Trace run ID, present when `onTrace` is configured. */ + readonly runId?: string } /** @@ -239,49 +342,76 @@ async function executeQueue( // Build the prompt: inject shared memory context + task description const prompt = await buildTaskPrompt(task, team) - try { - const result = await pool.run(assignee, prompt) - ctx.agentResults.set(`${assignee}:${task.id}`, result) + // Build trace context for this task's agent run + const traceOptions: Partial | undefined = config.onTrace + ? { onTrace: config.onTrace, runId: ctx.runId ?? '', taskId: task.id, traceAgent: assignee } + : undefined - if (result.success) { - // Persist result into shared memory so other agents can read it - const sharedMem = team.getSharedMemoryInstance() - if (sharedMem) { - await sharedMem.write(assignee, `task:${task.id}:result`, result.output) - } - - queue.complete(task.id, result.output) + const taskStartMs = config.onTrace ? 
Date.now() : 0 + let retryCount = 0 + const result = await executeWithRetry( + () => pool.run(assignee, prompt, traceOptions), + task, + (retryData) => { + retryCount++ config.onProgress?.({ - type: 'task_complete', + type: 'task_retry', task: task.id, agent: assignee, - data: result, + data: retryData, } satisfies OrchestratorEvent) + }, + ) - config.onProgress?.({ - type: 'agent_complete', - agent: assignee, - task: task.id, - data: result, - } satisfies OrchestratorEvent) - } else { - queue.fail(task.id, result.output) - config.onProgress?.({ - type: 'error', - task: task.id, - agent: assignee, - data: result, - } satisfies OrchestratorEvent) + // Emit task trace + if (config.onTrace) { + const taskEndMs = Date.now() + emitTrace(config.onTrace, { + type: 'task', + runId: ctx.runId ?? '', + taskId: task.id, + taskTitle: task.title, + agent: assignee, + success: result.success, + retries: retryCount, + startMs: taskStartMs, + endMs: taskEndMs, + durationMs: taskEndMs - taskStartMs, + }) + } + + ctx.agentResults.set(`${assignee}:${task.id}`, result) + + if (result.success) { + // Persist result into shared memory so other agents can read it + const sharedMem = team.getSharedMemoryInstance() + if (sharedMem) { + await sharedMem.write(assignee, `task:${task.id}:result`, result.output) } - } catch (err) { - const message = err instanceof Error ? 
err.message : String(err) - queue.fail(task.id, message) + + queue.complete(task.id, result.output) + + config.onProgress?.({ + type: 'task_complete', + task: task.id, + agent: assignee, + data: result, + } satisfies OrchestratorEvent) + + config.onProgress?.({ + type: 'agent_complete', + agent: assignee, + task: task.id, + data: result, + } satisfies OrchestratorEvent) + } else { + queue.fail(task.id, result.output) config.onProgress?.({ type: 'error', task: task.id, agent: assignee, - data: err, + data: result, } satisfies OrchestratorEvent) } }) @@ -341,8 +471,8 @@ async function buildTaskPrompt(task: Task, team: Team): Promise { */ export class OpenMultiAgent { private readonly config: Required< - Omit - > & Pick + Omit + > & Pick private readonly teams: Map = new Map() private completedTaskCount = 0 @@ -363,6 +493,7 @@ export class OpenMultiAgent { defaultBaseURL: config.defaultBaseURL, defaultApiKey: config.defaultApiKey, onProgress: config.onProgress, + onTrace: config.onTrace, } } @@ -420,7 +551,11 @@ export class OpenMultiAgent { data: { prompt }, }) - const result = await agent.run(prompt) + const traceOptions: Partial | undefined = this.config.onTrace + ? { onTrace: this.config.onTrace, runId: generateRunId(), traceAgent: config.name } + : undefined + + const result = await agent.run(prompt, traceOptions) this.config.onProgress?.({ type: 'agent_complete', @@ -478,6 +613,7 @@ export class OpenMultiAgent { const decompositionPrompt = this.buildDecompositionPrompt(goal, agentConfigs) const coordinatorAgent = buildAgent(coordinatorConfig) + const runId = this.config.onTrace ? generateRunId() : undefined this.config.onProgress?.({ type: 'agent_start', @@ -485,7 +621,10 @@ export class OpenMultiAgent { data: { phase: 'decomposition', goal }, }) - const decompositionResult = await coordinatorAgent.run(decompositionPrompt) + const decompTraceOptions: Partial | undefined = this.config.onTrace + ? { onTrace: this.config.onTrace, runId: runId ?? 
'', traceAgent: 'coordinator' } + : undefined + const decompositionResult = await coordinatorAgent.run(decompositionPrompt, decompTraceOptions) const agentResults = new Map() agentResults.set('coordinator:decompose', decompositionResult) @@ -529,6 +668,7 @@ export class OpenMultiAgent { scheduler, agentResults, config: this.config, + runId, } await executeQueue(queue, ctx) @@ -537,7 +677,10 @@ export class OpenMultiAgent { // Step 5: Coordinator synthesises final result // ------------------------------------------------------------------ const synthesisPrompt = await this.buildSynthesisPrompt(goal, queue.list(), team) - const synthesisResult = await coordinatorAgent.run(synthesisPrompt) + const synthTraceOptions: Partial | undefined = this.config.onTrace + ? { onTrace: this.config.onTrace, runId: runId ?? '', traceAgent: 'coordinator' } + : undefined + const synthesisResult = await coordinatorAgent.run(synthesisPrompt, synthTraceOptions) agentResults.set('coordinator', synthesisResult) this.config.onProgress?.({ @@ -574,6 +717,9 @@ export class OpenMultiAgent { description: string assignee?: string dependsOn?: string[] + maxRetries?: number + retryDelayMs?: number + retryBackoff?: number }>, ): Promise { const agentConfigs = team.getAgents() @@ -586,6 +732,9 @@ export class OpenMultiAgent { description: t.description, assignee: t.assignee, dependsOn: t.dependsOn, + maxRetries: t.maxRetries, + retryDelayMs: t.retryDelayMs, + retryBackoff: t.retryBackoff, })), agentConfigs, queue, @@ -601,6 +750,7 @@ export class OpenMultiAgent { scheduler, agentResults, config: this.config, + runId: this.config.onTrace ? generateRunId() : undefined, } await executeQueue(queue, ctx) @@ -743,7 +893,11 @@ export class OpenMultiAgent { * then resolving them to real IDs before adding tasks to the queue. 
*/ private loadSpecsIntoQueue( - specs: ReadonlyArray, + specs: ReadonlyArray, agentConfigs: AgentConfig[], queue: TaskQueue, ): void { @@ -760,6 +914,9 @@ export class OpenMultiAgent { assignee: spec.assignee && agentNames.has(spec.assignee) ? spec.assignee : undefined, + maxRetries: spec.maxRetries, + retryDelayMs: spec.retryDelayMs, + retryBackoff: spec.retryBackoff, }) titleToId.set(spec.title.toLowerCase().trim(), task.id) createdTasks.push(task) @@ -837,13 +994,15 @@ export class OpenMultiAgent { if (!existing) { collapsed.set(agentName, result) } else { - // Merge multiple results for the same agent (multi-task case) + // Merge multiple results for the same agent (multi-task case). + // Keep the latest `structured` value (last completed task wins). collapsed.set(agentName, { success: existing.success && result.success, output: [existing.output, result.output].filter(Boolean).join('\n\n---\n\n'), messages: [...existing.messages, ...result.messages], tokenUsage: addUsage(existing.tokenUsage, result.tokenUsage), toolCalls: [...existing.toolCalls, ...result.toolCalls], + structured: result.structured !== undefined ? 
result.structured : existing.structured, }) } diff --git a/src/task/task.ts b/src/task/task.ts index 9a11476..d74e70b 100644 --- a/src/task/task.ts +++ b/src/task/task.ts @@ -31,6 +31,9 @@ export function createTask(input: { description: string assignee?: string dependsOn?: string[] + maxRetries?: number + retryDelayMs?: number + retryBackoff?: number }): Task { const now = new Date() return { @@ -43,6 +46,9 @@ export function createTask(input: { result: undefined, createdAt: now, updatedAt: now, + maxRetries: input.maxRetries, + retryDelayMs: input.retryDelayMs, + retryBackoff: input.retryBackoff, } } diff --git a/src/types.ts b/src/types.ts index af876dc..22f2e70 100644 --- a/src/types.ts +++ b/src/types.ts @@ -186,7 +186,7 @@ export interface ToolDefinition> { export interface AgentConfig { readonly name: string readonly model: string - readonly provider?: 'anthropic' | 'copilot' | 'gemini' | 'openai' + readonly provider?: 'anthropic' | 'copilot' | 'grok' | 'openai' | 'gemini' /** * Custom base URL for OpenAI-compatible APIs (Ollama, vLLM, LM Studio, etc.). * Note: local servers that don't require auth still need `apiKey` set to a @@ -201,6 +201,12 @@ export interface AgentConfig { readonly maxTurns?: number readonly maxTokens?: number readonly temperature?: number + /** + * Optional Zod schema for structured output. When set, the agent's final + * output is parsed as JSON and validated against this schema. A single + * retry with error feedback is attempted on validation failure. + */ + readonly outputSchema?: ZodSchema } /** Lifecycle state tracked during an agent run. */ @@ -227,6 +233,12 @@ export interface AgentRunResult { readonly messages: LLMMessage[] readonly tokenUsage: TokenUsage readonly toolCalls: ToolCallRecord[] + /** + * Parsed and validated structured output when `outputSchema` is set on the + * agent config. `undefined` when no schema is configured or validation + * failed after retry. 
+ */ + readonly structured?: unknown } // --------------------------------------------------------------------------- @@ -269,6 +281,12 @@ export interface Task { result?: string readonly createdAt: Date updatedAt: Date + /** Maximum number of retry attempts on failure (default: 0 — no retry). */ + readonly maxRetries?: number + /** Base delay in ms before the first retry (default: 1000). */ + readonly retryDelayMs?: number + /** Exponential backoff multiplier (default: 2). */ + readonly retryBackoff?: number } // --------------------------------------------------------------------------- @@ -282,6 +300,7 @@ export interface OrchestratorEvent { | 'agent_complete' | 'task_start' | 'task_complete' + | 'task_retry' | 'message' | 'error' readonly agent?: string @@ -293,12 +312,72 @@ export interface OrchestratorEvent { export interface OrchestratorConfig { readonly maxConcurrency?: number readonly defaultModel?: string - readonly defaultProvider?: 'anthropic' | 'copilot' | 'gemini' | 'openai' + readonly defaultProvider?: 'anthropic' | 'copilot' | 'grok' | 'openai' | 'gemini' readonly defaultBaseURL?: string readonly defaultApiKey?: string - onProgress?: (event: OrchestratorEvent) => void + readonly onProgress?: (event: OrchestratorEvent) => void + readonly onTrace?: (event: TraceEvent) => void | Promise } +// --------------------------------------------------------------------------- +// Trace events — lightweight observability spans +// --------------------------------------------------------------------------- + +/** Trace event type discriminants. */ +export type TraceEventType = 'llm_call' | 'tool_call' | 'task' | 'agent' + +/** Shared fields present on every trace event. */ +export interface TraceEventBase { + /** Unique identifier for the entire run (runTeam / runTasks / runAgent call). */ + readonly runId: string + readonly type: TraceEventType + /** Unix epoch ms when the span started. */ + readonly startMs: number + /** Unix epoch ms when the span ended. 
*/ + readonly endMs: number + /** Wall-clock duration in milliseconds (`endMs - startMs`). */ + readonly durationMs: number + /** Agent name associated with this span. */ + readonly agent: string + /** Task ID associated with this span. */ + readonly taskId?: string +} + +/** Emitted for each LLM API call (one per agent turn). */ +export interface LLMCallTrace extends TraceEventBase { + readonly type: 'llm_call' + readonly model: string + readonly turn: number + readonly tokens: TokenUsage +} + +/** Emitted for each tool execution. */ +export interface ToolCallTrace extends TraceEventBase { + readonly type: 'tool_call' + readonly tool: string + readonly isError: boolean +} + +/** Emitted when a task completes (wraps the full retry sequence). */ +export interface TaskTrace extends TraceEventBase { + readonly type: 'task' + readonly taskId: string + readonly taskTitle: string + readonly success: boolean + readonly retries: number +} + +/** Emitted when an agent run completes (wraps the full conversation loop). */ +export interface AgentTrace extends TraceEventBase { + readonly type: 'agent' + readonly turns: number + readonly tokens: TokenUsage + readonly toolCalls: number +} + +/** Discriminated union of all trace event types. */ +export type TraceEvent = LLMCallTrace | ToolCallTrace | TaskTrace | AgentTrace + // --------------------------------------------------------------------------- // Memory // --------------------------------------------------------------------------- diff --git a/src/utils/trace.ts b/src/utils/trace.ts new file mode 100644 index 0000000..4f01f5f --- /dev/null +++ b/src/utils/trace.ts @@ -0,0 +1,34 @@ +/** + * @fileoverview Trace emission utilities for the observability layer. + */ + +import { randomUUID } from 'node:crypto' +import type { TraceEvent } from '../types.js' + +/** + * Safely emit a trace event. Swallows callback errors so a broken + * subscriber never crashes agent execution. 
+ */ +export function emitTrace( + fn: ((event: TraceEvent) => void | Promise) | undefined, + event: TraceEvent, +): void { + if (!fn) return + try { + // Guard async callbacks: if fn returns a Promise, swallow its rejection + // so an async onTrace never produces an unhandled promise rejection. + const result = fn(event) as unknown + if (result && typeof (result as Promise).catch === 'function') { + ;(result as Promise).catch(noop) + } + } catch { + // Intentionally swallowed — observability must never break execution. + } +} + +function noop() {} + +/** Generate a unique run ID for trace correlation. */ +export function generateRunId(): string { + return randomUUID() +} diff --git a/tests/grok-adapter.test.ts b/tests/grok-adapter.test.ts new file mode 100644 index 0000000..bb459b2 --- /dev/null +++ b/tests/grok-adapter.test.ts @@ -0,0 +1,74 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest' + +// --------------------------------------------------------------------------- +// Mock OpenAI constructor (must be hoisted for Vitest) +// --------------------------------------------------------------------------- +const OpenAIMock = vi.hoisted(() => vi.fn()) + +vi.mock('openai', () => ({ + default: OpenAIMock, +})) + +import { GrokAdapter } from '../src/llm/grok.js' +import { createAdapter } from '../src/llm/adapter.js' + +// --------------------------------------------------------------------------- +// GrokAdapter tests +// --------------------------------------------------------------------------- + +describe('GrokAdapter', () => { + beforeEach(() => { + OpenAIMock.mockClear() + }) + + it('has name "grok"', () => { + const adapter = new GrokAdapter() + expect(adapter.name).toBe('grok') + }) + + it('uses XAI_API_KEY by default', () => { + const original = process.env['XAI_API_KEY'] + process.env['XAI_API_KEY'] = 'xai-test-key-123' + + try { + new GrokAdapter() + expect(OpenAIMock).toHaveBeenCalledWith( + expect.objectContaining({ + apiKey: 
'xai-test-key-123', + baseURL: 'https://api.x.ai/v1', + }) + ) + } finally { + if (original === undefined) { + delete process.env['XAI_API_KEY'] + } else { + process.env['XAI_API_KEY'] = original + } + } + }) + + it('uses official xAI baseURL by default', () => { + new GrokAdapter('some-key') + expect(OpenAIMock).toHaveBeenCalledWith( + expect.objectContaining({ + apiKey: 'some-key', + baseURL: 'https://api.x.ai/v1', + }) + ) + }) + + it('allows overriding apiKey and baseURL', () => { + new GrokAdapter('custom-key', 'https://custom.endpoint/v1') + expect(OpenAIMock).toHaveBeenCalledWith( + expect.objectContaining({ + apiKey: 'custom-key', + baseURL: 'https://custom.endpoint/v1', + }) + ) + }) + + it('createAdapter("grok") returns GrokAdapter instance', async () => { + const adapter = await createAdapter('grok') + expect(adapter).toBeInstanceOf(GrokAdapter) + }) +}) \ No newline at end of file diff --git a/tests/structured-output.test.ts b/tests/structured-output.test.ts new file mode 100644 index 0000000..27f9201 --- /dev/null +++ b/tests/structured-output.test.ts @@ -0,0 +1,331 @@ +import { describe, it, expect } from 'vitest' +import { z } from 'zod' +import { + buildStructuredOutputInstruction, + extractJSON, + validateOutput, +} from '../src/agent/structured-output.js' +import { Agent } from '../src/agent/agent.js' +import { AgentRunner } from '../src/agent/runner.js' +import { ToolRegistry } from '../src/tool/framework.js' +import { ToolExecutor } from '../src/tool/executor.js' +import type { AgentConfig, LLMAdapter, LLMResponse } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Mock LLM adapter factory +// --------------------------------------------------------------------------- + +function mockAdapter(responses: string[]): LLMAdapter { + let callIndex = 0 + return { + name: 'mock', + async chat() { + const text = responses[callIndex++] ?? 
'' + return { + id: `mock-${callIndex}`, + content: [{ type: 'text' as const, text }], + model: 'mock-model', + stop_reason: 'end_turn', + usage: { input_tokens: 10, output_tokens: 20 }, + } satisfies LLMResponse + }, + async *stream() { + /* unused in these tests */ + }, + } +} + +// --------------------------------------------------------------------------- +// extractJSON +// --------------------------------------------------------------------------- + +describe('extractJSON', () => { + it('parses clean JSON', () => { + expect(extractJSON('{"a":1}')).toEqual({ a: 1 }) + }) + + it('parses JSON wrapped in ```json fence', () => { + const raw = 'Here is the result:\n```json\n{"a":1}\n```\nDone.' + expect(extractJSON(raw)).toEqual({ a: 1 }) + }) + + it('parses JSON wrapped in bare ``` fence', () => { + const raw = '```\n{"a":1}\n```' + expect(extractJSON(raw)).toEqual({ a: 1 }) + }) + + it('extracts embedded JSON object from surrounding text', () => { + const raw = 'The answer is {"summary":"hello","score":5} as shown above.' 
+ expect(extractJSON(raw)).toEqual({ summary: 'hello', score: 5 }) + }) + + it('extracts JSON array', () => { + expect(extractJSON('[1,2,3]')).toEqual([1, 2, 3]) + }) + + it('extracts embedded JSON array from surrounding text', () => { + const raw = 'Here: [{"a":1},{"a":2}] end' + expect(extractJSON(raw)).toEqual([{ a: 1 }, { a: 2 }]) + }) + + it('throws on non-JSON text', () => { + expect(() => extractJSON('just plain text')).toThrow('Failed to extract JSON') + }) + + it('throws on empty string', () => { + expect(() => extractJSON('')).toThrow('Failed to extract JSON') + }) +}) + +// --------------------------------------------------------------------------- +// validateOutput +// --------------------------------------------------------------------------- + +describe('validateOutput', () => { + const schema = z.object({ + summary: z.string(), + score: z.number().min(0).max(10), + }) + + it('returns validated data on success', () => { + const data = { summary: 'hello', score: 5 } + expect(validateOutput(schema, data)).toEqual(data) + }) + + it('throws on missing field', () => { + expect(() => validateOutput(schema, { summary: 'hello' })).toThrow( + 'Output validation failed', + ) + }) + + it('throws on wrong type', () => { + expect(() => + validateOutput(schema, { summary: 'hello', score: 'not a number' }), + ).toThrow('Output validation failed') + }) + + it('throws on value out of range', () => { + expect(() => + validateOutput(schema, { summary: 'hello', score: 99 }), + ).toThrow('Output validation failed') + }) + + it('applies Zod transforms', () => { + const transformSchema = z.object({ + name: z.string().transform(s => s.toUpperCase()), + }) + const result = validateOutput(transformSchema, { name: 'alice' }) + expect(result).toEqual({ name: 'ALICE' }) + }) + + it('strips unknown keys with strict schema', () => { + const strictSchema = z.object({ a: z.number() }).strict() + expect(() => + validateOutput(strictSchema, { a: 1, b: 2 }), + ).toThrow('Output 
validation failed') + }) + + it('shows (root) for root-level errors', () => { + const stringSchema = z.string() + expect(() => validateOutput(stringSchema, 42)).toThrow('(root)') + }) +}) + +// --------------------------------------------------------------------------- +// buildStructuredOutputInstruction +// --------------------------------------------------------------------------- + +describe('buildStructuredOutputInstruction', () => { + it('includes the JSON Schema representation', () => { + const schema = z.object({ + summary: z.string(), + score: z.number(), + }) + const instruction = buildStructuredOutputInstruction(schema) + + expect(instruction).toContain('Output Format (REQUIRED)') + expect(instruction).toContain('"type": "object"') + expect(instruction).toContain('"summary"') + expect(instruction).toContain('"score"') + expect(instruction).toContain('ONLY valid JSON') + }) + + it('includes description from Zod schema', () => { + const schema = z.object({ + name: z.string().describe('The person name'), + }) + const instruction = buildStructuredOutputInstruction(schema) + expect(instruction).toContain('The person name') + }) +}) + +// --------------------------------------------------------------------------- +// Agent integration (mocked LLM) +// --------------------------------------------------------------------------- + +/** + * Build an Agent with a mocked LLM adapter by injecting an AgentRunner + * directly into the Agent's private `runner` field, bypassing `createAdapter`. + */ +function buildMockAgent(config: AgentConfig, responses: string[]): Agent { + const adapter = mockAdapter(responses) + const registry = new ToolRegistry() + const executor = new ToolExecutor(registry) + const agent = new Agent(config, registry, executor) + + // Inject a pre-built runner so `getRunner()` returns it without calling createAdapter. 
+ const runner = new AgentRunner(adapter, registry, executor, { + model: config.model, + systemPrompt: config.systemPrompt, + maxTurns: config.maxTurns, + maxTokens: config.maxTokens, + temperature: config.temperature, + agentName: config.name, + }) + ;(agent as any).runner = runner + + return agent +} + +describe('Agent structured output (end-to-end)', () => { + const schema = z.object({ + summary: z.string(), + sentiment: z.enum(['positive', 'negative', 'neutral']), + confidence: z.number().min(0).max(1), + }) + + const baseConfig: AgentConfig = { + name: 'test-agent', + model: 'mock-model', + systemPrompt: 'You are a test agent.', + outputSchema: schema, + } + + it('happy path: valid JSON on first attempt', async () => { + const validJSON = JSON.stringify({ + summary: 'Great product', + sentiment: 'positive', + confidence: 0.95, + }) + + const agent = buildMockAgent(baseConfig, [validJSON]) + const result = await agent.run('Analyze this review') + + expect(result.success).toBe(true) + expect(result.structured).toEqual({ + summary: 'Great product', + sentiment: 'positive', + confidence: 0.95, + }) + }) + + it('retry: invalid first attempt, valid second attempt', async () => { + const invalidJSON = JSON.stringify({ + summary: 'Great product', + sentiment: 'INVALID_VALUE', + confidence: 0.95, + }) + const validJSON = JSON.stringify({ + summary: 'Great product', + sentiment: 'positive', + confidence: 0.95, + }) + + const agent = buildMockAgent(baseConfig, [invalidJSON, validJSON]) + const result = await agent.run('Analyze this review') + + expect(result.success).toBe(true) + expect(result.structured).toEqual({ + summary: 'Great product', + sentiment: 'positive', + confidence: 0.95, + }) + // Token usage should reflect both attempts + expect(result.tokenUsage.input_tokens).toBe(20) // 10 + 10 + expect(result.tokenUsage.output_tokens).toBe(40) // 20 + 20 + }) + + it('both attempts fail: success=false, structured=undefined', async () => { + const bad1 = '{"summary": 
"ok", "sentiment": "WRONG"}' + const bad2 = '{"summary": "ok", "sentiment": "ALSO_WRONG"}' + + const agent = buildMockAgent(baseConfig, [bad1, bad2]) + const result = await agent.run('Analyze this review') + + expect(result.success).toBe(false) + expect(result.structured).toBeUndefined() + }) + + it('no outputSchema: original behavior, structured is undefined', async () => { + const configNoSchema: AgentConfig = { + name: 'plain-agent', + model: 'mock-model', + systemPrompt: 'You are a test agent.', + } + + const agent = buildMockAgent(configNoSchema, ['Just plain text output']) + const result = await agent.run('Hello') + + expect(result.success).toBe(true) + expect(result.output).toBe('Just plain text output') + expect(result.structured).toBeUndefined() + }) + + it('handles JSON wrapped in markdown fence', async () => { + const fenced = '```json\n{"summary":"ok","sentiment":"neutral","confidence":0.5}\n```' + + const agent = buildMockAgent(baseConfig, [fenced]) + const result = await agent.run('Analyze') + + expect(result.success).toBe(true) + expect(result.structured).toEqual({ + summary: 'ok', + sentiment: 'neutral', + confidence: 0.5, + }) + }) + + it('non-JSON output triggers retry, valid JSON on retry succeeds', async () => { + const nonJSON = 'I am not sure how to analyze this.' 
+ const validJSON = JSON.stringify({ + summary: 'Uncertain', + sentiment: 'neutral', + confidence: 0.1, + }) + + const agent = buildMockAgent(baseConfig, [nonJSON, validJSON]) + const result = await agent.run('Analyze this review') + + expect(result.success).toBe(true) + expect(result.structured).toEqual({ + summary: 'Uncertain', + sentiment: 'neutral', + confidence: 0.1, + }) + }) + + it('non-JSON output on both attempts: success=false', async () => { + const agent = buildMockAgent(baseConfig, [ + 'Sorry, I cannot do that.', + 'Still cannot do it.', + ]) + const result = await agent.run('Analyze this review') + + expect(result.success).toBe(false) + expect(result.structured).toBeUndefined() + }) + + it('token usage on first-attempt success reflects single call only', async () => { + const validJSON = JSON.stringify({ + summary: 'Good', + sentiment: 'positive', + confidence: 0.9, + }) + + const agent = buildMockAgent(baseConfig, [validJSON]) + const result = await agent.run('Analyze') + + expect(result.tokenUsage.input_tokens).toBe(10) + expect(result.tokenUsage.output_tokens).toBe(20) + }) +}) diff --git a/tests/task-retry.test.ts b/tests/task-retry.test.ts new file mode 100644 index 0000000..56bdb76 --- /dev/null +++ b/tests/task-retry.test.ts @@ -0,0 +1,368 @@ +import { describe, it, expect, vi } from 'vitest' +import { createTask } from '../src/task/task.js' +import { executeWithRetry, computeRetryDelay } from '../src/orchestrator/orchestrator.js' +import type { AgentRunResult } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +const SUCCESS_RESULT: AgentRunResult = { + success: true, + output: 'done', + messages: [], + tokenUsage: { input_tokens: 10, output_tokens: 20 }, + toolCalls: [], +} + +const FAILURE_RESULT: AgentRunResult = { + success: false, + output: 'agent failed', + messages: [], + tokenUsage: { 
input_tokens: 10, output_tokens: 20 }, + toolCalls: [], +} + +/** No-op delay for tests. */ +const noDelay = () => Promise.resolve() + +// --------------------------------------------------------------------------- +// computeRetryDelay +// --------------------------------------------------------------------------- + +describe('computeRetryDelay', () => { + it('computes exponential backoff', () => { + expect(computeRetryDelay(1000, 2, 1)).toBe(1000) // 1000 * 2^0 + expect(computeRetryDelay(1000, 2, 2)).toBe(2000) // 1000 * 2^1 + expect(computeRetryDelay(1000, 2, 3)).toBe(4000) // 1000 * 2^2 + }) + + it('caps at 30 seconds', () => { + // 1000 * 2^20 = 1,048,576,000 — way over cap + expect(computeRetryDelay(1000, 2, 21)).toBe(30_000) + }) + + it('handles backoff of 1 (constant delay)', () => { + expect(computeRetryDelay(500, 1, 1)).toBe(500) + expect(computeRetryDelay(500, 1, 5)).toBe(500) + }) +}) + +// --------------------------------------------------------------------------- +// createTask: retry fields +// --------------------------------------------------------------------------- + +describe('createTask with retry fields', () => { + it('passes through retry config', () => { + const t = createTask({ + title: 'Retry task', + description: 'test', + maxRetries: 3, + retryDelayMs: 500, + retryBackoff: 1.5, + }) + expect(t.maxRetries).toBe(3) + expect(t.retryDelayMs).toBe(500) + expect(t.retryBackoff).toBe(1.5) + }) + + it('defaults retry fields to undefined', () => { + const t = createTask({ title: 'No retry', description: 'test' }) + expect(t.maxRetries).toBeUndefined() + expect(t.retryDelayMs).toBeUndefined() + expect(t.retryBackoff).toBeUndefined() + }) +}) + +// --------------------------------------------------------------------------- +// executeWithRetry — tests the real exported function +// --------------------------------------------------------------------------- + +describe('executeWithRetry', () => { + it('succeeds on first attempt with no retry 
config', async () => { + const run = vi.fn().mockResolvedValue(SUCCESS_RESULT) + const task = createTask({ title: 'Simple', description: 'test' }) + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(true) + expect(result.output).toBe('done') + expect(run).toHaveBeenCalledTimes(1) + }) + + it('succeeds on first attempt even when maxRetries > 0', async () => { + const run = vi.fn().mockResolvedValue(SUCCESS_RESULT) + const task = createTask({ + title: 'Has retries', + description: 'test', + maxRetries: 3, + }) + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(true) + expect(run).toHaveBeenCalledTimes(1) + }) + + it('retries on exception and succeeds on second attempt', async () => { + const run = vi.fn() + .mockRejectedValueOnce(new Error('transient error')) + .mockResolvedValueOnce(SUCCESS_RESULT) + + const task = createTask({ + title: 'Retry task', + description: 'test', + maxRetries: 2, + retryDelayMs: 100, + retryBackoff: 2, + }) + + const retryEvents: unknown[] = [] + const result = await executeWithRetry( + run, + task, + (data) => retryEvents.push(data), + noDelay, + ) + + expect(result.success).toBe(true) + expect(run).toHaveBeenCalledTimes(2) + expect(retryEvents).toHaveLength(1) + expect(retryEvents[0]).toEqual({ + attempt: 1, + maxAttempts: 3, + error: 'transient error', + nextDelayMs: 100, // 100 * 2^0 + }) + }) + + it('retries on success:false and succeeds on second attempt', async () => { + const run = vi.fn() + .mockResolvedValueOnce(FAILURE_RESULT) + .mockResolvedValueOnce(SUCCESS_RESULT) + + const task = createTask({ + title: 'Retry task', + description: 'test', + maxRetries: 1, + retryDelayMs: 50, + }) + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(true) + expect(run).toHaveBeenCalledTimes(2) + }) + + it('exhausts all retries on persistent exception', async () => { + const run = 
vi.fn().mockRejectedValue(new Error('persistent error')) + + const task = createTask({ + title: 'Always fails', + description: 'test', + maxRetries: 2, + retryDelayMs: 10, + retryBackoff: 1, + }) + + const retryEvents: unknown[] = [] + const result = await executeWithRetry( + run, + task, + (data) => retryEvents.push(data), + noDelay, + ) + + expect(result.success).toBe(false) + expect(result.output).toBe('persistent error') + expect(run).toHaveBeenCalledTimes(3) // 1 initial + 2 retries + expect(retryEvents).toHaveLength(2) + }) + + it('exhausts all retries on persistent success:false', async () => { + const run = vi.fn().mockResolvedValue(FAILURE_RESULT) + + const task = createTask({ + title: 'Always fails', + description: 'test', + maxRetries: 1, + }) + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(false) + expect(result.output).toBe('agent failed') + expect(run).toHaveBeenCalledTimes(2) + }) + + it('emits correct exponential backoff delays', async () => { + const run = vi.fn().mockRejectedValue(new Error('error')) + + const task = createTask({ + title: 'Backoff test', + description: 'test', + maxRetries: 3, + retryDelayMs: 100, + retryBackoff: 2, + }) + + const retryEvents: Array<{ nextDelayMs: number }> = [] + await executeWithRetry( + run, + task, + (data) => retryEvents.push(data), + noDelay, + ) + + expect(retryEvents).toHaveLength(3) + expect(retryEvents[0]!.nextDelayMs).toBe(100) // 100 * 2^0 + expect(retryEvents[1]!.nextDelayMs).toBe(200) // 100 * 2^1 + expect(retryEvents[2]!.nextDelayMs).toBe(400) // 100 * 2^2 + }) + + it('no retry events when maxRetries is 0 (default)', async () => { + const run = vi.fn().mockRejectedValue(new Error('fail')) + const task = createTask({ title: 'No retry', description: 'test' }) + + const retryEvents: unknown[] = [] + const result = await executeWithRetry( + run, + task, + (data) => retryEvents.push(data), + noDelay, + ) + + expect(result.success).toBe(false) + 
expect(run).toHaveBeenCalledTimes(1) + expect(retryEvents).toHaveLength(0) + }) + + it('calls the delay function with computed delay', async () => { + const run = vi.fn() + .mockRejectedValueOnce(new Error('error')) + .mockResolvedValueOnce(SUCCESS_RESULT) + + const task = createTask({ + title: 'Delay test', + description: 'test', + maxRetries: 1, + retryDelayMs: 250, + retryBackoff: 3, + }) + + const mockDelay = vi.fn().mockResolvedValue(undefined) + await executeWithRetry(run, task, undefined, mockDelay) + + expect(mockDelay).toHaveBeenCalledTimes(1) + expect(mockDelay).toHaveBeenCalledWith(250) // 250 * 3^0 + }) + + it('caps delay at 30 seconds', async () => { + const run = vi.fn() + .mockRejectedValueOnce(new Error('error')) + .mockResolvedValueOnce(SUCCESS_RESULT) + + const task = createTask({ + title: 'Cap test', + description: 'test', + maxRetries: 1, + retryDelayMs: 50_000, + retryBackoff: 2, + }) + + const mockDelay = vi.fn().mockResolvedValue(undefined) + await executeWithRetry(run, task, undefined, mockDelay) + + expect(mockDelay).toHaveBeenCalledWith(30_000) // capped + }) + + it('accumulates token usage across retry attempts', async () => { + const failResult: AgentRunResult = { + ...FAILURE_RESULT, + tokenUsage: { input_tokens: 100, output_tokens: 50 }, + } + const successResult: AgentRunResult = { + ...SUCCESS_RESULT, + tokenUsage: { input_tokens: 200, output_tokens: 80 }, + } + + const run = vi.fn() + .mockResolvedValueOnce(failResult) + .mockResolvedValueOnce(failResult) + .mockResolvedValueOnce(successResult) + + const task = createTask({ + title: 'Token test', + description: 'test', + maxRetries: 2, + retryDelayMs: 10, + }) + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(true) + // 100+100+200 input, 50+50+80 output + expect(result.tokenUsage.input_tokens).toBe(400) + expect(result.tokenUsage.output_tokens).toBe(180) + }) + + it('accumulates token usage even when all retries fail', async () 
=> { + const failResult: AgentRunResult = { + ...FAILURE_RESULT, + tokenUsage: { input_tokens: 50, output_tokens: 30 }, + } + + const run = vi.fn().mockResolvedValue(failResult) + + const task = createTask({ + title: 'Token fail test', + description: 'test', + maxRetries: 1, + }) + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(false) + // 50+50 input, 30+30 output (2 attempts) + expect(result.tokenUsage.input_tokens).toBe(100) + expect(result.tokenUsage.output_tokens).toBe(60) + }) + + it('clamps negative maxRetries to 0 (single attempt)', async () => { + const run = vi.fn().mockRejectedValue(new Error('fail')) + + const task = createTask({ + title: 'Negative retry', + description: 'test', + maxRetries: -5, + }) + // Manually set negative value since createTask doesn't validate + ;(task as any).maxRetries = -5 + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(false) + expect(run).toHaveBeenCalledTimes(1) // exactly 1 attempt, no retries + }) + + it('clamps backoff below 1 to 1 (constant delay)', async () => { + const run = vi.fn() + .mockRejectedValueOnce(new Error('error')) + .mockResolvedValueOnce(SUCCESS_RESULT) + + const task = createTask({ + title: 'Bad backoff', + description: 'test', + maxRetries: 1, + retryDelayMs: 100, + retryBackoff: -2, + }) + ;(task as any).retryBackoff = -2 + + const mockDelay = vi.fn().mockResolvedValue(undefined) + await executeWithRetry(run, task, undefined, mockDelay) + + // backoff clamped to 1, so delay = 100 * 1^0 = 100 + expect(mockDelay).toHaveBeenCalledWith(100) + }) +}) diff --git a/tests/trace.test.ts b/tests/trace.test.ts new file mode 100644 index 0000000..fbeb78c --- /dev/null +++ b/tests/trace.test.ts @@ -0,0 +1,453 @@ +import { describe, it, expect, vi } from 'vitest' +import { z } from 'zod' +import { Agent } from '../src/agent/agent.js' +import { AgentRunner, type RunOptions } from '../src/agent/runner.js' 
+import { ToolRegistry, defineTool } from '../src/tool/framework.js'
+import { ToolExecutor } from '../src/tool/executor.js'
+import { executeWithRetry } from '../src/orchestrator/orchestrator.js'
+import { emitTrace, generateRunId } from '../src/utils/trace.js'
+import { createTask } from '../src/task/task.js'
+import type {
+  AgentConfig,
+  AgentRunResult,
+  LLMAdapter,
+  LLMResponse,
+  TraceEvent,
+} from '../src/types.js'
+
+// ---------------------------------------------------------------------------
+// Mock adapters
+// ---------------------------------------------------------------------------
+
+function mockAdapter(responses: LLMResponse[]): LLMAdapter {
+  let callIndex = 0
+  return {
+    name: 'mock',
+    async chat() {
+      return responses[callIndex++]!
+    },
+    async *stream() {
+      /* unused */
+    },
+  }
+}
+
+function textResponse(text: string): LLMResponse {
+  return {
+    id: `resp-${Math.random().toString(36).slice(2)}`,
+    content: [{ type: 'text' as const, text }],
+    model: 'mock-model',
+    stop_reason: 'end_turn',
+    usage: { input_tokens: 10, output_tokens: 20 },
+  }
+}
+
+function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse {
+  return {
+    id: `resp-${Math.random().toString(36).slice(2)}`,
+    content: [
+      {
+        type: 'tool_use' as const,
+        id: `tu-${Math.random().toString(36).slice(2)}`,
+        name: toolName,
+        input,
+      },
+    ],
+    model: 'mock-model',
+    stop_reason: 'tool_use',
+    usage: { input_tokens: 15, output_tokens: 25 },
+  }
+}
+
+function buildMockAgent(
+  config: AgentConfig,
+  responses: LLMResponse[],
+  registry?: ToolRegistry,
+  executor?: ToolExecutor,
+): Agent {
+  const reg = registry ?? new ToolRegistry()
+  const exec = executor ??
new ToolExecutor(reg) + const adapter = mockAdapter(responses) + const agent = new Agent(config, reg, exec) + + const runner = new AgentRunner(adapter, reg, exec, { + model: config.model, + systemPrompt: config.systemPrompt, + maxTurns: config.maxTurns, + maxTokens: config.maxTokens, + temperature: config.temperature, + agentName: config.name, + }) + ;(agent as any).runner = runner + + return agent +} + +// --------------------------------------------------------------------------- +// emitTrace helper +// --------------------------------------------------------------------------- + +describe('emitTrace', () => { + it('does nothing when fn is undefined', () => { + // Should not throw + emitTrace(undefined, { + type: 'agent', + runId: 'r1', + agent: 'a', + turns: 1, + tokens: { input_tokens: 0, output_tokens: 0 }, + toolCalls: 0, + startMs: 0, + endMs: 0, + durationMs: 0, + }) + }) + + it('calls fn with the event', () => { + const fn = vi.fn() + const event: TraceEvent = { + type: 'agent', + runId: 'r1', + agent: 'a', + turns: 1, + tokens: { input_tokens: 0, output_tokens: 0 }, + toolCalls: 0, + startMs: 0, + endMs: 0, + durationMs: 0, + } + emitTrace(fn, event) + expect(fn).toHaveBeenCalledWith(event) + }) + + it('swallows errors thrown by callback', () => { + const fn = () => { throw new Error('boom') } + expect(() => + emitTrace(fn, { + type: 'agent', + runId: 'r1', + agent: 'a', + turns: 1, + tokens: { input_tokens: 0, output_tokens: 0 }, + toolCalls: 0, + startMs: 0, + endMs: 0, + durationMs: 0, + }), + ).not.toThrow() + }) + + it('swallows rejected promises from async callbacks', async () => { + // An async onTrace that rejects should not produce unhandled rejection + const fn = async () => { throw new Error('async boom') } + emitTrace(fn as unknown as (event: TraceEvent) => void, { + type: 'agent', + runId: 'r1', + agent: 'a', + turns: 1, + tokens: { input_tokens: 0, output_tokens: 0 }, + toolCalls: 0, + startMs: 0, + endMs: 0, + durationMs: 0, + }) + // If 
the rejection is not caught, vitest will fail with unhandled rejection. + // Give the microtask queue a tick to surface any unhandled rejection. + await new Promise(resolve => setTimeout(resolve, 10)) + }) +}) + +describe('generateRunId', () => { + it('returns a UUID string', () => { + const id = generateRunId() + expect(id).toMatch(/^[0-9a-f-]{36}$/) + }) + + it('returns unique IDs', () => { + const ids = new Set(Array.from({ length: 100 }, generateRunId)) + expect(ids.size).toBe(100) + }) +}) + +// --------------------------------------------------------------------------- +// AgentRunner trace events +// --------------------------------------------------------------------------- + +describe('AgentRunner trace events', () => { + it('emits llm_call trace for each LLM turn', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + const executor = new ToolExecutor(registry) + const adapter = mockAdapter([textResponse('Hello!')]) + + const runner = new AgentRunner(adapter, registry, executor, { + model: 'test-model', + agentName: 'test-agent', + }) + + const runOptions: RunOptions = { + onTrace: (e) => traces.push(e), + runId: 'run-1', + traceAgent: 'test-agent', + } + + await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'hi' }] }], + runOptions, + ) + + const llmTraces = traces.filter(t => t.type === 'llm_call') + expect(llmTraces).toHaveLength(1) + + const llm = llmTraces[0]! 
+ expect(llm.type).toBe('llm_call') + expect(llm.runId).toBe('run-1') + expect(llm.agent).toBe('test-agent') + expect(llm.model).toBe('test-model') + expect(llm.turn).toBe(1) + expect(llm.tokens).toEqual({ input_tokens: 10, output_tokens: 20 }) + expect(llm.durationMs).toBeGreaterThanOrEqual(0) + expect(llm.startMs).toBeLessThanOrEqual(llm.endMs) + }) + + it('emits tool_call trace with correct fields', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + registry.register( + defineTool({ + name: 'echo', + description: 'echoes', + inputSchema: z.object({ msg: z.string() }), + execute: async ({ msg }) => ({ data: msg }), + }), + ) + const executor = new ToolExecutor(registry) + const adapter = mockAdapter([ + toolUseResponse('echo', { msg: 'hello' }), + textResponse('Done'), + ]) + + const runner = new AgentRunner(adapter, registry, executor, { + model: 'test-model', + agentName: 'tooler', + }) + + await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'test' }] }], + { onTrace: (e) => traces.push(e), runId: 'run-2', traceAgent: 'tooler' }, + ) + + const toolTraces = traces.filter(t => t.type === 'tool_call') + expect(toolTraces).toHaveLength(1) + + const tool = toolTraces[0]! 
+ expect(tool.type).toBe('tool_call') + expect(tool.runId).toBe('run-2') + expect(tool.agent).toBe('tooler') + expect(tool.tool).toBe('echo') + expect(tool.isError).toBe(false) + expect(tool.durationMs).toBeGreaterThanOrEqual(0) + }) + + it('tool_call trace has isError: true on tool failure', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + registry.register( + defineTool({ + name: 'boom', + description: 'fails', + inputSchema: z.object({}), + execute: async () => { throw new Error('fail') }, + }), + ) + const executor = new ToolExecutor(registry) + const adapter = mockAdapter([ + toolUseResponse('boom', {}), + textResponse('Handled'), + ]) + + const runner = new AgentRunner(adapter, registry, executor, { + model: 'test-model', + agentName: 'err-agent', + }) + + await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'test' }] }], + { onTrace: (e) => traces.push(e), runId: 'run-3', traceAgent: 'err-agent' }, + ) + + const toolTraces = traces.filter(t => t.type === 'tool_call') + expect(toolTraces).toHaveLength(1) + expect(toolTraces[0]!.isError).toBe(true) + }) + + it('does not call Date.now for LLM timing when onTrace is absent', async () => { + // This test just verifies no errors occur when onTrace is not provided + const registry = new ToolRegistry() + const executor = new ToolExecutor(registry) + const adapter = mockAdapter([textResponse('hi')]) + + const runner = new AgentRunner(adapter, registry, executor, { + model: 'test-model', + }) + + const result = await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'test' }] }], + {}, + ) + + expect(result.output).toBe('hi') + }) +}) + +// --------------------------------------------------------------------------- +// Agent-level trace events +// --------------------------------------------------------------------------- + +describe('Agent trace events', () => { + it('emits agent trace with turns, tokens, and toolCalls', async () => { + const traces: 
TraceEvent[] = []
+    const config: AgentConfig = {
+      name: 'my-agent',
+      model: 'mock-model',
+      systemPrompt: 'You are a test.',
+    }
+
+    const agent = buildMockAgent(config, [textResponse('Hello world')])
+
+    const runOptions: Partial<RunOptions> = {
+      onTrace: (e) => traces.push(e),
+      runId: 'run-agent-1',
+      traceAgent: 'my-agent',
+    }
+
+    const result = await agent.run('Say hello', runOptions)
+    expect(result.success).toBe(true)
+
+    const agentTraces = traces.filter(t => t.type === 'agent')
+    expect(agentTraces).toHaveLength(1)
+
+    const at = agentTraces[0]!
+    expect(at.type).toBe('agent')
+    expect(at.runId).toBe('run-agent-1')
+    expect(at.agent).toBe('my-agent')
+    expect(at.turns).toBe(1) // one assistant message
+    expect(at.tokens).toEqual({ input_tokens: 10, output_tokens: 20 })
+    expect(at.toolCalls).toBe(0)
+    expect(at.durationMs).toBeGreaterThanOrEqual(0)
+  })
+
+  it('all traces share the same runId', async () => {
+    const traces: TraceEvent[] = []
+    const registry = new ToolRegistry()
+    registry.register(
+      defineTool({
+        name: 'greet',
+        description: 'greets',
+        inputSchema: z.object({ name: z.string() }),
+        execute: async ({ name }) => ({ data: `Hi ${name}` }),
+      }),
+    )
+    const executor = new ToolExecutor(registry)
+    const config: AgentConfig = {
+      name: 'multi-trace-agent',
+      model: 'mock-model',
+      tools: ['greet'],
+    }
+
+    const agent = buildMockAgent(
+      config,
+      [
+        toolUseResponse('greet', { name: 'world' }),
+        textResponse('Done'),
+      ],
+      registry,
+      executor,
+    )
+
+    const runId = 'shared-run-id'
+    await agent.run('test', {
+      onTrace: (e) => traces.push(e),
+      runId,
+      traceAgent: 'multi-trace-agent',
+    })
+
+    // Should have: 2 llm_call, 1 tool_call, 1 agent
+    expect(traces.length).toBeGreaterThanOrEqual(4)
+
+    for (const trace of traces) {
+      expect(trace.runId).toBe(runId)
+    }
+  })
+
+  it('onTrace error does not break agent execution', async () => {
+    const config: AgentConfig = {
+      name: 'resilient-agent',
+      model: 'mock-model',
+    }
+
+    const agent =
buildMockAgent(config, [textResponse('OK')]) + + const result = await agent.run('test', { + onTrace: () => { throw new Error('callback exploded') }, + runId: 'run-err', + traceAgent: 'resilient-agent', + }) + + // The run should still succeed despite the broken callback + expect(result.success).toBe(true) + expect(result.output).toBe('OK') + }) + + it('per-turn token usage in llm_call traces', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + registry.register( + defineTool({ + name: 'noop', + description: 'noop', + inputSchema: z.object({}), + execute: async () => ({ data: 'ok' }), + }), + ) + const executor = new ToolExecutor(registry) + + // Two LLM calls: first triggers a tool, second is the final response + const resp1: LLMResponse = { + id: 'r1', + content: [{ type: 'tool_use', id: 'tu1', name: 'noop', input: {} }], + model: 'mock-model', + stop_reason: 'tool_use', + usage: { input_tokens: 100, output_tokens: 50 }, + } + const resp2: LLMResponse = { + id: 'r2', + content: [{ type: 'text', text: 'Final answer' }], + model: 'mock-model', + stop_reason: 'end_turn', + usage: { input_tokens: 200, output_tokens: 100 }, + } + + const adapter = mockAdapter([resp1, resp2]) + const runner = new AgentRunner(adapter, registry, executor, { + model: 'mock-model', + agentName: 'token-agent', + }) + + await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'go' }] }], + { onTrace: (e) => traces.push(e), runId: 'run-tok', traceAgent: 'token-agent' }, + ) + + const llmTraces = traces.filter(t => t.type === 'llm_call') + expect(llmTraces).toHaveLength(2) + + // Each trace carries its own turn's token usage, not the aggregate + expect(llmTraces[0]!.tokens).toEqual({ input_tokens: 100, output_tokens: 50 }) + expect(llmTraces[1]!.tokens).toEqual({ input_tokens: 200, output_tokens: 100 }) + + // Turn numbers should be sequential + expect(llmTraces[0]!.turn).toBe(1) + expect(llmTraces[1]!.turn).toBe(2) + }) +})