diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..8f43f71 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,40 @@ +--- +name: Bug Report +about: Report a bug to help us improve +title: "[Bug] " +labels: bug +assignees: '' +--- + +## Describe the bug + +A clear and concise description of what the bug is. + +## To Reproduce + +Steps to reproduce the behavior: + +1. Configure agent with '...' +2. Call `runTeam(...)` with '...' +3. See error + +## Expected behavior + +A clear description of what you expected to happen. + +## Error output + +``` +Paste any error messages or logs here +``` + +## Environment + +- OS: [e.g. macOS 14, Ubuntu 22.04] +- Node.js version: [e.g. 20.11] +- Package version: [e.g. 0.1.0] +- LLM provider: [e.g. Anthropic, OpenAI] + +## Additional context + +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..c31759e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,23 @@ +--- +name: Feature Request +about: Suggest an idea for this project +title: "[Feature] " +labels: enhancement +assignees: '' +--- + +## Problem + +A clear description of the problem or limitation you're experiencing. + +## Proposed Solution + +Describe what you'd like to happen. + +## Alternatives Considered + +Any alternative solutions or features you've considered. + +## Additional context + +Add any other context, code examples, or screenshots about the feature request here. 
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..739d91d --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,14 @@ +## What + + + +## Why + + + +## Checklist + +- [ ] `npm run lint` passes +- [ ] `npm test` passes +- [ ] Added/updated tests for changed behavior +- [ ] No new runtime dependencies (or justified in the PR description) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..6f5b577 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,23 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + node-version: [18, 20, 22] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.node-version }} + cache: npm + - run: npm ci + - run: npm run lint + - run: npm test diff --git a/.gitignore b/.gitignore index 523e756..f321a49 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ node_modules/ dist/ +coverage/ *.tgz .DS_Store promo-*.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..6cbeb45 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,80 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Commands + +```bash +npm run build # Compile TypeScript (src/ → dist/) +npm run dev # Watch mode compilation +npm run lint # Type-check only (tsc --noEmit) +npm test # Run all tests (vitest run) +npm run test:watch # Vitest watch mode +``` + +Tests live in `tests/` (vitest). Examples in `examples/` are standalone scripts requiring API keys (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`). + +## Architecture + +ES module TypeScript framework for multi-agent orchestration. Three runtime dependencies: `@anthropic-ai/sdk`, `openai`, `zod`. 
+ +### Core Execution Flow + +**`OpenMultiAgent`** (`src/orchestrator/orchestrator.ts`) is the top-level public API with three execution modes: + +1. **`runAgent(config, prompt)`** — single agent, one-shot +2. **`runTeam(team, goal)`** — automatic orchestration: a temporary "coordinator" agent decomposes the goal into a task DAG via LLM call, then tasks execute in dependency order +3. **`runTasks(team, tasks)`** — explicit task pipeline with user-defined dependencies + +### The Coordinator Pattern (runTeam) + +This is the framework's key feature. When `runTeam()` is called: +1. A coordinator agent receives the goal + agent roster and produces a JSON task array (title, description, assignee, dependsOn) +2. `TaskQueue` resolves dependencies topologically — independent tasks run in parallel, dependent tasks wait +3. `Scheduler` auto-assigns any unassigned tasks (strategies: `dependency-first` default, `round-robin`, `least-busy`, `capability-match`) +4. Each task result is written to `SharedMemory` so subsequent agents see prior results +5. 
The coordinator synthesizes all task results into a final output + +### Layer Map + +| Layer | Files | Responsibility | +|-------|-------|----------------| +| Orchestrator | `orchestrator/orchestrator.ts`, `orchestrator/scheduler.ts` | Top-level API, task decomposition, coordinator pattern | +| Team | `team/team.ts`, `team/messaging.ts` | Agent roster, MessageBus (point-to-point + broadcast), SharedMemory binding | +| Agent | `agent/agent.ts`, `agent/runner.ts`, `agent/pool.ts`, `agent/structured-output.ts` | Agent lifecycle (idle→running→completed/error), conversation loop, concurrency pool with Semaphore, structured output validation | +| Task | `task/queue.ts`, `task/task.ts` | Dependency-aware queue, auto-unblock on completion, cascade failure to dependents | +| Tool | `tool/framework.ts`, `tool/executor.ts`, `tool/built-in/` | `defineTool()` with Zod schemas, ToolRegistry, parallel batch execution with concurrency semaphore | +| LLM | `llm/adapter.ts`, `llm/anthropic.ts`, `llm/openai.ts` | `LLMAdapter` interface (`chat` + `stream`), factory `createAdapter()` | +| Memory | `memory/shared.ts`, `memory/store.ts` | Namespaced key-value store (`agentName/key`), markdown summary injection into prompts | +| Types | `types.ts` | All interfaces in one file to avoid circular deps | +| Exports | `index.ts` | Public API surface | + +### Agent Conversation Loop (AgentRunner) + +`AgentRunner.run()`: send messages → extract tool-use blocks → execute tools in parallel batch → append results → loop until `end_turn` or `maxTurns` exhausted. Accumulates `TokenUsage` across all turns. + +### Concurrency Control + +Two independent semaphores: `AgentPool` (max concurrent agent runs, default 5) and `ToolExecutor` (max concurrent tool calls, default 4). + +### Structured Output + +Optional `outputSchema` (Zod) on `AgentConfig`. When set, the agent's final output is parsed as JSON and validated. On validation failure, one retry with error feedback is attempted. 
Validated data is available via `result.structured`. Logic lives in `agent/structured-output.ts`, wired into `Agent.executeRun()`. + +### Task Retry + +Optional `maxRetries`, `retryDelayMs`, `retryBackoff` on task config (used via `runTasks()`). `executeWithRetry()` in `orchestrator.ts` handles the retry loop with exponential backoff (capped at 30s). Token usage is accumulated across all attempts. Emits `task_retry` event via `onProgress`. + +### Error Handling + +- Tool errors → caught, returned as `ToolResult(isError: true)`, never thrown +- Task failures → retry if `maxRetries > 0`, then cascade to all dependents; independent tasks continue +- LLM API errors → propagate to caller + +### Built-in Tools + +`bash`, `file_read`, `file_write`, `file_edit`, `grep` — registered via `registerBuiltInTools(registry)`. + +### Adding an LLM Adapter + +Implement `LLMAdapter` interface with `chat(messages, options)` and `stream(messages, options)`, then register in `createAdapter()` factory in `src/llm/adapter.ts`. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..1036d4e --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,48 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a positive experience for everyone, regardless of background or +identity. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment: + +- Using welcoming and inclusive language +- Being respectful of differing viewpoints and experiences +- Gracefully accepting constructive feedback +- Focusing on what is best for the community +- Showing empathy towards other community members + +Examples of unacceptable behavior: + +- Trolling, insulting or derogatory comments, and personal attacks +- Public or private harassment +- Publishing others' private information without explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate or harmful. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. + +## Enforcement + +Instances of unacceptable behavior may be reported to the community leaders +responsible for enforcement at **jack@yuanasi.com**. All complaints will be +reviewed and investigated promptly and fairly. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org), +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html). diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..e17dd36 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,72 @@ +# Contributing + +Thanks for your interest in contributing to Open Multi-Agent! This guide covers the basics to get you started. 
+ +## Setup + +```bash +git clone https://github.com/JackChen-me/open-multi-agent.git +cd open-multi-agent +npm install +``` + +Requires Node.js >= 18. + +## Development Commands + +```bash +npm run build # Compile TypeScript (src/ → dist/) +npm run dev # Watch mode compilation +npm run lint # Type-check (tsc --noEmit) +npm test # Run all tests (vitest) +npm run test:watch # Vitest watch mode +``` + +## Running Tests + +All tests live in `tests/`. They test core modules (TaskQueue, SharedMemory, ToolExecutor, Semaphore) without requiring API keys or network access. + +```bash +npm test +``` + +Every PR must pass `npm run lint && npm test`. CI runs both automatically on Node 18, 20, and 22. + +## Making a Pull Request + +1. Fork the repo and create a branch from `main` +2. Make your changes +3. Add or update tests if you changed behavior +4. Run `npm run lint && npm test` locally +5. Open a PR against `main` + +### PR Checklist + +- [ ] `npm run lint` passes +- [ ] `npm test` passes +- [ ] New behavior has test coverage +- [ ] Linked to a relevant issue (if one exists) + +## Code Style + +- TypeScript strict mode, ES modules (`.js` extensions in imports) +- No additional linter/formatter configured — follow existing patterns +- Keep dependencies minimal (currently 3 runtime deps: `@anthropic-ai/sdk`, `openai`, `zod`) + +## Architecture Overview + +See the [README](./README.md#architecture) for an architecture diagram. Key entry points: + +- **Orchestrator**: `src/orchestrator/orchestrator.ts` — top-level API +- **Task system**: `src/task/queue.ts`, `src/task/task.ts` — dependency DAG +- **Agent**: `src/agent/runner.ts` — conversation loop +- **Tools**: `src/tool/framework.ts`, `src/tool/executor.ts` — tool registry and execution +- **LLM adapters**: `src/llm/` — Anthropic, OpenAI, Copilot + +## Where to Contribute + +Check the [issues](https://github.com/JackChen-me/open-multi-agent/issues) page. Issues labeled `good first issue` are scoped and approachable. 
Issues labeled `help wanted` are larger but well-defined. + +## License + +By contributing, you agree that your contributions will be licensed under the MIT License. diff --git a/DECISIONS.md b/DECISIONS.md new file mode 100644 index 0000000..a16151f --- /dev/null +++ b/DECISIONS.md @@ -0,0 +1,43 @@ +# Architecture Decisions + +This document records deliberate "won't do" decisions for the project. These are features we evaluated and chose NOT to implement — not because they're bad ideas, but because they conflict with our positioning as the **simplest multi-agent framework**. + +If you're considering a PR in any of these areas, please open a discussion first. + +## Won't Do + +### 1. Agent Handoffs + +**What**: Agent A transfers an in-progress conversation to Agent B (like OpenAI Agents SDK `handoff()`). + +**Why not**: Handoffs are a different paradigm from our task-based model. Our tasks have clear boundaries — one agent, one task, one result. Handoffs blur those boundaries and add state-transfer complexity. Users who need handoffs likely need a different framework (OpenAI Agents SDK is purpose-built for this). + +### 2. State Persistence / Checkpointing + +**What**: Save workflow state to a database so long-running workflows can resume after crashes (like LangGraph checkpointing). + +**Why not**: Requires a storage backend (SQLite, Redis, Postgres), schema migrations, and serialization logic. This is enterprise infrastructure — it triples the complexity surface. Our target users run workflows that complete in seconds to minutes, not hours. If you need checkpointing, LangGraph is the right tool. + +**Related**: Closing #20 with this rationale. + +### 3. A2A Protocol (Agent-to-Agent) + +**What**: Google's open protocol for agents on different servers to discover and communicate with each other. + +**Why not**: Too early — the spec is still evolving and adoption is minimal. Our users run agents in a single process, not across distributed services. 
If A2A matures and there's real demand, we can revisit. Today it would add complexity for zero practical benefit. + +### 4. MCP Integration (Model Context Protocol) + +**What**: Anthropic's protocol for connecting LLMs to external tools and data sources. + +**Why not**: MCP is valuable but targets a different layer. Our `defineTool()` API already lets users wrap any external service as a tool in ~10 lines of code. Adding MCP would mean maintaining protocol compatibility, transport layers, and tool discovery — complexity that serves tool platform builders, not our target users who just want to run agent teams. + +### 5. Dashboard / Visualization + +**What**: Built-in web UI to visualize task DAGs, agent activity, and token usage. + +**Why not**: We expose data, we don't build UI. The `onProgress` callback and the `onTrace` callback (#18) give users all the raw data. They can pipe it into Grafana, build a custom dashboard, or use console logs. Shipping a web UI means owning a frontend stack, which is outside our scope. + +--- + +*Last updated: 2026-04-03* diff --git a/README.md b/README.md index 31d3509..d9b5d39 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ # Open Multi-Agent -Build AI agent teams that work together. One agent plans, another implements, a third reviews — the framework handles task scheduling, dependencies, and communication automatically. +TypeScript framework for multi-agent orchestration. One `runTeam()` call from goal to result — the framework decomposes it into tasks, resolves dependencies, and runs agents in parallel. 
+ +3 runtime dependencies · 27 source files · Deploys anywhere Node.js runs · Mentioned in [Latent Space](https://www.latent.space/p/ainews-a-quiet-april-fools) AI News [![GitHub stars](https://img.shields.io/github/stars/JackChen-me/open-multi-agent)](https://github.com/JackChen-me/open-multi-agent/stargazers) [![license](https://img.shields.io/github/license/JackChen-me/open-multi-agent)](./LICENSE) @@ -10,40 +12,26 @@ Build AI agent teams that work together. One agent plans, another implements, a ## Why Open Multi-Agent? -- **Multi-Agent Teams** — Define agents with different roles, tools, and even different models. They collaborate through a message bus and shared memory. -- **Task DAG Scheduling** — Tasks have dependencies. The framework resolves them topologically — dependent tasks wait, independent tasks run in parallel. -- **Model Agnostic** — Claude and GPT in the same team. Swap models per agent. Bring your own adapter for any LLM. -- **In-Process Execution** — No subprocess overhead. Everything runs in one Node.js process. Deploy to serverless, Docker, CI/CD. +- **Goal In, Result Out** — `runTeam(team, "Build a REST API")`. A coordinator agent auto-decomposes the goal into a task DAG with dependencies and assignees, runs independent tasks in parallel, and synthesizes the final output. No manual task definitions or graph wiring required. +- **TypeScript-Native** — Built for the Node.js ecosystem. `npm install`, import, run. No Python runtime, no subprocess bridge, no sidecar services. Embed in Express, Next.js, serverless functions, or CI/CD pipelines. +- **Auditable and Lightweight** — 3 runtime dependencies (`@anthropic-ai/sdk`, `openai`, `zod`). 27 source files. The entire codebase is readable in an afternoon. +- **Model Agnostic** — Claude, GPT, Gemma 4, and local models (Ollama, vLLM, LM Studio) in the same team. Swap models per agent via `baseURL`. 
+- **Multi-Agent Collaboration** — Agents with different roles, tools, and models collaborate through a message bus and shared memory. +- **Structured Output** — Add `outputSchema` (Zod) to any agent. Output is parsed as JSON, validated, and auto-retried once on failure. Access typed results via `result.structured`. +- **Task Retry** — Set `maxRetries` on tasks for automatic retry with exponential backoff. Failed attempts accumulate token usage for accurate billing. +- **Observability** — Optional `onTrace` callback emits structured spans for every LLM call, tool execution, task, and agent run — with timing, token usage, and a shared `runId` for correlation. Zero overhead when not subscribed, zero extra dependencies. ## Quick Start +Requires Node.js >= 18. + ```bash npm install @jackchen_me/open-multi-agent ``` -Set `ANTHROPIC_API_KEY` (and optionally `OPENAI_API_KEY`) in your environment. +Set `ANTHROPIC_API_KEY` (and optionally `OPENAI_API_KEY` or `GITHUB_TOKEN` for Copilot) in your environment. Local models via Ollama require no API key — see [example 06](examples/06-local-model.ts). -```typescript -import { OpenMultiAgent } from '@jackchen_me/open-multi-agent' - -const orchestrator = new OpenMultiAgent({ defaultModel: 'claude-sonnet-4-6' }) - -// One agent, one task -const result = await orchestrator.runAgent( - { - name: 'coder', - model: 'claude-sonnet-4-6', - tools: ['bash', 'file_write'], - }, - 'Write a TypeScript function that reverses a string, save it to /tmp/reverse.ts, and run it.', -) - -console.log(result.output) -``` - -## Multi-Agent Team - -This is where it gets interesting. Three agents, one goal: +Three agents, one goal — the framework handles the rest: ```typescript import { OpenMultiAgent } from '@jackchen_me/open-multi-agent' @@ -88,132 +76,52 @@ console.log(`Success: ${result.success}`) console.log(`Tokens: ${result.totalTokenUsage.output_tokens} output tokens`) ``` -## More Examples +What happens under the hood: -
-Task Pipeline — explicit control over task graph and assignments - -```typescript -const result = await orchestrator.runTasks(team, [ - { - title: 'Design the data model', - description: 'Write a TypeScript interface spec to /tmp/spec.md', - assignee: 'architect', - }, - { - title: 'Implement the module', - description: 'Read /tmp/spec.md and implement the module in /tmp/src/', - assignee: 'developer', - dependsOn: ['Design the data model'], // blocked until design completes - }, - { - title: 'Write tests', - description: 'Read the implementation and write Vitest tests.', - assignee: 'developer', - dependsOn: ['Implement the module'], - }, - { - title: 'Review code', - description: 'Review /tmp/src/ and produce a structured code review.', - assignee: 'reviewer', - dependsOn: ['Implement the module'], // can run in parallel with tests - }, -]) +``` +agent_start coordinator +task_start architect +task_complete architect +task_start developer +task_start developer // independent tasks run in parallel +task_complete developer +task_start reviewer // unblocked after implementation +task_complete developer +task_complete reviewer +agent_complete coordinator // synthesizes final result +Success: true +Tokens: 12847 output tokens ``` -
+## Three Ways to Run -
-Custom Tools — define tools with Zod schemas +| Mode | Method | When to use | +|------|--------|-------------| +| Single agent | `runAgent()` | One agent, one prompt — simplest entry point | +| Auto-orchestrated team | `runTeam()` | Give a goal, framework plans and executes | +| Explicit pipeline | `runTasks()` | You define the task graph and assignments | -```typescript -import { z } from 'zod' -import { defineTool, Agent, ToolRegistry, ToolExecutor, registerBuiltInTools } from '@jackchen_me/open-multi-agent' +## Examples -const searchTool = defineTool({ - name: 'web_search', - description: 'Search the web and return the top results.', - inputSchema: z.object({ - query: z.string().describe('The search query.'), - maxResults: z.number().optional().describe('Number of results (default 5).'), - }), - execute: async ({ query, maxResults = 5 }) => { - const results = await mySearchProvider(query, maxResults) - return { data: JSON.stringify(results), isError: false } - }, -}) +All examples are runnable scripts in [`examples/`](./examples/). Run any of them with `npx tsx`: -const registry = new ToolRegistry() -registerBuiltInTools(registry) -registry.register(searchTool) - -const executor = new ToolExecutor(registry) -const agent = new Agent( - { name: 'researcher', model: 'claude-sonnet-4-6', tools: ['web_search'] }, - registry, - executor, -) - -const result = await agent.run('Find the three most recent TypeScript releases.') +```bash +npx tsx examples/01-single-agent.ts ``` -
- -
-Multi-Model Teams — mix Claude and GPT in one workflow - -```typescript -const claudeAgent: AgentConfig = { - name: 'strategist', - model: 'claude-opus-4-6', - provider: 'anthropic', - systemPrompt: 'You plan high-level approaches.', - tools: ['file_write'], -} - -const gptAgent: AgentConfig = { - name: 'implementer', - model: 'gpt-5.4', - provider: 'openai', - systemPrompt: 'You implement plans as working code.', - tools: ['bash', 'file_read', 'file_write'], -} - -const team = orchestrator.createTeam('mixed-team', { - name: 'mixed-team', - agents: [claudeAgent, gptAgent], - sharedMemory: true, -}) - -const result = await orchestrator.runTeam(team, 'Build a CLI tool that converts JSON to CSV.') -``` - -
- -
-Streaming Output - -```typescript -import { Agent, ToolRegistry, ToolExecutor, registerBuiltInTools } from '@jackchen_me/open-multi-agent' - -const registry = new ToolRegistry() -registerBuiltInTools(registry) -const executor = new ToolExecutor(registry) - -const agent = new Agent( - { name: 'writer', model: 'claude-sonnet-4-6', maxTurns: 3 }, - registry, - executor, -) - -for await (const event of agent.stream('Explain monads in two sentences.')) { - if (event.type === 'text' && typeof event.data === 'string') { - process.stdout.write(event.data) - } -} -``` - -
+| Example | What it shows | +|---------|---------------| +| [01 — Single Agent](examples/01-single-agent.ts) | `runAgent()` one-shot, `stream()` streaming, `prompt()` multi-turn | +| [02 — Team Collaboration](examples/02-team-collaboration.ts) | `runTeam()` auto-orchestration with coordinator pattern | +| [03 — Task Pipeline](examples/03-task-pipeline.ts) | `runTasks()` explicit dependency graph (design → implement → test + review) | +| [04 — Multi-Model Team](examples/04-multi-model-team.ts) | `defineTool()` custom tools, mixed Anthropic + OpenAI providers, `AgentPool` | +| [05 — Copilot](examples/05-copilot-test.ts) | GitHub Copilot as an LLM provider | +| [06 — Local Model](examples/06-local-model.ts) | Ollama + Claude in one pipeline via `baseURL` (works with vLLM, LM Studio, etc.) | +| [07 — Fan-Out / Aggregate](examples/07-fan-out-aggregate.ts) | `runParallel()` MapReduce — 3 analysts in parallel, then synthesize | +| [08 — Gemma 4 Local](examples/08-gemma4-local.ts) | `runTasks()` + `runTeam()` with local Gemma 4 via Ollama — zero API cost | +| [09 — Structured Output](examples/09-structured-output.ts) | `outputSchema` (Zod) on AgentConfig — validated JSON via `result.structured` | +| [10 — Task Retry](examples/10-task-retry.ts) | `maxRetries` / `retryDelayMs` / `retryBackoff` with `task_retry` progress events | +| [11 — Trace Observability](examples/11-trace-observability.ts) | `onTrace` callback — structured spans for LLM calls, tools, tasks, and agents | ## Architecture @@ -246,6 +154,7 @@ for await (const event of agent.stream('Explain monads in two sentences.')) { │ - prompt() │───►│ LLMAdapter │ │ - stream() │ │ - AnthropicAdapter │ └────────┬──────────┘ │ - OpenAIAdapter │ + │ │ - CopilotAdapter │ │ └──────────────────────┘ ┌────────▼──────────┐ │ AgentRunner │ ┌──────────────────────┐ @@ -265,17 +174,46 @@ for await (const event of agent.stream('Explain monads in two sentences.')) { | `file_edit` | Edit a file by replacing an exact string match. 
| | `grep` | Search file contents with regex. Uses ripgrep when available, falls back to Node.js. | +## Supported Providers + +| Provider | Config | Env var | Status | +|----------|--------|---------|--------| +| Anthropic (Claude) | `provider: 'anthropic'` | `ANTHROPIC_API_KEY` | Verified | +| OpenAI (GPT) | `provider: 'openai'` | `OPENAI_API_KEY` | Verified | +| GitHub Copilot | `provider: 'copilot'` | `GITHUB_TOKEN` | Verified | +| Ollama / vLLM / LM Studio | `provider: 'openai'` + `baseURL` | — | Verified | + +Verified local models with tool-calling: **Gemma 4** (see [example 08](examples/08-gemma4-local.ts)). + +Any OpenAI-compatible API should work via `provider: 'openai'` + `baseURL` (DeepSeek, Groq, Mistral, Qwen, MiniMax, etc.). These providers have not been fully verified yet — contributions welcome via [#25](https://github.com/JackChen-me/open-multi-agent/issues/25). + ## Contributing Issues, feature requests, and PRs are welcome. Some areas where contributions would be especially valuable: -- **LLM Adapters** — Ollama, llama.cpp, vLLM, Gemini. The `LLMAdapter` interface requires just two methods: `chat()` and `stream()`. +- **Provider integrations** — Verify and document OpenAI-compatible providers (DeepSeek, Groq, Qwen, MiniMax, etc.) via `baseURL`. See [#25](https://github.com/JackChen-me/open-multi-agent/issues/25). For providers that are NOT OpenAI-compatible (e.g. Gemini), a new `LLMAdapter` implementation is welcome — the interface requires just two methods: `chat()` and `stream()`. - **Examples** — Real-world workflows and use cases. - **Documentation** — Guides, tutorials, and API docs. +## Author + +> JackChen — Ex PM (¥100M+ revenue), now indie builder. Follow on [X](https://x.com/JackChen_x) for AI Agent insights. 
+ +## Contributors + + + + + ## Star History -[![Star History Chart](https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260402)](https://star-history.com/#JackChen-me/open-multi-agent&Date) + + + + + Star History Chart + + ## License diff --git a/README_zh.md b/README_zh.md index e9a3f00..a8b680c 100644 --- a/README_zh.md +++ b/README_zh.md @@ -1,6 +1,8 @@ # Open Multi-Agent -构建能协同工作的 AI 智能体团队。一个智能体负责规划,一个负责实现,一个负责审查——框架自动处理任务调度、依赖关系和智能体间通信。 +TypeScript 多智能体编排框架。一次 `runTeam()` 调用从目标到结果——框架自动拆解任务、解析依赖、并行执行。 + +3 个运行时依赖 · 27 个源文件 · Node.js 能跑的地方都能部署 · 被 [Latent Space](https://www.latent.space/p/ainews-a-quiet-april-fools) AI News 提及(AI 工程领域头部 Newsletter,17 万+订阅者) [![GitHub stars](https://img.shields.io/github/stars/JackChen-me/open-multi-agent)](https://github.com/JackChen-me/open-multi-agent/stargazers) [![license](https://img.shields.io/github/license/JackChen-me/open-multi-agent)](./LICENSE) @@ -10,40 +12,26 @@ ## 为什么选择 Open Multi-Agent? -- **多智能体团队** — 定义不同角色、工具甚至不同模型的智能体。它们通过消息总线和共享内存协作。 -- **任务 DAG 调度** — 任务之间存在依赖关系。框架进行拓扑排序——有依赖的任务等待,无依赖的任务并行执行。 -- **模型无关** — Claude 和 GPT 可以在同一个团队中使用。每个智能体可以单独配置模型。你也可以为任何 LLM 编写自己的适配器。 -- **进程内执行** — 没有子进程开销。所有内容在一个 Node.js 进程中运行。可部署到 Serverless、Docker、CI/CD。 +- **目标进,结果出** — `runTeam(team, "构建一个 REST API")`。协调者智能体自动将目标拆解为带依赖关系的任务图,分配给对应智能体,独立任务并行执行,最终合成输出。无需手动定义任务或编排流程图。 +- **TypeScript 原生** — 为 Node.js 生态而生。`npm install` 即用,无需 Python 运行时、无子进程桥接、无额外基础设施。可嵌入 Express、Next.js、Serverless 函数或 CI/CD 流水线。 +- **可审计、极轻量** — 3 个运行时依赖(`@anthropic-ai/sdk`、`openai`、`zod`),27 个源文件。一个下午就能读完全部源码。 +- **模型无关** — Claude、GPT、Gemma 4 和本地模型(Ollama、vLLM、LM Studio)可以在同一个团队中使用。通过 `baseURL` 即可接入任何 OpenAI 兼容服务。 +- **多智能体协作** — 定义不同角色、工具和模型的智能体,通过消息总线和共享内存协作。 +- **结构化输出** — 为任意智能体添加 `outputSchema`(Zod),输出自动解析为 JSON 并校验,校验失败自动重试一次。通过 `result.structured` 获取类型化结果。 +- **任务重试** — 为任务设置 `maxRetries`,失败时自动指数退避重试。所有尝试的 token 用量累计,确保计费准确。 +- **可观测性** — 可选的 `onTrace` 回调为每次 LLM 调用、工具执行、任务和智能体运行发出结构化 span 事件——包含耗时、token 用量和共享的 `runId` 
用于关联追踪。未订阅时零开销,零额外依赖。 ## 快速开始 +需要 Node.js >= 18。 + ```bash npm install @jackchen_me/open-multi-agent ``` -在环境变量中设置 `ANTHROPIC_API_KEY`(以及可选的 `OPENAI_API_KEY`)。 +在环境变量中设置 `ANTHROPIC_API_KEY`(以及可选的 `OPENAI_API_KEY` 或用于 Copilot 的 `GITHUB_TOKEN`)。通过 Ollama 使用本地模型无需 API key — 参见 [example 06](examples/06-local-model.ts)。 -```typescript -import { OpenMultiAgent } from '@jackchen_me/open-multi-agent' - -const orchestrator = new OpenMultiAgent({ defaultModel: 'claude-sonnet-4-6' }) - -// 一个智能体,一个任务 -const result = await orchestrator.runAgent( - { - name: 'coder', - model: 'claude-sonnet-4-6', - tools: ['bash', 'file_write'], - }, - 'Write a TypeScript function that reverses a string, save it to /tmp/reverse.ts, and run it.', -) - -console.log(result.output) -``` - -## 多智能体团队 - -这才是有意思的地方。三个智能体,一个目标: +三个智能体,一个目标——框架处理剩下的一切: ```typescript import { OpenMultiAgent } from '@jackchen_me/open-multi-agent' @@ -88,132 +76,52 @@ console.log(`成功: ${result.success}`) console.log(`Token 用量: ${result.totalTokenUsage.output_tokens} output tokens`) ``` -## 更多示例 +执行过程: -
-任务流水线 — 显式控制任务图和分配 - -```typescript -const result = await orchestrator.runTasks(team, [ - { - title: 'Design the data model', - description: 'Write a TypeScript interface spec to /tmp/spec.md', - assignee: 'architect', - }, - { - title: 'Implement the module', - description: 'Read /tmp/spec.md and implement the module in /tmp/src/', - assignee: 'developer', - dependsOn: ['Design the data model'], // 等待设计完成后才开始 - }, - { - title: 'Write tests', - description: 'Read the implementation and write Vitest tests.', - assignee: 'developer', - dependsOn: ['Implement the module'], - }, - { - title: 'Review code', - description: 'Review /tmp/src/ and produce a structured code review.', - assignee: 'reviewer', - dependsOn: ['Implement the module'], // 可以和测试并行执行 - }, -]) +``` +agent_start coordinator +task_start architect +task_complete architect +task_start developer +task_start developer // 无依赖的任务并行执行 +task_complete developer +task_start reviewer // 实现完成后自动解锁 +task_complete developer +task_complete reviewer +agent_complete coordinator // 综合所有结果 +Success: true +Tokens: 12847 output tokens ``` -
+## 三种运行模式 -
-自定义工具 — 使用 Zod schema 定义工具 +| 模式 | 方法 | 适用场景 | +|------|------|----------| +| 单智能体 | `runAgent()` | 一个智能体,一个提示词——最简入口 | +| 自动编排团队 | `runTeam()` | 给一个目标,框架自动规划和执行 | +| 显式任务管线 | `runTasks()` | 你自己定义任务图和分配 | -```typescript -import { z } from 'zod' -import { defineTool, Agent, ToolRegistry, ToolExecutor, registerBuiltInTools } from '@jackchen_me/open-multi-agent' +## 示例 -const searchTool = defineTool({ - name: 'web_search', - description: 'Search the web and return the top results.', - inputSchema: z.object({ - query: z.string().describe('The search query.'), - maxResults: z.number().optional().describe('Number of results (default 5).'), - }), - execute: async ({ query, maxResults = 5 }) => { - const results = await mySearchProvider(query, maxResults) - return { data: JSON.stringify(results), isError: false } - }, -}) +所有示例都是可运行脚本,位于 [`examples/`](./examples/) 目录。使用 `npx tsx` 运行: -const registry = new ToolRegistry() -registerBuiltInTools(registry) -registry.register(searchTool) - -const executor = new ToolExecutor(registry) -const agent = new Agent( - { name: 'researcher', model: 'claude-sonnet-4-6', tools: ['web_search'] }, - registry, - executor, -) - -const result = await agent.run('Find the three most recent TypeScript releases.') +```bash +npx tsx examples/01-single-agent.ts ``` -
- -
-多模型团队 — 在一个工作流中混合使用 Claude 和 GPT - -```typescript -const claudeAgent: AgentConfig = { - name: 'strategist', - model: 'claude-opus-4-6', - provider: 'anthropic', - systemPrompt: 'You plan high-level approaches.', - tools: ['file_write'], -} - -const gptAgent: AgentConfig = { - name: 'implementer', - model: 'gpt-5.4', - provider: 'openai', - systemPrompt: 'You implement plans as working code.', - tools: ['bash', 'file_read', 'file_write'], -} - -const team = orchestrator.createTeam('mixed-team', { - name: 'mixed-team', - agents: [claudeAgent, gptAgent], - sharedMemory: true, -}) - -const result = await orchestrator.runTeam(team, 'Build a CLI tool that converts JSON to CSV.') -``` - -
- -
-流式输出 - -```typescript -import { Agent, ToolRegistry, ToolExecutor, registerBuiltInTools } from '@jackchen_me/open-multi-agent' - -const registry = new ToolRegistry() -registerBuiltInTools(registry) -const executor = new ToolExecutor(registry) - -const agent = new Agent( - { name: 'writer', model: 'claude-sonnet-4-6', maxTurns: 3 }, - registry, - executor, -) - -for await (const event of agent.stream('Explain monads in two sentences.')) { - if (event.type === 'text' && typeof event.data === 'string') { - process.stdout.write(event.data) - } -} -``` - -
+| 示例 | 展示内容 | +|------|----------| +| [01 — 单智能体](examples/01-single-agent.ts) | `runAgent()` 单次调用、`stream()` 流式输出、`prompt()` 多轮对话 | +| [02 — 团队协作](examples/02-team-collaboration.ts) | `runTeam()` 自动编排 + 协调者模式 | +| [03 — 任务流水线](examples/03-task-pipeline.ts) | `runTasks()` 显式依赖图(设计 → 实现 → 测试 + 评审) | +| [04 — 多模型团队](examples/04-multi-model-team.ts) | `defineTool()` 自定义工具、Anthropic + OpenAI 混合、`AgentPool` | +| [05 — Copilot](examples/05-copilot-test.ts) | GitHub Copilot 作为 LLM 提供者 | +| [06 — 本地模型](examples/06-local-model.ts) | Ollama + Claude 混合流水线,通过 `baseURL` 接入(兼容 vLLM、LM Studio 等) | +| [07 — 扇出聚合](examples/07-fan-out-aggregate.ts) | `runParallel()` MapReduce — 3 个分析师并行,然后综合 | +| [08 — Gemma 4 本地](examples/08-gemma4-local.ts) | `runTasks()` + `runTeam()` 本地 Gemma 4 via Ollama — 零 API 费用 | +| [09 — 结构化输出](examples/09-structured-output.ts) | `outputSchema`(Zod)— 校验 JSON 输出,通过 `result.structured` 获取 | +| [10 — 任务重试](examples/10-task-retry.ts) | `maxRetries` / `retryDelayMs` / `retryBackoff` + `task_retry` 进度事件 | +| [11 — 可观测性](examples/11-trace-observability.ts) | `onTrace` 回调 — LLM 调用、工具、任务、智能体的结构化 span 事件 | ## 架构 @@ -246,6 +154,7 @@ for await (const event of agent.stream('Explain monads in two sentences.')) { │ - prompt() │───►│ LLMAdapter │ │ - stream() │ │ - AnthropicAdapter │ └────────┬──────────┘ │ - OpenAIAdapter │ + │ │ - CopilotAdapter │ │ └──────────────────────┘ ┌────────▼──────────┐ │ AgentRunner │ ┌──────────────────────┐ @@ -265,17 +174,46 @@ for await (const event of agent.stream('Explain monads in two sentences.')) { | `file_edit` | 通过精确字符串匹配编辑文件。 | | `grep` | 使用正则表达式搜索文件内容。优先使用 ripgrep,回退到 Node.js 实现。 | +## 支持的 Provider + +| Provider | 配置 | 环境变量 | 状态 | +|----------|------|----------|------| +| Anthropic (Claude) | `provider: 'anthropic'` | `ANTHROPIC_API_KEY` | 已验证 | +| OpenAI (GPT) | `provider: 'openai'` | `OPENAI_API_KEY` | 已验证 | +| GitHub Copilot | `provider: 'copilot'` | `GITHUB_TOKEN` | 已验证 | +| Ollama / vLLM / LM Studio | `provider: 'openai'` + 
`baseURL` | — | 已验证 | + +已验证支持 tool-calling 的本地模型:**Gemma 4**(见[示例 08](examples/08-gemma4-local.ts))。 + +任何 OpenAI 兼容 API 均可通过 `provider: 'openai'` + `baseURL` 接入(DeepSeek、Groq、Mistral、Qwen、MiniMax 等)。这些 Provider 尚未完整验证——欢迎通过 [#25](https://github.com/JackChen-me/open-multi-agent/issues/25) 贡献验证。 + ## 参与贡献 欢迎提 Issue、功能需求和 PR。以下方向的贡献尤其有价值: -- **LLM 适配器** — Ollama、llama.cpp、vLLM、Gemini。`LLMAdapter` 接口只需实现两个方法:`chat()` 和 `stream()`。 +- **Provider 集成** — 验证并文档化 OpenAI 兼容 Provider(DeepSeek、Groq、Qwen、MiniMax 等)通过 `baseURL` 接入。详见 [#25](https://github.com/JackChen-me/open-multi-agent/issues/25)。对于非 OpenAI 兼容的 Provider(如 Gemini),欢迎贡献新的 `LLMAdapter` 实现——接口只需两个方法:`chat()` 和 `stream()`。 - **示例** — 真实场景的工作流和用例。 - **文档** — 指南、教程和 API 文档。 +## 作者 + +> JackChen — 前 WPS 产品经理,现独立创业者。关注小红书[「杰克西|硅基杠杆」](https://www.xiaohongshu.com/user/profile/5a1bdc1e4eacab4aa39ea6d6),持续获取我的 AI Agent 观点和思考。 + +## 贡献者 + + + + + ## Star 趋势 -[![Star History Chart](https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260402)](https://star-history.com/#JackChen-me/open-multi-agent&Date) + + + + + Star History Chart + + ## 许可证 diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..235d6d9 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,17 @@ +# Security Policy + +## Supported Versions + +| Version | Supported | +|---------|-----------| +| latest | Yes | + +## Reporting a Vulnerability + +If you discover a security vulnerability, please report it responsibly via email: + +**jack@yuanasi.com** + +Please do **not** open a public GitHub issue for security vulnerabilities. + +We will acknowledge receipt within 48 hours and aim to provide a fix or mitigation plan within 7 days. diff --git a/examples/05-copilot-test.ts b/examples/05-copilot-test.ts new file mode 100644 index 0000000..d027aea --- /dev/null +++ b/examples/05-copilot-test.ts @@ -0,0 +1,49 @@ +/** + * Quick smoke test for the Copilot adapter. 
+ * + * Run: + * npx tsx examples/05-copilot-test.ts + * + * If GITHUB_COPILOT_TOKEN is not set, the adapter will start an interactive + * OAuth2 device flow — you'll be prompted to sign in via your browser. + */ + +import { OpenMultiAgent } from '../src/index.js' +import type { OrchestratorEvent } from '../src/types.js' + +const orchestrator = new OpenMultiAgent({ + defaultModel: 'gpt-4o', + defaultProvider: 'copilot', + onProgress: (event: OrchestratorEvent) => { + if (event.type === 'agent_start') { + console.log(`[start] agent=${event.agent}`) + } else if (event.type === 'agent_complete') { + console.log(`[complete] agent=${event.agent}`) + } + }, +}) + +console.log('Testing Copilot adapter with gpt-4o...\n') + +const result = await orchestrator.runAgent( + { + name: 'assistant', + model: 'gpt-4o', + provider: 'copilot', + systemPrompt: 'You are a helpful assistant. Keep answers brief.', + maxTurns: 1, + maxTokens: 256, + }, + 'What is 2 + 2? Reply in one sentence.', +) + +if (result.success) { + console.log('\nAgent output:') + console.log('─'.repeat(60)) + console.log(result.output) + console.log('─'.repeat(60)) + console.log(`\nTokens: input=${result.tokenUsage.input_tokens}, output=${result.tokenUsage.output_tokens}`) +} else { + console.error('Agent failed:', result.output) + process.exit(1) +} diff --git a/examples/06-local-model.ts b/examples/06-local-model.ts new file mode 100644 index 0000000..d7cf292 --- /dev/null +++ b/examples/06-local-model.ts @@ -0,0 +1,199 @@ +/** + * Example 06 — Local Model + Cloud Model Team (Ollama + Claude) + * + * Demonstrates mixing a local model served by Ollama with a cloud model + * (Claude) in the same task pipeline. The key technique is using + * `provider: 'openai'` with a custom `baseURL` pointing at Ollama's + * OpenAI-compatible endpoint. 
+ * + * This pattern works with ANY OpenAI-compatible local server: + * - Ollama → http://localhost:11434/v1 + * - vLLM → http://localhost:8000/v1 + * - LM Studio → http://localhost:1234/v1 + * - llama.cpp → http://localhost:8080/v1 + * Just change the baseURL and model name below. + * + * Run: + * npx tsx examples/06-local-model.ts + * + * Prerequisites: + * 1. Ollama installed and running: https://ollama.com + * 2. Pull the model: ollama pull llama3.1 + * 3. ANTHROPIC_API_KEY env var must be set. + */ + +import { OpenMultiAgent } from '../src/index.js' +import type { AgentConfig, OrchestratorEvent, Task } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Agents +// --------------------------------------------------------------------------- + +/** + * Coder — uses Claude (Anthropic) for high-quality code generation. + */ +const coder: AgentConfig = { + name: 'coder', + model: 'claude-sonnet-4-6', + provider: 'anthropic', + systemPrompt: `You are a senior TypeScript developer. Write clean, well-typed, +production-quality code. Use the tools to write files to /tmp/local-model-demo/. +Always include brief JSDoc comments on exported functions.`, + tools: ['bash', 'file_write'], + maxTurns: 6, +} + +/** + * Reviewer — uses a local Ollama model via the OpenAI-compatible API. + * The apiKey is required by the OpenAI SDK but Ollama ignores it, + * so we pass the placeholder string 'ollama'. + */ +const reviewer: AgentConfig = { + name: 'reviewer', + model: 'llama3.1', + provider: 'openai', // 'openai' here means "OpenAI-compatible protocol", not the OpenAI cloud + baseURL: 'http://localhost:11434/v1', + apiKey: 'ollama', + systemPrompt: `You are a code reviewer. You read source files and produce a structured review. 
+Your review MUST include these sections: +- Summary (2-3 sentences) +- Strengths (bullet list) +- Issues (bullet list — or "None found" if the code is clean) +- Verdict: SHIP or NEEDS WORK + +Be specific and constructive. Reference line numbers or function names when possible.`, + tools: ['file_read'], + maxTurns: 4, +} + +// --------------------------------------------------------------------------- +// Progress handler +// --------------------------------------------------------------------------- + +const taskTimes = new Map() + +function handleProgress(event: OrchestratorEvent): void { + const ts = new Date().toISOString().slice(11, 23) + + switch (event.type) { + case 'task_start': { + taskTimes.set(event.task ?? '', Date.now()) + const task = event.data as Task | undefined + console.log(`[${ts}] TASK READY "${task?.title ?? event.task}" → ${task?.assignee ?? '?'}`) + break + } + case 'task_complete': { + const elapsed = Date.now() - (taskTimes.get(event.task ?? '') ?? Date.now()) + console.log(`[${ts}] TASK DONE task=${event.task} in ${elapsed}ms`) + break + } + case 'agent_start': + console.log(`[${ts}] AGENT START ${event.agent}`) + break + case 'agent_complete': + console.log(`[${ts}] AGENT DONE ${event.agent}`) + break + case 'error': + console.error(`[${ts}] ERROR ${event.agent ?? ''} task=${event.task ?? 
'?'}`) + break + } +} + +// --------------------------------------------------------------------------- +// Orchestrator + Team +// --------------------------------------------------------------------------- + +const orchestrator = new OpenMultiAgent({ + defaultModel: 'claude-sonnet-4-6', + maxConcurrency: 2, + onProgress: handleProgress, +}) + +const team = orchestrator.createTeam('local-cloud-team', { + name: 'local-cloud-team', + agents: [coder, reviewer], + sharedMemory: true, +}) + +// --------------------------------------------------------------------------- +// Task pipeline: code → review +// --------------------------------------------------------------------------- + +const OUTPUT_DIR = '/tmp/local-model-demo' + +const tasks: Array<{ + title: string + description: string + assignee?: string + dependsOn?: string[] +}> = [ + { + title: 'Write: retry utility', + description: `Write a small but complete TypeScript utility to ${OUTPUT_DIR}/retry.ts. + +The module should export: +1. A \`RetryOptions\` interface with: maxRetries (number), delayMs (number), + backoffFactor (optional number, default 2), shouldRetry (optional predicate + taking the error and returning boolean). +2. An async \`retry(fn: () => Promise, options: RetryOptions): Promise\` + function that retries \`fn\` with exponential backoff. +3. A convenience \`withRetry\` wrapper that returns a new function with retry + behaviour baked in. + +Include JSDoc comments. No external dependencies — use only Node built-ins. +After writing the file, also create a small test script at ${OUTPUT_DIR}/retry-test.ts +that exercises the happy path and a failure case, then run it with \`npx tsx\`.`, + assignee: 'coder', + }, + { + title: 'Review: retry utility', + description: `Read the files at ${OUTPUT_DIR}/retry.ts and ${OUTPUT_DIR}/retry-test.ts. 
+ +Produce a structured code review covering: +- Summary (2-3 sentences describing the module) +- Strengths (bullet list) +- Issues (bullet list — be specific about what and why) +- Verdict: SHIP or NEEDS WORK`, + assignee: 'reviewer', + dependsOn: ['Write: retry utility'], + }, +] + +// --------------------------------------------------------------------------- +// Run +// --------------------------------------------------------------------------- + +console.log('Local + Cloud model team') +console.log(` coder → Claude (${coder.model}) via Anthropic API`) +console.log(` reviewer → Ollama (${reviewer.model}) at ${reviewer.baseURL}`) +console.log() +console.log('Pipeline: coder writes code → local model reviews it') +console.log('='.repeat(60)) + +const result = await orchestrator.runTasks(team, tasks) + +// --------------------------------------------------------------------------- +// Summary +// --------------------------------------------------------------------------- + +console.log('\n' + '='.repeat(60)) +console.log('Pipeline complete.\n') +console.log(`Overall success: ${result.success}`) +console.log(`Tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`) + +console.log('\nPer-agent summary:') +for (const [name, r] of result.agentResults) { + const icon = r.success ? 'OK ' : 'FAIL' + const provider = name === 'coder' ? 
'anthropic' : 'ollama (local)' + const tools = r.toolCalls.map(c => c.toolName).join(', ') + console.log(` [${icon}] ${name.padEnd(10)} (${provider.padEnd(16)}) tools: ${tools || '(none)'}`) +} + +// Print the reviewer's output +const review = result.agentResults.get('reviewer') +if (review?.success) { + console.log('\nCode review (from local model):') + console.log('─'.repeat(60)) + console.log(review.output) + console.log('─'.repeat(60)) +} diff --git a/examples/07-fan-out-aggregate.ts b/examples/07-fan-out-aggregate.ts new file mode 100644 index 0000000..43b2c32 --- /dev/null +++ b/examples/07-fan-out-aggregate.ts @@ -0,0 +1,209 @@ +/** + * Example 07 — Fan-Out / Aggregate (MapReduce) Pattern + * + * Demonstrates: + * - Fan-out: send the same question to N "analyst" agents in parallel + * - Aggregate: a "synthesizer" agent reads all analyst outputs and produces + * a balanced final report + * - AgentPool with runParallel() for concurrent fan-out + * - No tools needed — pure LLM reasoning to keep the focus on the pattern + * + * Run: + * npx tsx examples/07-fan-out-aggregate.ts + * + * Prerequisites: + * ANTHROPIC_API_KEY env var must be set. + */ + +import { Agent, AgentPool, ToolRegistry, ToolExecutor, registerBuiltInTools } from '../src/index.js' +import type { AgentConfig, AgentRunResult } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Analysis topic +// --------------------------------------------------------------------------- + +const TOPIC = `Should a solo developer build a SaaS product that uses AI agents +for automated customer support? 
Consider the current state of AI technology, +market demand, competition, costs, and the unique constraints of being a solo +founder with limited time (~6 hours/day of productive work).` + +// --------------------------------------------------------------------------- +// Analyst agent configs — three perspectives on the same question +// --------------------------------------------------------------------------- + +const optimistConfig: AgentConfig = { + name: 'optimist', + model: 'claude-sonnet-4-6', + systemPrompt: `You are an optimistic technology analyst who focuses on +opportunities, upside potential, and emerging trends. You see possibilities +where others see obstacles. Back your optimism with concrete reasoning — +cite market trends, cost curves, and real capabilities. Keep your analysis +to 200-300 words.`, + maxTurns: 1, + temperature: 0.4, +} + +const skepticConfig: AgentConfig = { + name: 'skeptic', + model: 'claude-sonnet-4-6', + systemPrompt: `You are a skeptical technology analyst who focuses on risks, +challenges, failure modes, and hidden costs. You stress-test assumptions and +ask "what could go wrong?" Back your skepticism with concrete reasoning — +cite failure rates, technical limitations, and market realities. Keep your +analysis to 200-300 words.`, + maxTurns: 1, + temperature: 0.4, +} + +const pragmatistConfig: AgentConfig = { + name: 'pragmatist', + model: 'claude-sonnet-4-6', + systemPrompt: `You are a pragmatic technology analyst who focuses on practical +feasibility, execution complexity, and resource requirements. You care about +what works today, not what might work someday. You think in terms of MVPs, +timelines, and concrete tradeoffs. 
Keep your analysis to 200-300 words.`, + maxTurns: 1, + temperature: 0.4, +} + +const synthesizerConfig: AgentConfig = { + name: 'synthesizer', + model: 'claude-sonnet-4-6', + systemPrompt: `You are a senior strategy advisor who synthesizes multiple +perspectives into a balanced, actionable recommendation. You do not simply +summarise — you weigh the arguments, identify where they agree and disagree, +and produce a clear verdict with next steps. Structure your output as: + +1. Key agreements across perspectives +2. Key disagreements and how you weigh them +3. Verdict (go / no-go / conditional go) +4. Recommended next steps (3-5 bullet points) + +Keep the final report to 300-400 words.`, + maxTurns: 1, + temperature: 0.3, +} + +// --------------------------------------------------------------------------- +// Build agents — no tools needed for pure reasoning +// --------------------------------------------------------------------------- + +function buildAgent(config: AgentConfig): Agent { + const registry = new ToolRegistry() + registerBuiltInTools(registry) // not needed here, but safe if tools are added later + const executor = new ToolExecutor(registry) + return new Agent(config, registry, executor) +} + +const optimist = buildAgent(optimistConfig) +const skeptic = buildAgent(skepticConfig) +const pragmatist = buildAgent(pragmatistConfig) +const synthesizer = buildAgent(synthesizerConfig) + +// --------------------------------------------------------------------------- +// Set up the pool +// --------------------------------------------------------------------------- + +const pool = new AgentPool(3) // 3 analysts can run simultaneously +pool.add(optimist) +pool.add(skeptic) +pool.add(pragmatist) +pool.add(synthesizer) + +console.log('Fan-Out / Aggregate (MapReduce) Pattern') +console.log('='.repeat(60)) +console.log(`\nTopic: ${TOPIC.replace(/\n/g, ' ').trim()}\n`) + +// --------------------------------------------------------------------------- +// Step 1: 
Fan-out — run all 3 analysts in parallel +// --------------------------------------------------------------------------- + +console.log('[Step 1] Fan-out: 3 analysts running in parallel...\n') + +const analystResults: Map = await pool.runParallel([ + { agent: 'optimist', prompt: TOPIC }, + { agent: 'skeptic', prompt: TOPIC }, + { agent: 'pragmatist', prompt: TOPIC }, +]) + +// Print each analyst's output (truncated) +const analysts = ['optimist', 'skeptic', 'pragmatist'] as const +for (const name of analysts) { + const result = analystResults.get(name)! + const status = result.success ? 'OK' : 'FAILED' + console.log(` ${name} [${status}] — ${result.tokenUsage.output_tokens} output tokens`) + console.log(` ${result.output.slice(0, 150).replace(/\n/g, ' ')}...`) + console.log() +} + +// Check all analysts succeeded +for (const name of analysts) { + if (!analystResults.get(name)!.success) { + console.error(`Analyst '${name}' failed: ${analystResults.get(name)!.output}`) + process.exit(1) + } +} + +// --------------------------------------------------------------------------- +// Step 2: Aggregate — synthesizer reads all 3 analyses +// --------------------------------------------------------------------------- + +console.log('[Step 2] Aggregate: synthesizer producing final report...\n') + +const synthesizerPrompt = `Three analysts have independently evaluated the same question. +Read their analyses below and produce your synthesis report. 
+ +--- OPTIMIST --- +${analystResults.get('optimist')!.output} + +--- SKEPTIC --- +${analystResults.get('skeptic')!.output} + +--- PRAGMATIST --- +${analystResults.get('pragmatist')!.output} + +Now synthesize these three perspectives into a balanced recommendation.` + +const synthResult = await pool.run('synthesizer', synthesizerPrompt) + +if (!synthResult.success) { + console.error('Synthesizer failed:', synthResult.output) + process.exit(1) +} + +// --------------------------------------------------------------------------- +// Final output +// --------------------------------------------------------------------------- + +console.log('='.repeat(60)) +console.log('SYNTHESIZED REPORT') +console.log('='.repeat(60)) +console.log() +console.log(synthResult.output) +console.log() +console.log('-'.repeat(60)) + +// --------------------------------------------------------------------------- +// Token usage comparison +// --------------------------------------------------------------------------- + +console.log('\nToken Usage Summary:') +console.log('-'.repeat(60)) + +let totalInput = 0 +let totalOutput = 0 + +for (const name of analysts) { + const r = analystResults.get(name)! 
+ totalInput += r.tokenUsage.input_tokens + totalOutput += r.tokenUsage.output_tokens + console.log(` ${name.padEnd(12)} — input: ${r.tokenUsage.input_tokens}, output: ${r.tokenUsage.output_tokens}`) +} + +totalInput += synthResult.tokenUsage.input_tokens +totalOutput += synthResult.tokenUsage.output_tokens +console.log(` ${'synthesizer'.padEnd(12)} — input: ${synthResult.tokenUsage.input_tokens}, output: ${synthResult.tokenUsage.output_tokens}`) +console.log('-'.repeat(60)) +console.log(` ${'TOTAL'.padEnd(12)} — input: ${totalInput}, output: ${totalOutput}`) + +console.log('\nDone.') diff --git a/examples/08-gemma4-local.ts b/examples/08-gemma4-local.ts new file mode 100644 index 0000000..0d31853 --- /dev/null +++ b/examples/08-gemma4-local.ts @@ -0,0 +1,192 @@ +/** + * Example 08 — Gemma 4 Local (100% Local, Zero API Cost) + * + * Demonstrates both execution modes with a fully local Gemma 4 model via + * Ollama. No cloud API keys needed — everything runs on your machine. + * + * Part 1 — runTasks(): explicit task pipeline (researcher → summarizer) + * Part 2 — runTeam(): auto-orchestration where Gemma 4 acts as coordinator, + * decomposes the goal into tasks, and synthesises the final result + * + * This is the hardest test for a local model — runTeam() requires it to + * produce valid JSON for task decomposition AND do tool-calling for execution. + * Gemma 4 e2b (5.1B params) handles both reliably. + * + * Run: + * no_proxy=localhost npx tsx examples/08-gemma4-local.ts + * + * Prerequisites: + * 1. Ollama >= 0.20.0 installed and running: https://ollama.com + * 2. Pull the model: ollama pull gemma4:e2b + * (or gemma4:e4b for better quality on machines with more RAM) + * 3. No API keys needed! + * + * Note: The no_proxy=localhost prefix is needed if you have an HTTP proxy + * configured, since the OpenAI SDK would otherwise route Ollama requests + * through the proxy. 
+ */ + +import { OpenMultiAgent } from '../src/index.js' +import type { AgentConfig, OrchestratorEvent, Task } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Configuration — change this to match your Ollama setup +// --------------------------------------------------------------------------- + +// See available tags at https://ollama.com/library/gemma4 +const OLLAMA_MODEL = 'gemma4:e2b' // or 'gemma4:e4b', 'gemma4:26b' +const OLLAMA_BASE_URL = 'http://localhost:11434/v1' +const OUTPUT_DIR = '/tmp/gemma4-demo' + +// --------------------------------------------------------------------------- +// Agents +// --------------------------------------------------------------------------- + +const researcher: AgentConfig = { + name: 'researcher', + model: OLLAMA_MODEL, + provider: 'openai', + baseURL: OLLAMA_BASE_URL, + apiKey: 'ollama', // placeholder — Ollama ignores this, but the OpenAI SDK requires a non-empty value + systemPrompt: `You are a system researcher. Use bash to run non-destructive, +read-only commands (uname -a, sw_vers, df -h, uptime, etc.) and report results. +Use file_write to save reports when asked.`, + tools: ['bash', 'file_write'], + maxTurns: 8, +} + +const summarizer: AgentConfig = { + name: 'summarizer', + model: OLLAMA_MODEL, + provider: 'openai', + baseURL: OLLAMA_BASE_URL, + apiKey: 'ollama', + systemPrompt: `You are a technical writer. Read files and produce concise, +structured Markdown summaries. 
Use file_write to save reports when asked.`, + tools: ['file_read', 'file_write'], + maxTurns: 4, +} + +// --------------------------------------------------------------------------- +// Progress handler +// --------------------------------------------------------------------------- + +function handleProgress(event: OrchestratorEvent): void { + const ts = new Date().toISOString().slice(11, 23) + switch (event.type) { + case 'task_start': { + const task = event.data as Task | undefined + console.log(`[${ts}] TASK START "${task?.title ?? event.task}" → ${task?.assignee ?? '?'}`) + break + } + case 'task_complete': + console.log(`[${ts}] TASK DONE "${event.task}"`) + break + case 'agent_start': + console.log(`[${ts}] AGENT START ${event.agent}`) + break + case 'agent_complete': + console.log(`[${ts}] AGENT DONE ${event.agent}`) + break + case 'error': + console.error(`[${ts}] ERROR ${event.agent ?? ''} task=${event.task ?? '?'}`) + break + } +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Part 1: runTasks() — Explicit task pipeline +// ═══════════════════════════════════════════════════════════════════════════ + +console.log('Part 1: runTasks() — Explicit Pipeline') +console.log('='.repeat(60)) +console.log(` model → ${OLLAMA_MODEL} via Ollama`) +console.log(` pipeline → researcher gathers info → summarizer writes summary`) +console.log() + +const orchestrator1 = new OpenMultiAgent({ + defaultModel: OLLAMA_MODEL, + maxConcurrency: 1, // local model serves one request at a time + onProgress: handleProgress, +}) + +const team1 = orchestrator1.createTeam('explicit', { + name: 'explicit', + agents: [researcher, summarizer], + sharedMemory: true, +}) + +const tasks = [ + { + title: 'Gather system information', + description: `Use bash to run system info commands (uname -a, sw_vers, sysctl, df -h, uptime). 
+Then write a structured Markdown report to ${OUTPUT_DIR}/system-report.md with sections: +OS, Hardware, Disk, and Uptime.`, + assignee: 'researcher', + }, + { + title: 'Summarize the report', + description: `Read the file at ${OUTPUT_DIR}/system-report.md. +Produce a concise one-paragraph executive summary of the system information.`, + assignee: 'summarizer', + dependsOn: ['Gather system information'], + }, +] + +const start1 = Date.now() +const result1 = await orchestrator1.runTasks(team1, tasks) + +console.log(`\nSuccess: ${result1.success} Time: ${((Date.now() - start1) / 1000).toFixed(1)}s`) +console.log(`Tokens — input: ${result1.totalTokenUsage.input_tokens}, output: ${result1.totalTokenUsage.output_tokens}`) + +const summary = result1.agentResults.get('summarizer') +if (summary?.success) { + console.log('\nSummary (from local Gemma 4):') + console.log('-'.repeat(60)) + console.log(summary.output) + console.log('-'.repeat(60)) +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Part 2: runTeam() — Auto-orchestration (Gemma 4 as coordinator) +// ═══════════════════════════════════════════════════════════════════════════ + +console.log('\n\nPart 2: runTeam() — Auto-Orchestration') +console.log('='.repeat(60)) +console.log(` coordinator → auto-created by runTeam(), also Gemma 4`) +console.log(` goal → given in natural language, framework plans everything`) +console.log() + +const orchestrator2 = new OpenMultiAgent({ + defaultModel: OLLAMA_MODEL, + defaultProvider: 'openai', + defaultBaseURL: OLLAMA_BASE_URL, + defaultApiKey: 'ollama', + maxConcurrency: 1, + onProgress: handleProgress, +}) + +const team2 = orchestrator2.createTeam('auto', { + name: 'auto', + agents: [researcher, summarizer], + sharedMemory: true, +}) + +const goal = `Check this machine's Node.js version, npm version, and OS info, +then write a short Markdown summary report to /tmp/gemma4-auto/report.md` + +const start2 = Date.now() +const result2 = await 
orchestrator2.runTeam(team2, goal) + +console.log(`\nSuccess: ${result2.success} Time: ${((Date.now() - start2) / 1000).toFixed(1)}s`) +console.log(`Tokens — input: ${result2.totalTokenUsage.input_tokens}, output: ${result2.totalTokenUsage.output_tokens}`) + +const coordResult = result2.agentResults.get('coordinator') +if (coordResult?.success) { + console.log('\nFinal synthesis (from local Gemma 4 coordinator):') + console.log('-'.repeat(60)) + console.log(coordResult.output) + console.log('-'.repeat(60)) +} + +console.log('\nAll processing done locally. $0 API cost.') diff --git a/examples/09-structured-output.ts b/examples/09-structured-output.ts new file mode 100644 index 0000000..2ffc29e --- /dev/null +++ b/examples/09-structured-output.ts @@ -0,0 +1,73 @@ +/** + * Example 09 — Structured Output + * + * Demonstrates `outputSchema` on AgentConfig. The agent's response is + * automatically parsed as JSON and validated against a Zod schema. + * On validation failure, the framework retries once with error feedback. + * + * The validated result is available via `result.structured`. + * + * Run: + * npx tsx examples/09-structured-output.ts + * + * Prerequisites: + * ANTHROPIC_API_KEY env var must be set. 
+ */ + +import { z } from 'zod' +import { OpenMultiAgent } from '../src/index.js' +import type { AgentConfig } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Define a Zod schema for the expected output +// --------------------------------------------------------------------------- + +const ReviewAnalysis = z.object({ + summary: z.string().describe('One-sentence summary of the review'), + sentiment: z.enum(['positive', 'negative', 'neutral']), + confidence: z.number().min(0).max(1).describe('How confident the analysis is'), + keyTopics: z.array(z.string()).describe('Main topics mentioned in the review'), +}) + +type ReviewAnalysis = z.infer + +// --------------------------------------------------------------------------- +// Agent with outputSchema +// --------------------------------------------------------------------------- + +const analyst: AgentConfig = { + name: 'analyst', + model: 'claude-sonnet-4-6', + systemPrompt: 'You are a product review analyst. Analyze the given review and extract structured insights.', + outputSchema: ReviewAnalysis, +} + +// --------------------------------------------------------------------------- +// Run +// --------------------------------------------------------------------------- + +const orchestrator = new OpenMultiAgent({ defaultModel: 'claude-sonnet-4-6' }) + +const reviews = [ + 'This keyboard is amazing! The mechanical switches feel incredible and the RGB lighting is stunning. Build quality is top-notch. Only downside is the price.', + 'Terrible experience. The product arrived broken, customer support was unhelpful, and the return process took 3 weeks.', + 'It works fine. Nothing special, nothing bad. 
Does what it says on the box.', +] + +console.log('Analyzing product reviews with structured output...\n') + +for (const review of reviews) { + const result = await orchestrator.runAgent(analyst, `Analyze this review: "${review}"`) + + if (result.structured) { + const data = result.structured as ReviewAnalysis + console.log(`Sentiment: ${data.sentiment} (confidence: ${data.confidence})`) + console.log(`Summary: ${data.summary}`) + console.log(`Topics: ${data.keyTopics.join(', ')}`) + } else { + console.log(`Validation failed. Raw output: ${result.output.slice(0, 100)}`) + } + + console.log(`Tokens: ${result.tokenUsage.input_tokens} in / ${result.tokenUsage.output_tokens} out`) + console.log('---') +} diff --git a/examples/10-task-retry.ts b/examples/10-task-retry.ts new file mode 100644 index 0000000..5f53e5e --- /dev/null +++ b/examples/10-task-retry.ts @@ -0,0 +1,132 @@ +/** + * Example 10 — Task Retry with Exponential Backoff + * + * Demonstrates `maxRetries`, `retryDelayMs`, and `retryBackoff` on task config. + * When a task fails, the framework automatically retries with exponential + * backoff. The `onProgress` callback receives `task_retry` events so you can + * log retry attempts in real time. + * + * Scenario: a two-step pipeline where the first task (data fetch) is configured + * to retry on failure, and the second task (analysis) depends on it. + * + * Run: + * npx tsx examples/10-task-retry.ts + * + * Prerequisites: + * ANTHROPIC_API_KEY env var must be set. + */ + +import { OpenMultiAgent } from '../src/index.js' +import type { AgentConfig, OrchestratorEvent } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Agents +// --------------------------------------------------------------------------- + +const fetcher: AgentConfig = { + name: 'fetcher', + model: 'claude-sonnet-4-6', + systemPrompt: `You are a data-fetching agent. When given a topic, produce a short +JSON summary with 3-5 key facts. 
Output ONLY valid JSON, no markdown fences. +Example: {"topic":"...", "facts":["fact1","fact2","fact3"]}`, + maxTurns: 2, +} + +const analyst: AgentConfig = { + name: 'analyst', + model: 'claude-sonnet-4-6', + systemPrompt: `You are a data analyst. Read the fetched data from shared memory +and produce a brief analysis (3-4 sentences) highlighting trends or insights.`, + maxTurns: 2, +} + +// --------------------------------------------------------------------------- +// Progress handler — watch for task_retry events +// --------------------------------------------------------------------------- + +function handleProgress(event: OrchestratorEvent): void { + const ts = new Date().toISOString().slice(11, 23) + + switch (event.type) { + case 'task_start': + console.log(`[${ts}] TASK START "${event.task}" (agent: ${event.agent})`) + break + case 'task_complete': + console.log(`[${ts}] TASK DONE "${event.task}"`) + break + case 'task_retry': { + const d = event.data as { attempt: number; maxAttempts: number; error: string; nextDelayMs: number } + console.log(`[${ts}] TASK RETRY "${event.task}" — attempt ${d.attempt}/${d.maxAttempts}, next in ${d.nextDelayMs}ms`) + console.log(` error: ${d.error.slice(0, 120)}`) + break + } + case 'error': + console.log(`[${ts}] ERROR "${event.task}" agent=${event.agent}`) + break + } +} + +// --------------------------------------------------------------------------- +// Orchestrator + team +// --------------------------------------------------------------------------- + +const orchestrator = new OpenMultiAgent({ + defaultModel: 'claude-sonnet-4-6', + onProgress: handleProgress, +}) + +const team = orchestrator.createTeam('retry-demo', { + name: 'retry-demo', + agents: [fetcher, analyst], + sharedMemory: true, +}) + +// --------------------------------------------------------------------------- +// Tasks — fetcher has retry config, analyst depends on it +// --------------------------------------------------------------------------- + 
+const tasks = [ + { + title: 'Fetch data', + description: 'Fetch key facts about the adoption of TypeScript in open-source projects as of 2024. Output a JSON object with a "topic" and "facts" array.', + assignee: 'fetcher', + // Retry config: up to 2 retries, 500ms base delay, 2x backoff (500ms, 1000ms) + maxRetries: 2, + retryDelayMs: 500, + retryBackoff: 2, + }, + { + title: 'Analyze data', + description: 'Read the fetched data from shared memory and produce a 3-4 sentence analysis of TypeScript adoption trends.', + assignee: 'analyst', + dependsOn: ['Fetch data'], + // No retry — if analysis fails, just report the error + }, +] + +// --------------------------------------------------------------------------- +// Run +// --------------------------------------------------------------------------- + +console.log('Task Retry Example') +console.log('='.repeat(60)) +console.log('Pipeline: fetch (with retry) → analyze') +console.log(`Retry config: maxRetries=2, delay=500ms, backoff=2x`) +console.log('='.repeat(60)) +console.log() + +const result = await orchestrator.runTasks(team, tasks) + +// --------------------------------------------------------------------------- +// Summary +// --------------------------------------------------------------------------- + +console.log('\n' + '='.repeat(60)) +console.log(`Overall success: ${result.success}`) +console.log(`Tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`) + +for (const [name, r] of result.agentResults) { + const icon = r.success ? 'OK ' : 'FAIL' + console.log(` [${icon}] ${name}`) + console.log(` ${r.output.slice(0, 200)}`) +} diff --git a/examples/11-trace-observability.ts b/examples/11-trace-observability.ts new file mode 100644 index 0000000..20b463e --- /dev/null +++ b/examples/11-trace-observability.ts @@ -0,0 +1,133 @@ +/** + * Example 11 — Trace Observability + * + * Demonstrates the `onTrace` callback for lightweight observability. 
Every LLM + * call, tool execution, task lifecycle, and agent run emits a structured trace + * event with timing data and token usage — giving you full visibility into + * what's happening inside a multi-agent run. + * + * Trace events share a `runId` for correlation, so you can reconstruct the + * full execution timeline. Pipe them into your own logging, OpenTelemetry, or + * dashboard. + * + * Run: + * npx tsx examples/11-trace-observability.ts + * + * Prerequisites: + * ANTHROPIC_API_KEY env var must be set. + */ + +import { OpenMultiAgent } from '../src/index.js' +import type { AgentConfig, TraceEvent } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Agents +// --------------------------------------------------------------------------- + +const researcher: AgentConfig = { + name: 'researcher', + model: 'claude-sonnet-4-6', + systemPrompt: 'You are a research assistant. Provide concise, factual answers.', + maxTurns: 2, +} + +const writer: AgentConfig = { + name: 'writer', + model: 'claude-sonnet-4-6', + systemPrompt: 'You are a technical writer. 
Summarize research into clear prose.', + maxTurns: 2, +} + +// --------------------------------------------------------------------------- +// Trace handler — log every span with timing +// --------------------------------------------------------------------------- + +function handleTrace(event: TraceEvent): void { + const dur = `${event.durationMs}ms`.padStart(7) + + switch (event.type) { + case 'llm_call': + console.log( + ` [LLM] ${dur} agent=${event.agent} model=${event.model} turn=${event.turn}` + + ` tokens=${event.tokens.input_tokens}in/${event.tokens.output_tokens}out`, + ) + break + case 'tool_call': + console.log( + ` [TOOL] ${dur} agent=${event.agent} tool=${event.tool}` + + ` error=${event.isError}`, + ) + break + case 'task': + console.log( + ` [TASK] ${dur} task="${event.taskTitle}" agent=${event.agent}` + + ` success=${event.success} retries=${event.retries}`, + ) + break + case 'agent': + console.log( + ` [AGENT] ${dur} agent=${event.agent} turns=${event.turns}` + + ` tools=${event.toolCalls} tokens=${event.tokens.input_tokens}in/${event.tokens.output_tokens}out`, + ) + break + } +} + +// --------------------------------------------------------------------------- +// Orchestrator + team +// --------------------------------------------------------------------------- + +const orchestrator = new OpenMultiAgent({ + defaultModel: 'claude-sonnet-4-6', + onTrace: handleTrace, +}) + +const team = orchestrator.createTeam('trace-demo', { + name: 'trace-demo', + agents: [researcher, writer], + sharedMemory: true, +}) + +// --------------------------------------------------------------------------- +// Tasks — researcher first, then writer summarizes +// --------------------------------------------------------------------------- + +const tasks = [ + { + title: 'Research topic', + description: 'List 5 key benefits of TypeScript for large codebases. 
Be concise.', + assignee: 'researcher', + }, + { + title: 'Write summary', + description: 'Read the research from shared memory and write a 3-sentence summary.', + assignee: 'writer', + dependsOn: ['Research topic'], + }, +] + +// --------------------------------------------------------------------------- +// Run +// --------------------------------------------------------------------------- + +console.log('Trace Observability Example') +console.log('='.repeat(60)) +console.log('Pipeline: research → write (with full trace output)') +console.log('='.repeat(60)) +console.log() + +const result = await orchestrator.runTasks(team, tasks) + +// --------------------------------------------------------------------------- +// Summary +// --------------------------------------------------------------------------- + +console.log('\n' + '='.repeat(60)) +console.log(`Overall success: ${result.success}`) +console.log(`Tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`) + +for (const [name, r] of result.agentResults) { + const icon = r.success ? 
'OK ' : 'FAIL' + console.log(` [${icon}] ${name}`) + console.log(` ${r.output.slice(0, 200)}`) +} diff --git a/package-lock.json b/package-lock.json index b74dcd0..b48f976 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,6 +16,7 @@ }, "devDependencies": { "@types/node": "^22.0.0", + "tsx": "^4.21.0", "typescript": "^5.6.0", "vitest": "^2.1.0" }, @@ -321,6 +322,23 @@ "node": ">=12" } }, + "node_modules/@esbuild/netbsd-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.7.tgz", + "integrity": "sha512-b6pqtrQdigZBwZxAn1UpazEisvwaIDvdbMbmrly7cDTMFnw/+3lVxxCTGOrkPVnsYIosJJXAsILG9XcQS+Yu6w==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, "node_modules/@esbuild/netbsd-x64": { "version": "0.21.5", "resolved": "https://registry.npmmirror.com/@esbuild/netbsd-x64/-/netbsd-x64-0.21.5.tgz", @@ -338,6 +356,23 @@ "node": ">=12" } }, + "node_modules/@esbuild/openbsd-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.7.tgz", + "integrity": "sha512-AFuojMQTxAz75Fo8idVcqoQWEHIXFRbOc1TrVcFSgCZtQfSdc1RXgB3tjOn/krRHENUB4j00bfGjyl2mJrU37A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, "node_modules/@esbuild/openbsd-x64": { "version": "0.21.5", "resolved": "https://registry.npmmirror.com/@esbuild/openbsd-x64/-/openbsd-x64-0.21.5.tgz", @@ -355,6 +390,23 @@ "node": ">=12" } }, + "node_modules/@esbuild/openharmony-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.7.tgz", + "integrity": "sha512-+KrvYb/C8zA9CU/g0sR6w2RBw7IGc5J2BPnc3dYc5VJxHCSF1yNMxTV5LQ7GuKteQXZtspjFbiuW5/dOj7H4Yw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + 
"os": [ + "openharmony" + ], + "engines": { + "node": ">=18" + } + }, "node_modules/@esbuild/sunos-x64": { "version": "0.21.5", "resolved": "https://registry.npmmirror.com/@esbuild/sunos-x64/-/sunos-x64-0.21.5.tgz", @@ -1288,6 +1340,19 @@ "node": ">= 0.4" } }, + "node_modules/get-tsconfig": { + "version": "4.13.7", + "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.7.tgz", + "integrity": "sha512-7tN6rFgBlMgpBML5j8typ92BKFi2sFQvIdpAqLA2beia5avZDrMs0FLZiM5etShWq5irVyGcGMEA1jcDaK7A/Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "resolve-pkg-maps": "^1.0.0" + }, + "funding": { + "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" + } + }, "node_modules/gopd": { "version": "1.2.0", "resolved": "https://registry.npmmirror.com/gopd/-/gopd-1.2.0.tgz", @@ -1564,6 +1629,16 @@ "node": "^10 || ^12 || >=14" } }, + "node_modules/resolve-pkg-maps": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", + "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" + } + }, "node_modules/rollup": { "version": "4.60.1", "resolved": "https://registry.npmmirror.com/rollup/-/rollup-4.60.1.tgz", @@ -1690,6 +1765,459 @@ "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", "license": "MIT" }, + "node_modules/tsx": { + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz", + "integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==", + "dev": true, + "license": "MIT", + "dependencies": { + "esbuild": "~0.27.0", + "get-tsconfig": "^4.7.5" + }, + "bin": { + "tsx": "dist/cli.mjs" + }, + "engines": { + "node": ">=18.0.0" + }, + "optionalDependencies": { + "fsevents": 
"~2.3.3" + } + }, + "node_modules/tsx/node_modules/@esbuild/aix-ppc64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.7.tgz", + "integrity": "sha512-EKX3Qwmhz1eMdEJokhALr0YiD0lhQNwDqkPYyPhiSwKrh7/4KRjQc04sZ8db+5DVVnZ1LmbNDI1uAMPEUBnQPg==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/android-arm": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.7.tgz", + "integrity": "sha512-jbPXvB4Yj2yBV7HUfE2KHe4GJX51QplCN1pGbYjvsyCZbQmies29EoJbkEc+vYuU5o45AfQn37vZlyXy4YJ8RQ==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/android-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.7.tgz", + "integrity": "sha512-62dPZHpIXzvChfvfLJow3q5dDtiNMkwiRzPylSCfriLvZeq0a1bWChrGx/BbUbPwOrsWKMn8idSllklzBy+dgQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/android-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.7.tgz", + "integrity": "sha512-x5VpMODneVDb70PYV2VQOmIUUiBtY3D3mPBG8NxVk5CogneYhkR7MmM3yR/uMdITLrC1ml/NV1rj4bMJuy9MCg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/darwin-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.7.tgz", + "integrity": 
"sha512-5lckdqeuBPlKUwvoCXIgI2D9/ABmPq3Rdp7IfL70393YgaASt7tbju3Ac+ePVi3KDH6N2RqePfHnXkaDtY9fkw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/darwin-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.7.tgz", + "integrity": "sha512-rYnXrKcXuT7Z+WL5K980jVFdvVKhCHhUwid+dDYQpH+qu+TefcomiMAJpIiC2EM3Rjtq0sO3StMV/+3w3MyyqQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/freebsd-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.7.tgz", + "integrity": "sha512-B48PqeCsEgOtzME2GbNM2roU29AMTuOIN91dsMO30t+Ydis3z/3Ngoj5hhnsOSSwNzS+6JppqWsuhTp6E82l2w==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/freebsd-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.7.tgz", + "integrity": "sha512-jOBDK5XEjA4m5IJK3bpAQF9/Lelu/Z9ZcdhTRLf4cajlB+8VEhFFRjWgfy3M1O4rO2GQ/b2dLwCUGpiF/eATNQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-arm": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.7.tgz", + "integrity": "sha512-RkT/YXYBTSULo3+af8Ib0ykH8u2MBh57o7q/DAs3lTJlyVQkgQvlrPTnjIzzRPQyavxtPtfg0EopvDyIt0j1rA==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + 
"node_modules/tsx/node_modules/@esbuild/linux-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.7.tgz", + "integrity": "sha512-RZPHBoxXuNnPQO9rvjh5jdkRmVizktkT7TCDkDmQ0W2SwHInKCAV95GRuvdSvA7w4VMwfCjUiPwDi0ZO6Nfe9A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-ia32": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.7.tgz", + "integrity": "sha512-GA48aKNkyQDbd3KtkplYWT102C5sn/EZTY4XROkxONgruHPU72l+gW+FfF8tf2cFjeHaRbWpOYa/uRBz/Xq1Pg==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-loong64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.7.tgz", + "integrity": "sha512-a4POruNM2oWsD4WKvBSEKGIiWQF8fZOAsycHOt6JBpZ+JN2n2JH9WAv56SOyu9X5IqAjqSIPTaJkqN8F7XOQ5Q==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-mips64el": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.7.tgz", + "integrity": "sha512-KabT5I6StirGfIz0FMgl1I+R1H73Gp0ofL9A3nG3i/cYFJzKHhouBV5VWK1CSgKvVaG4q1RNpCTR2LuTVB3fIw==", + "cpu": [ + "mips64el" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-ppc64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.7.tgz", + "integrity": 
"sha512-gRsL4x6wsGHGRqhtI+ifpN/vpOFTQtnbsupUF5R5YTAg+y/lKelYR1hXbnBdzDjGbMYjVJLJTd2OFmMewAgwlQ==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-riscv64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.7.tgz", + "integrity": "sha512-hL25LbxO1QOngGzu2U5xeXtxXcW+/GvMN3ejANqXkxZ/opySAZMrc+9LY/WyjAan41unrR3YrmtTsUpwT66InQ==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-s390x": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.7.tgz", + "integrity": "sha512-2k8go8Ycu1Kb46vEelhu1vqEP+UeRVj2zY1pSuPdgvbd5ykAw82Lrro28vXUrRmzEsUV0NzCf54yARIK8r0fdw==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.7.tgz", + "integrity": "sha512-hzznmADPt+OmsYzw1EE33ccA+HPdIqiCRq7cQeL1Jlq2gb1+OyWBkMCrYGBJ+sxVzve2ZJEVeePbLM2iEIZSxA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/netbsd-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.7.tgz", + "integrity": "sha512-OfatkLojr6U+WN5EDYuoQhtM+1xco+/6FSzJJnuWiUw5eVcicbyK3dq5EeV/QHT1uy6GoDhGbFpprUiHUYggrw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + 
"node_modules/tsx/node_modules/@esbuild/openbsd-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.7.tgz", + "integrity": "sha512-+A1NJmfM8WNDv5CLVQYJ5PshuRm/4cI6WMZRg1by1GwPIQPCTs1GLEUHwiiQGT5zDdyLiRM/l1G0Pv54gvtKIg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/sunos-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.7.tgz", + "integrity": "sha512-ikktIhFBzQNt/QDyOL580ti9+5mL/YZeUPKU2ivGtGjdTYoqz6jObj6nOMfhASpS4GU4Q/Clh1QtxWAvcYKamA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/win32-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.7.tgz", + "integrity": "sha512-7yRhbHvPqSpRUV7Q20VuDwbjW5kIMwTHpptuUzV+AA46kiPze5Z7qgt6CLCK3pWFrHeNfDd1VKgyP4O+ng17CA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/win32-ia32": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.7.tgz", + "integrity": "sha512-SmwKXe6VHIyZYbBLJrhOoCJRB/Z1tckzmgTLfFYOfpMAx63BJEaL9ExI8x7v0oAO3Zh6D/Oi1gVxEYr5oUCFhw==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/win32-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.7.tgz", + "integrity": "sha512-56hiAJPhwQ1R4i+21FVF7V8kSD5zZTdHcVuRFMW0hn753vVfQN8xlx4uOPT4xoGH0Z/oVATuR82AiqSTDIpaHg==", + 
"cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/esbuild": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.7.tgz", + "integrity": "sha512-IxpibTjyVnmrIQo5aqNpCgoACA/dTKLTlhMHihVHhdkxKyPO1uBBthumT0rdHmcsk9uMonIWS0m4FljWzILh3w==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.27.7", + "@esbuild/android-arm": "0.27.7", + "@esbuild/android-arm64": "0.27.7", + "@esbuild/android-x64": "0.27.7", + "@esbuild/darwin-arm64": "0.27.7", + "@esbuild/darwin-x64": "0.27.7", + "@esbuild/freebsd-arm64": "0.27.7", + "@esbuild/freebsd-x64": "0.27.7", + "@esbuild/linux-arm": "0.27.7", + "@esbuild/linux-arm64": "0.27.7", + "@esbuild/linux-ia32": "0.27.7", + "@esbuild/linux-loong64": "0.27.7", + "@esbuild/linux-mips64el": "0.27.7", + "@esbuild/linux-ppc64": "0.27.7", + "@esbuild/linux-riscv64": "0.27.7", + "@esbuild/linux-s390x": "0.27.7", + "@esbuild/linux-x64": "0.27.7", + "@esbuild/netbsd-arm64": "0.27.7", + "@esbuild/netbsd-x64": "0.27.7", + "@esbuild/openbsd-arm64": "0.27.7", + "@esbuild/openbsd-x64": "0.27.7", + "@esbuild/openharmony-arm64": "0.27.7", + "@esbuild/sunos-x64": "0.27.7", + "@esbuild/win32-arm64": "0.27.7", + "@esbuild/win32-ia32": "0.27.7", + "@esbuild/win32-x64": "0.27.7" + } + }, "node_modules/typescript": { "version": "5.9.3", "resolved": "https://registry.npmmirror.com/typescript/-/typescript-5.9.3.tgz", diff --git a/package.json b/package.json index c9910a8..d25f6b2 100644 --- a/package.json +++ b/package.json @@ -1 +1,50 @@ -{"name":"@jackchen_me/open-multi-agent","version":"0.1.0","description":"Production-grade multi-agent orchestration framework. 
Model-agnostic, supports team collaboration, task scheduling, and inter-agent communication.","type":"module","main":"dist/index.js","types":"dist/index.d.ts","exports":{".":{"types":"./dist/index.d.ts","import":"./dist/index.js"}},"scripts":{"build":"tsc","dev":"tsc --watch","test":"vitest run","test:watch":"vitest","lint":"tsc --noEmit","prepublishOnly":"npm run build"},"keywords":["ai","agent","multi-agent","orchestration","llm","claude","openai","ollama","mcp","tool-use","agent-framework"],"author":"","license":"MIT","engines":{"node":">=18.0.0"},"dependencies":{"@anthropic-ai/sdk":"^0.52.0","openai":"^4.73.0","zod":"^3.23.0"},"devDependencies":{"typescript":"^5.6.0","vitest":"^2.1.0","@types/node":"^22.0.0"}} +{ + "name":"@jackchen_me/open-multi-agent", + "version":"0.1.0", + "description":"Production-grade multi-agent orchestration framework. Model-agnostic, supports team collaboration, task scheduling, and inter-agent communication.", + "type":"module", + "main":"dist/index.js", + "types":"dist/index.d.ts", + "exports": { + ".": { + "types":"./dist/index.d.ts", + "import":"./dist/index.js" + } + }, + "scripts": { + "build":"tsc", + "dev":"tsc --watch", + "test":"vitest run", + "test:watch":"vitest", + "lint":"tsc --noEmit", + "prepublishOnly":"npm run build" + }, + "keywords": [ + "ai", + "agent", + "multi-agent", + "orchestration", + "llm", + "claude", + "openai", + "ollama", + "mcp", + "tool-use", + "agent-framework" + ], + "author":"", + "license":"MIT", + "engines":{ + "node":">=18.0.0" + }, + "dependencies": { + "@anthropic-ai/sdk":"^0.52.0", + "openai":"^4.73.0", + "zod":"^3.23.0" + }, + "devDependencies": { + "typescript":"^5.6.0", + "vitest":"^2.1.0", + "@types/node":"^22.0.0" + } +} diff --git a/src/agent/agent.ts b/src/agent/agent.ts index 1dc530d..58a1df3 100644 --- a/src/agent/agent.ts +++ b/src/agent/agent.ts @@ -32,10 +32,16 @@ import type { TokenUsage, ToolUseContext, } from '../types.js' +import { emitTrace, generateRunId } from 
'../utils/trace.js' import type { ToolDefinition as FrameworkToolDefinition, ToolRegistry } from '../tool/framework.js' import type { ToolExecutor } from '../tool/executor.js' import { createAdapter } from '../llm/adapter.js' -import { AgentRunner, type RunnerOptions, type RunOptions } from './runner.js' +import { AgentRunner, type RunnerOptions, type RunOptions, type RunResult } from './runner.js' +import { + buildStructuredOutputInstruction, + extractJSON, + validateOutput, +} from './structured-output.js' // --------------------------------------------------------------------------- // Internal helpers @@ -109,11 +115,20 @@ export class Agent { } const provider = this.config.provider ?? 'anthropic' - const adapter = await createAdapter(provider) + const adapter = await createAdapter(provider, this.config.apiKey, this.config.baseURL) + + // Append structured-output instructions when an outputSchema is configured. + let effectiveSystemPrompt = this.config.systemPrompt + if (this.config.outputSchema) { + const instruction = buildStructuredOutputInstruction(this.config.outputSchema) + effectiveSystemPrompt = effectiveSystemPrompt + ? effectiveSystemPrompt + '\n' + instruction + : instruction + } const runnerOptions: RunnerOptions = { model: this.config.model, - systemPrompt: this.config.systemPrompt, + systemPrompt: effectiveSystemPrompt, maxTurns: this.config.maxTurns, maxTokens: this.config.maxTokens, temperature: this.config.temperature, @@ -144,12 +159,12 @@ export class Agent { * * Use this for one-shot queries where past context is irrelevant. */ - async run(prompt: string): Promise { + async run(prompt: string, runOptions?: Partial): Promise { const messages: LLMMessage[] = [ { role: 'user', content: [{ type: 'text', text: prompt }] }, ] - return this.executeRun(messages) + return this.executeRun(messages, runOptions) } /** @@ -160,6 +175,7 @@ export class Agent { * * Use this for multi-turn interactions. 
*/ + // TODO(#18): accept optional RunOptions to forward trace context async prompt(message: string): Promise { const userMessage: LLMMessage = { role: 'user', @@ -183,6 +199,7 @@ export class Agent { * * Like {@link run}, this does not use or update the persistent history. */ + // TODO(#18): accept optional RunOptions to forward trace context async *stream(prompt: string): AsyncGenerator { const messages: LLMMessage[] = [ { role: 'user', content: [{ type: 'text', text: prompt }] }, @@ -252,33 +269,165 @@ export class Agent { * Shared execution path used by both `run` and `prompt`. * Handles state transitions and error wrapping. */ - private async executeRun(messages: LLMMessage[]): Promise { + private async executeRun( + messages: LLMMessage[], + callerOptions?: Partial, + ): Promise { this.transitionTo('running') + const agentStartMs = Date.now() + try { const runner = await this.getRunner() + const internalOnMessage = (msg: LLMMessage) => { + this.state.messages.push(msg) + callerOptions?.onMessage?.(msg) + } + // Auto-generate runId when onTrace is provided but runId is missing + const needsRunId = callerOptions?.onTrace && !callerOptions.runId const runOptions: RunOptions = { - onMessage: msg => { - this.state.messages.push(msg) - }, + ...callerOptions, + onMessage: internalOnMessage, + ...(needsRunId ? 
{ runId: generateRunId() } : undefined), } const result = await runner.run(messages, runOptions) - this.state.tokenUsage = addUsage(this.state.tokenUsage, result.tokenUsage) - this.transitionTo('completed') - return this.toAgentRunResult(result, true) + // --- Structured output validation --- + if (this.config.outputSchema) { + const validated = await this.validateStructuredOutput( + messages, + result, + runner, + runOptions, + ) + this.emitAgentTrace(callerOptions, agentStartMs, validated) + return validated + } + + this.transitionTo('completed') + const agentResult = this.toAgentRunResult(result, true) + this.emitAgentTrace(callerOptions, agentStartMs, agentResult) + return agentResult } catch (err) { const error = err instanceof Error ? err : new Error(String(err)) this.transitionToError(error) - return { + const errorResult: AgentRunResult = { success: false, output: error.message, messages: [], tokenUsage: ZERO_USAGE, toolCalls: [], + structured: undefined, + } + this.emitAgentTrace(callerOptions, agentStartMs, errorResult) + return errorResult + } + } + + /** Emit an `agent` trace event if `onTrace` is provided. */ + private emitAgentTrace( + options: Partial | undefined, + startMs: number, + result: AgentRunResult, + ): void { + if (!options?.onTrace) return + const endMs = Date.now() + emitTrace(options.onTrace, { + type: 'agent', + runId: options.runId ?? '', + taskId: options.taskId, + agent: options.traceAgent ?? this.name, + turns: result.messages.filter(m => m.role === 'assistant').length, + tokens: result.tokenUsage, + toolCalls: result.toolCalls.length, + startMs, + endMs, + durationMs: endMs - startMs, + }) + } + + /** + * Validate agent output against the configured `outputSchema`. + * On first validation failure, retry once with error feedback. 
+ */ + private async validateStructuredOutput( + originalMessages: LLMMessage[], + result: RunResult, + runner: AgentRunner, + runOptions: RunOptions, + ): Promise { + const schema = this.config.outputSchema! + + // First attempt + let firstAttemptError: unknown + try { + const parsed = extractJSON(result.output) + const validated = validateOutput(schema, parsed) + this.transitionTo('completed') + return this.toAgentRunResult(result, true, validated) + } catch (e) { + firstAttemptError = e + } + + // Retry: send full context + error feedback + const errorMsg = firstAttemptError instanceof Error + ? firstAttemptError.message + : String(firstAttemptError) + + const errorFeedbackMessage: LLMMessage = { + role: 'user' as const, + content: [{ + type: 'text' as const, + text: [ + 'Your previous response did not produce valid JSON matching the required schema.', + '', + `Error: ${errorMsg}`, + '', + 'Please try again. Respond with ONLY valid JSON, no other text.', + ].join('\n'), + }], + } + + const retryMessages: LLMMessage[] = [ + ...originalMessages, + ...result.messages, + errorFeedbackMessage, + ] + + const retryResult = await runner.run(retryMessages, runOptions) + this.state.tokenUsage = addUsage(this.state.tokenUsage, retryResult.tokenUsage) + + const mergedTokenUsage = addUsage(result.tokenUsage, retryResult.tokenUsage) + // Include the error feedback turn to maintain alternating user/assistant roles, + // which is required by Anthropic's API for subsequent prompt() calls. 
+ const mergedMessages = [...result.messages, errorFeedbackMessage, ...retryResult.messages] + const mergedToolCalls = [...result.toolCalls, ...retryResult.toolCalls] + + try { + const parsed = extractJSON(retryResult.output) + const validated = validateOutput(schema, parsed) + this.transitionTo('completed') + return { + success: true, + output: retryResult.output, + messages: mergedMessages, + tokenUsage: mergedTokenUsage, + toolCalls: mergedToolCalls, + structured: validated, + } + } catch { + // Retry also failed + this.transitionTo('completed') + return { + success: false, + output: retryResult.output, + messages: mergedMessages, + tokenUsage: mergedTokenUsage, + toolCalls: mergedToolCalls, + structured: undefined, } } } @@ -331,8 +480,9 @@ export class Agent { // ------------------------------------------------------------------------- private toAgentRunResult( - result: import('./runner.js').RunResult, + result: RunResult, success: boolean, + structured?: unknown, ): AgentRunResult { return { success, @@ -340,6 +490,7 @@ export class Agent { messages: result.messages, tokenUsage: result.tokenUsage, toolCalls: result.toolCalls, + structured, } } diff --git a/src/agent/pool.ts b/src/agent/pool.ts index 915f361..aba0eb8 100644 --- a/src/agent/pool.ts +++ b/src/agent/pool.ts @@ -21,6 +21,7 @@ */ import type { AgentRunResult } from '../types.js' +import type { RunOptions } from './runner.js' import type { Agent } from './agent.js' import { Semaphore } from '../utils/semaphore.js' @@ -123,12 +124,16 @@ export class AgentPool { * * @throws {Error} If the agent name is not found. 
*/ - async run(agentName: string, prompt: string): Promise { + async run( + agentName: string, + prompt: string, + runOptions?: Partial, + ): Promise { const agent = this.requireAgent(agentName) await this.semaphore.acquire() try { - return await agent.run(prompt) + return await agent.run(prompt, runOptions) } finally { this.semaphore.release() } @@ -144,6 +149,7 @@ export class AgentPool { * * @param tasks - Array of `{ agent, prompt }` descriptors. */ + // TODO(#18): accept RunOptions per task to forward trace context async runParallel( tasks: ReadonlyArray<{ readonly agent: string; readonly prompt: string }>, ): Promise> { @@ -182,6 +188,7 @@ export class AgentPool { * * @throws {Error} If the pool is empty. */ + // TODO(#18): accept RunOptions to forward trace context async runAny(prompt: string): Promise { const allAgents = this.list() if (allAgents.length === 0) { diff --git a/src/agent/runner.ts b/src/agent/runner.ts index 13667db..113f93c 100644 --- a/src/agent/runner.ts +++ b/src/agent/runner.ts @@ -25,7 +25,9 @@ import type { ToolUseContext, LLMAdapter, LLMChatOptions, + TraceEvent, } from '../types.js' +import { emitTrace } from '../utils/trace.js' import type { ToolRegistry } from '../tool/framework.js' import type { ToolExecutor } from '../tool/executor.js' @@ -76,6 +78,14 @@ export interface RunOptions { readonly onToolResult?: (name: string, result: ToolResult) => void /** Fired after each complete {@link LLMMessage} is appended. */ readonly onMessage?: (message: LLMMessage) => void + /** Trace callback for observability spans. Async callbacks are safe. */ + readonly onTrace?: (event: TraceEvent) => void | Promise + /** Run ID for trace correlation. */ + readonly runId?: string + /** Task ID for trace correlation. */ + readonly taskId?: string + /** Agent name for trace correlation (overrides RunnerOptions.agentName). */ + readonly traceAgent?: string } /** The aggregated result returned when a full run completes. 
*/ @@ -254,7 +264,23 @@ export class AgentRunner { // ------------------------------------------------------------------ // Step 1: Call the LLM and collect the full response for this turn. // ------------------------------------------------------------------ + const llmStartMs = Date.now() const response = await this.adapter.chat(conversationMessages, baseChatOptions) + if (options.onTrace) { + const llmEndMs = Date.now() + emitTrace(options.onTrace, { + type: 'llm_call', + runId: options.runId ?? '', + taskId: options.taskId, + agent: options.traceAgent ?? this.options.agentName ?? 'unknown', + model: this.options.model, + turn: turns, + tokens: response.usage, + startMs: llmStartMs, + endMs: llmEndMs, + durationMs: llmEndMs - llmStartMs, + }) + } totalUsage = addTokenUsage(totalUsage, response.usage) @@ -319,10 +345,25 @@ export class AgentRunner { result = { data: message, isError: true } } - const duration = Date.now() - startTime + const endTime = Date.now() + const duration = endTime - startTime options.onToolResult?.(block.name, result) + if (options.onTrace) { + emitTrace(options.onTrace, { + type: 'tool_call', + runId: options.runId ?? '', + taskId: options.taskId, + agent: options.traceAgent ?? this.options.agentName ?? 'unknown', + tool: block.name, + isError: result.isError ?? false, + startMs: startTime, + endMs: endTime, + durationMs: duration, + }) + } + const record: ToolCallRecord = { toolName: block.name, input: block.input, diff --git a/src/agent/structured-output.ts b/src/agent/structured-output.ts new file mode 100644 index 0000000..3da0f06 --- /dev/null +++ b/src/agent/structured-output.ts @@ -0,0 +1,126 @@ +/** + * @fileoverview Structured output utilities for agent responses. + * + * Provides JSON extraction, Zod validation, and system-prompt injection so + * that agents can return typed, schema-validated output. 
+ */ + +import { type ZodSchema } from 'zod' +import { zodToJsonSchema } from '../tool/framework.js' + +// --------------------------------------------------------------------------- +// System-prompt instruction builder +// --------------------------------------------------------------------------- + +/** + * Build a JSON-mode instruction block to append to the agent's system prompt. + * + * Converts the Zod schema to JSON Schema and formats it as a clear directive + * for the LLM to respond with valid JSON matching the schema. + */ +export function buildStructuredOutputInstruction(schema: ZodSchema): string { + const jsonSchema = zodToJsonSchema(schema) + return [ + '', + '## Output Format (REQUIRED)', + 'You MUST respond with ONLY valid JSON that conforms to the following JSON Schema.', + 'Do NOT include any text, markdown fences, or explanation outside the JSON object.', + 'Do NOT wrap the JSON in ```json code fences.', + '', + '```', + JSON.stringify(jsonSchema, null, 2), + '```', + ].join('\n') +} + +// --------------------------------------------------------------------------- +// JSON extraction +// --------------------------------------------------------------------------- + +/** + * Attempt to extract and parse JSON from the agent's raw text output. + * + * Handles three cases in order: + * 1. The output is already valid JSON (ideal case) + * 2. The output contains a ` ```json ` fenced block + * 3. 
The output contains a bare JSON object/array (first `{`/`[` to last `}`/`]`) + * + * @throws {Error} when no valid JSON can be extracted + */ +export function extractJSON(raw: string): unknown { + const trimmed = raw.trim() + + // Case 1: Direct parse + try { + return JSON.parse(trimmed) + } catch { + // Continue to fallback strategies + } + + // Case 2a: Prefer ```json tagged fence + const jsonFenceMatch = trimmed.match(/```json\s*([\s\S]*?)```/) + if (jsonFenceMatch?.[1]) { + try { + return JSON.parse(jsonFenceMatch[1].trim()) + } catch { + // Continue + } + } + + // Case 2b: Fall back to bare ``` fence + const bareFenceMatch = trimmed.match(/```\s*([\s\S]*?)```/) + if (bareFenceMatch?.[1]) { + try { + return JSON.parse(bareFenceMatch[1].trim()) + } catch { + // Continue + } + } + + // Case 3: Find first { to last } (object) + const objStart = trimmed.indexOf('{') + const objEnd = trimmed.lastIndexOf('}') + if (objStart !== -1 && objEnd > objStart) { + try { + return JSON.parse(trimmed.slice(objStart, objEnd + 1)) + } catch { + // Fall through + } + } + + // Case 3b: Find first [ to last ] (array) + const arrStart = trimmed.indexOf('[') + const arrEnd = trimmed.lastIndexOf(']') + if (arrStart !== -1 && arrEnd > arrStart) { + try { + return JSON.parse(trimmed.slice(arrStart, arrEnd + 1)) + } catch { + // Fall through + } + } + + throw new Error( + `Failed to extract JSON from output. Raw output begins with: "${trimmed.slice(0, 100)}"`, + ) +} + +// --------------------------------------------------------------------------- +// Zod validation +// --------------------------------------------------------------------------- + +/** + * Validate a parsed JSON value against a Zod schema. + * + * @returns The validated (and potentially transformed) value on success. + * @throws {Error} with a human-readable Zod error message on failure. 
+ */ +export function validateOutput(schema: ZodSchema, data: unknown): unknown { + const result = schema.safeParse(data) + if (result.success) { + return result.data + } + const issues = result.error.issues + .map(issue => ` - ${issue.path.length > 0 ? issue.path.join('.') : '(root)'}: ${issue.message}`) + .join('\n') + throw new Error(`Output validation failed:\n${issues}`) +} diff --git a/src/index.ts b/src/index.ts index 814996f..312f852 100644 --- a/src/index.ts +++ b/src/index.ts @@ -54,7 +54,7 @@ // Orchestrator (primary entry point) // --------------------------------------------------------------------------- -export { OpenMultiAgent } from './orchestrator/orchestrator.js' +export { OpenMultiAgent, executeWithRetry, computeRetryDelay } from './orchestrator/orchestrator.js' export { Scheduler } from './orchestrator/scheduler.js' export type { SchedulingStrategy } from './orchestrator/scheduler.js' @@ -63,6 +63,7 @@ export type { SchedulingStrategy } from './orchestrator/scheduler.js' // --------------------------------------------------------------------------- export { Agent } from './agent/agent.js' +export { buildStructuredOutputInstruction, extractJSON, validateOutput } from './agent/structured-output.js' export { AgentPool, Semaphore } from './agent/pool.js' export type { PoolStatus } from './agent/pool.js' @@ -160,7 +161,18 @@ export type { OrchestratorConfig, OrchestratorEvent, + // Trace + TraceEventType, + TraceEventBase, + TraceEvent, + LLMCallTrace, + ToolCallTrace, + TaskTrace, + AgentTrace, + // Memory MemoryEntry, MemoryStore, } from './types.js' + +export { generateRunId } from './utils/trace.js' diff --git a/src/llm/adapter.ts b/src/llm/adapter.ts index d5ec557..9bf8784 100644 --- a/src/llm/adapter.ts +++ b/src/llm/adapter.ts @@ -39,6 +39,7 @@ import type { LLMAdapter } from '../types.js' * directly and bypassing this factory. 
*/ -export type SupportedProvider = 'anthropic' | 'openai' | 'ollama' +export type SupportedProvider = 'anthropic' | 'copilot' | 'openai' | 'ollama' /** * Instantiate the appropriate {@link LLMAdapter} for the given provider. @@ -46,26 +47,39 @@ export type SupportedProvider = 'anthropic' | 'openai' | 'ollama' - * API keys fall back to the standard environment variables - * (`ANTHROPIC_API_KEY` / `OPENAI_API_KEY`) when not supplied explicitly. - * Ollama uses `OLLAMA_BASE_URL` (defaults to http://localhost:11434). + * API keys fall back to the standard environment variables when not supplied + * explicitly: + * - `anthropic` → `ANTHROPIC_API_KEY` + * - `openai` → `OPENAI_API_KEY` + * - `copilot` → `GITHUB_COPILOT_TOKEN` / `GITHUB_TOKEN`, or interactive + * OAuth2 device flow if neither is set + * - `ollama` → no API key; uses `OLLAMA_BASE_URL` (defaults to http://localhost:11434) * * Adapters are imported lazily so that projects using only one provider * are not forced to install the SDK for the other. * * @param provider - Which LLM provider to target. * @param apiKey - Optional API key override; falls back to env var. + * @param baseURL - Optional base URL for OpenAI-compatible APIs (Ollama, vLLM, etc.). * @throws {Error} When the provider string is not recognised.
*/ export async function createAdapter( provider: SupportedProvider, apiKey?: string, + baseURL?: string, ): Promise { switch (provider) { case 'anthropic': { const { AnthropicAdapter } = await import('./anthropic.js') - return new AnthropicAdapter(apiKey) + return new AnthropicAdapter(apiKey, baseURL) + } + case 'copilot': { + if (baseURL) { + console.warn('[open-multi-agent] baseURL is not supported for the copilot provider and will be ignored.') + } + const { CopilotAdapter } = await import('./copilot.js') + return new CopilotAdapter(apiKey) } case 'openai': { const { OpenAIAdapter } = await import('./openai.js') - return new OpenAIAdapter(apiKey) + return new OpenAIAdapter(apiKey, baseURL) } case 'ollama': { const { OllamaAdapter } = await import('./ollama.js') diff --git a/src/llm/anthropic.ts b/src/llm/anthropic.ts index 6b91fd4..fd912d5 100644 --- a/src/llm/anthropic.ts +++ b/src/llm/anthropic.ts @@ -189,9 +189,10 @@ export class AnthropicAdapter implements LLMAdapter { readonly #client: Anthropic - constructor(apiKey?: string) { + constructor(apiKey?: string, baseURL?: string) { this.#client = new Anthropic({ apiKey: apiKey ?? process.env['ANTHROPIC_API_KEY'], + baseURL, }) } diff --git a/src/llm/copilot.ts b/src/llm/copilot.ts new file mode 100644 index 0000000..7e829fe --- /dev/null +++ b/src/llm/copilot.ts @@ -0,0 +1,551 @@ +/** + * @fileoverview GitHub Copilot adapter implementing {@link LLMAdapter}. + * + * Uses the OpenAI-compatible Copilot Chat Completions endpoint at + * `https://api.githubcopilot.com`. Authentication requires a GitHub token + * which is exchanged for a short-lived Copilot session token via the + * internal token endpoint. + * + * API key resolution order: + * 1. `apiKey` constructor argument + * 2. `GITHUB_COPILOT_TOKEN` environment variable + * 3. `GITHUB_TOKEN` environment variable + * 4. 
Interactive OAuth2 device flow (prompts the user to sign in) + * + * @example + * ```ts + * import { CopilotAdapter } from './copilot.js' + * + * const adapter = new CopilotAdapter() // uses GITHUB_COPILOT_TOKEN, falling back to GITHUB_TOKEN + * const response = await adapter.chat(messages, { + * model: 'claude-sonnet-4', + * maxTokens: 4096, + * }) + * ``` + */ + +import OpenAI from 'openai' +import type { + ChatCompletionChunk, +} from 'openai/resources/chat/completions/index.js' + +import type { + ContentBlock, + LLMAdapter, + LLMChatOptions, + LLMMessage, + LLMResponse, + LLMStreamOptions, + LLMToolDef, + StreamEvent, + TextBlock, + ToolUseBlock, +} from '../types.js' + +import { + toOpenAITool, + fromOpenAICompletion, + normalizeFinishReason, + buildOpenAIMessageList, +} from './openai-common.js' + +// --------------------------------------------------------------------------- +// Copilot auth — OAuth2 device flow + token exchange +// --------------------------------------------------------------------------- + +const COPILOT_TOKEN_URL = 'https://api.github.com/copilot_internal/v2/token' +const DEVICE_CODE_URL = 'https://github.com/login/device/code' +const POLL_URL = 'https://github.com/login/oauth/access_token' +const COPILOT_CLIENT_ID = 'Iv1.b507a08c87ecfe98' + +const COPILOT_HEADERS: Record = { + 'Copilot-Integration-Id': 'vscode-chat', + 'Editor-Version': 'vscode/1.100.0', + 'Editor-Plugin-Version': 'copilot-chat/0.42.2', +} + +interface CopilotTokenResponse { + token: string + expires_at: number +} + +interface DeviceCodeResponse { + device_code: string + user_code: string + verification_uri: string + interval: number + expires_in: number +} + +interface PollResponse { + access_token?: string + error?: string + error_description?: string +} + +/** + * Callback invoked when the OAuth2 device flow needs the user to authorize. + * Receives the verification URI and user code. If not provided, defaults to + * printing them to stdout. 
+ */ +export type DeviceCodeCallback = (verificationUri: string, userCode: string) => void + +const defaultDeviceCodeCallback: DeviceCodeCallback = (uri, code) => { + console.log(`\n┌─────────────────────────────────────────────┐`) + console.log(`│ GitHub Copilot — Sign in │`) + console.log(`│ │`) + console.log(`│ Open: ${uri.padEnd(35)}│`) + console.log(`│ Code: ${code.padEnd(35)}│`) + console.log(`└─────────────────────────────────────────────┘\n`) +} + +/** + * Start the GitHub OAuth2 device code flow with the Copilot client ID. + * + * Calls `onDeviceCode` with the verification URI and user code, then polls + * until the user completes authorization. Returns a GitHub OAuth token + * scoped for Copilot access. + */ +async function deviceCodeLogin(onDeviceCode: DeviceCodeCallback): Promise { + // Step 1: Request a device code + const codeRes = await fetch(DEVICE_CODE_URL, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/x-www-form-urlencoded', + }, + body: new URLSearchParams({ client_id: COPILOT_CLIENT_ID, scope: 'copilot' }), + }) + + if (!codeRes.ok) { + const body = await codeRes.text().catch(() => '') + throw new Error(`Device code request failed (${codeRes.status}): ${body}`) + } + + const codeData = (await codeRes.json()) as DeviceCodeResponse + + // Step 2: Prompt the user via callback + onDeviceCode(codeData.verification_uri, codeData.user_code) + + // Step 3: Poll for the user to complete auth + const interval = (codeData.interval || 5) * 1000 + const deadline = Date.now() + codeData.expires_in * 1000 + + while (Date.now() < deadline) { + await new Promise((resolve) => setTimeout(resolve, interval)) + + const pollRes = await fetch(POLL_URL, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/x-www-form-urlencoded', + }, + body: new URLSearchParams({ + client_id: COPILOT_CLIENT_ID, + device_code: codeData.device_code, + grant_type: 
'urn:ietf:params:oauth:grant-type:device_code', + }), + }) + + const pollData = (await pollRes.json()) as PollResponse + + if (pollData.access_token) { + console.log('✓ Authenticated with GitHub Copilot\n') + return pollData.access_token + } + + if (pollData.error === 'authorization_pending') continue + if (pollData.error === 'slow_down') { + await new Promise((resolve) => setTimeout(resolve, 5000)) + continue + } + + throw new Error( + `OAuth device flow failed: ${pollData.error} — ${pollData.error_description ?? ''}`, + ) + } + + throw new Error('Device code expired. Please try again.') +} + +/** + * Exchange a GitHub OAuth token (from the Copilot device flow) for a + * short-lived Copilot session token. + * + * Note: the token exchange endpoint does NOT require the Copilot-specific + * headers (Editor-Version etc.) — only the chat completions endpoint does. + */ +async function fetchCopilotToken(githubToken: string): Promise { + const res = await fetch(COPILOT_TOKEN_URL, { + method: 'GET', + headers: { + Authorization: `token ${githubToken}`, + Accept: 'application/json', + 'User-Agent': 'GitHubCopilotChat/0.28.0', + }, + }) + + if (!res.ok) { + const body = await res.text().catch(() => '') + throw new Error( + `Copilot token exchange failed (${res.status}): ${body || res.statusText}`, + ) + } + + return (await res.json()) as CopilotTokenResponse +} + +// --------------------------------------------------------------------------- +// Adapter implementation +// --------------------------------------------------------------------------- + +/** Options for the {@link CopilotAdapter} constructor. */ +export interface CopilotAdapterOptions { + /** GitHub OAuth token already scoped for Copilot. Falls back to env vars. */ + apiKey?: string + /** + * Callback invoked when the OAuth2 device flow needs user action. + * Defaults to printing the verification URI and user code to stdout. 
+ */ + onDeviceCode?: DeviceCodeCallback +} + +/** + * LLM adapter backed by the GitHub Copilot Chat Completions API. + * + * Authentication options (tried in order): + * 1. `apiKey` constructor arg — a GitHub OAuth token already scoped for Copilot + * 2. `GITHUB_COPILOT_TOKEN` env var + * 3. `GITHUB_TOKEN` env var + * 4. Interactive OAuth2 device flow + * + * The GitHub token is exchanged for a short-lived Copilot session token, which + * is cached and auto-refreshed. + * + * Thread-safe — a single instance may be shared across concurrent agent runs. + * Concurrent token refreshes are serialised via an internal mutex. + */ +export class CopilotAdapter implements LLMAdapter { + readonly name = 'copilot' + + #githubToken: string | null + #cachedToken: string | null = null + #tokenExpiresAt = 0 + #refreshPromise: Promise | null = null + readonly #onDeviceCode: DeviceCodeCallback + + constructor(apiKeyOrOptions?: string | CopilotAdapterOptions) { + const opts = typeof apiKeyOrOptions === 'string' + ? { apiKey: apiKeyOrOptions } + : apiKeyOrOptions ?? {} + + this.#githubToken = opts.apiKey + ?? process.env['GITHUB_COPILOT_TOKEN'] + ?? process.env['GITHUB_TOKEN'] + ?? null + this.#onDeviceCode = opts.onDeviceCode ?? defaultDeviceCodeCallback + } + + /** + * Return a valid Copilot session token, refreshing if necessary. + * If no GitHub token is available, triggers the interactive device flow. + * Concurrent calls share a single in-flight refresh to avoid races. 
+ */ + async #getSessionToken(): Promise { + const now = Math.floor(Date.now() / 1000) + if (this.#cachedToken && this.#tokenExpiresAt - 60 > now) { + return this.#cachedToken + } + + // If another call is already refreshing, piggyback on that promise + if (this.#refreshPromise) { + return this.#refreshPromise + } + + this.#refreshPromise = this.#doRefresh() + try { + return await this.#refreshPromise + } finally { + this.#refreshPromise = null + } + } + + async #doRefresh(): Promise { + if (!this.#githubToken) { + this.#githubToken = await deviceCodeLogin(this.#onDeviceCode) + } + + const resp = await fetchCopilotToken(this.#githubToken) + this.#cachedToken = resp.token + this.#tokenExpiresAt = resp.expires_at + return resp.token + } + + /** Build a short-lived OpenAI client pointed at the Copilot endpoint. */ + async #createClient(): Promise { + const sessionToken = await this.#getSessionToken() + return new OpenAI({ + apiKey: sessionToken, + baseURL: 'https://api.githubcopilot.com', + defaultHeaders: COPILOT_HEADERS, + }) + } + + // ------------------------------------------------------------------------- + // chat() + // ------------------------------------------------------------------------- + + async chat(messages: LLMMessage[], options: LLMChatOptions): Promise { + const client = await this.#createClient() + const openAIMessages = buildOpenAIMessageList(messages, options.systemPrompt) + + const completion = await client.chat.completions.create( + { + model: options.model, + messages: openAIMessages, + max_tokens: options.maxTokens, + temperature: options.temperature, + tools: options.tools ? 
options.tools.map(toOpenAITool) : undefined, + stream: false, + }, + { + signal: options.abortSignal, + }, + ) + + return fromOpenAICompletion(completion) + } + + // ------------------------------------------------------------------------- + // stream() + // ------------------------------------------------------------------------- + + async *stream( + messages: LLMMessage[], + options: LLMStreamOptions, + ): AsyncIterable { + const client = await this.#createClient() + const openAIMessages = buildOpenAIMessageList(messages, options.systemPrompt) + + const streamResponse = await client.chat.completions.create( + { + model: options.model, + messages: openAIMessages, + max_tokens: options.maxTokens, + temperature: options.temperature, + tools: options.tools ? options.tools.map(toOpenAITool) : undefined, + stream: true, + stream_options: { include_usage: true }, + }, + { + signal: options.abortSignal, + }, + ) + + let completionId = '' + let completionModel = '' + let finalFinishReason: string = 'stop' + let inputTokens = 0 + let outputTokens = 0 + const toolCallBuffers = new Map< + number, + { id: string; name: string; argsJson: string } + >() + let fullText = '' + + try { + for await (const chunk of streamResponse) { + completionId = chunk.id + completionModel = chunk.model + + if (chunk.usage !== null && chunk.usage !== undefined) { + inputTokens = chunk.usage.prompt_tokens + outputTokens = chunk.usage.completion_tokens + } + + const choice: ChatCompletionChunk.Choice | undefined = chunk.choices[0] + if (choice === undefined) continue + + const delta = choice.delta + + if (delta.content !== null && delta.content !== undefined) { + fullText += delta.content + const textEvent: StreamEvent = { type: 'text', data: delta.content } + yield textEvent + } + + for (const toolCallDelta of delta.tool_calls ?? []) { + const idx = toolCallDelta.index + + if (!toolCallBuffers.has(idx)) { + toolCallBuffers.set(idx, { + id: toolCallDelta.id ?? 
'', + name: toolCallDelta.function?.name ?? '', + argsJson: '', + }) + } + + const buf = toolCallBuffers.get(idx) + if (buf !== undefined) { + if (toolCallDelta.id) buf.id = toolCallDelta.id + if (toolCallDelta.function?.name) buf.name = toolCallDelta.function.name + if (toolCallDelta.function?.arguments) { + buf.argsJson += toolCallDelta.function.arguments + } + } + } + + if (choice.finish_reason !== null && choice.finish_reason !== undefined) { + finalFinishReason = choice.finish_reason + } + } + + const finalToolUseBlocks: ToolUseBlock[] = [] + for (const buf of toolCallBuffers.values()) { + let parsedInput: Record = {} + try { + const parsed: unknown = JSON.parse(buf.argsJson) + if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) { + parsedInput = parsed as Record + } + } catch { + // Malformed JSON — surface as empty object. + } + + const toolUseBlock: ToolUseBlock = { + type: 'tool_use', + id: buf.id, + name: buf.name, + input: parsedInput, + } + finalToolUseBlocks.push(toolUseBlock) + const toolUseEvent: StreamEvent = { type: 'tool_use', data: toolUseBlock } + yield toolUseEvent + } + + const doneContent: ContentBlock[] = [] + if (fullText.length > 0) { + const textBlock: TextBlock = { type: 'text', text: fullText } + doneContent.push(textBlock) + } + doneContent.push(...finalToolUseBlocks) + + const finalResponse: LLMResponse = { + id: completionId, + content: doneContent, + model: completionModel, + stop_reason: normalizeFinishReason(finalFinishReason), + usage: { input_tokens: inputTokens, output_tokens: outputTokens }, + } + + const doneEvent: StreamEvent = { type: 'done', data: finalResponse } + yield doneEvent + } catch (err) { + const error = err instanceof Error ? 
err : new Error(String(err)) + const errorEvent: StreamEvent = { type: 'error', data: error } + yield errorEvent + } + } +} + +// --------------------------------------------------------------------------- +// Premium request multipliers +// --------------------------------------------------------------------------- + +/** + * Model metadata used for display names, context windows, and premium request + * multiplier lookup. + */ +export interface CopilotModelInfo { + readonly id: string + readonly name: string + readonly contextWindow: number +} + +/** + * Return the premium-request multiplier for a Copilot model. + * + * Copilot doesn't charge per-token — instead each request costs + * `multiplier × 1 premium request` from the user's monthly allowance. + * A multiplier of 0 means the model is included at no premium cost. + * + * Based on https://docs.github.com/en/copilot/reference/ai-models/supported-models#model-multipliers + */ +export function getCopilotMultiplier(modelId: string): number { + const id = modelId.toLowerCase() + + // 0x — included models + if (id.includes('gpt-4.1')) return 0 + if (id.includes('gpt-4o')) return 0 + if (id.includes('gpt-5-mini') || id.includes('gpt-5 mini')) return 0 + if (id.includes('raptor')) return 0 + if (id.includes('goldeneye')) return 0 + + // 0.25x + if (id.includes('grok')) return 0.25 + + // 0.33x + if (id.includes('claude-haiku')) return 0.33 + if (id.includes('gemini-3-flash') || id.includes('gemini-3.0-flash')) return 0.33 + if (id.includes('gpt-5.1-codex-mini')) return 0.33 + if (id.includes('gpt-5.4-mini') || id.includes('gpt-5.4 mini')) return 0.33 + + // 1x — standard premium + if (id.includes('claude-sonnet')) return 1 + if (id.includes('gemini-2.5-pro')) return 1 + if (id.includes('gemini-3-pro') || id.includes('gemini-3.0-pro')) return 1 + if (id.includes('gemini-3.1-pro')) return 1 + if (id.includes('gpt-5.1')) return 1 + if (id.includes('gpt-5.2')) return 1 + if (id.includes('gpt-5.3')) return 1 + if 
(id.includes('gpt-5.4')) return 1 + + // 30x — fast opus + if (id.includes('claude-opus') && id.includes('fast')) return 30 + + // 3x — opus + if (id.includes('claude-opus')) return 3 + + return 1 +} + +/** + * Human-readable string describing the premium-request cost for a model. + * + * Examples: `"included (0×)"`, `"1× premium request"`, `"0.33× premium request"` + */ +export function formatCopilotMultiplier(multiplier: number): string { + if (multiplier === 0) return 'included (0×)' + if (Number.isInteger(multiplier)) return `${multiplier}× premium request` + return `${multiplier}× premium request` +} + +/** Known model metadata for Copilot-available models. */ +export const COPILOT_MODELS: readonly CopilotModelInfo[] = [ + { id: 'gpt-4.1', name: 'GPT-4.1', contextWindow: 128_000 }, + { id: 'gpt-4o', name: 'GPT-4o', contextWindow: 128_000 }, + { id: 'gpt-5-mini', name: 'GPT-5 mini', contextWindow: 200_000 }, + { id: 'gpt-5.1', name: 'GPT-5.1', contextWindow: 200_000 }, + { id: 'gpt-5.1-codex', name: 'GPT-5.1-Codex', contextWindow: 200_000 }, + { id: 'gpt-5.1-codex-mini', name: 'GPT-5.1-Codex-Mini', contextWindow: 200_000 }, + { id: 'gpt-5.1-codex-max', name: 'GPT-5.1-Codex-Max', contextWindow: 200_000 }, + { id: 'gpt-5.2', name: 'GPT-5.2', contextWindow: 200_000 }, + { id: 'gpt-5.2-codex', name: 'GPT-5.2-Codex', contextWindow: 200_000 }, + { id: 'gpt-5.3-codex', name: 'GPT-5.3-Codex', contextWindow: 200_000 }, + { id: 'gpt-5.4', name: 'GPT-5.4', contextWindow: 200_000 }, + { id: 'gpt-5.4-mini', name: 'GPT-5.4 mini', contextWindow: 200_000 }, + { id: 'claude-haiku-4.5', name: 'Claude Haiku 4.5', contextWindow: 200_000 }, + { id: 'claude-opus-4.5', name: 'Claude Opus 4.5', contextWindow: 200_000 }, + { id: 'claude-opus-4.6', name: 'Claude Opus 4.6', contextWindow: 200_000 }, + { id: 'claude-opus-4.6-fast', name: 'Claude Opus 4.6 (fast)', contextWindow: 200_000 }, + { id: 'claude-sonnet-4', name: 'Claude Sonnet 4', contextWindow: 200_000 }, + { id: 
'claude-sonnet-4.5', name: 'Claude Sonnet 4.5', contextWindow: 200_000 }, + { id: 'claude-sonnet-4.6', name: 'Claude Sonnet 4.6', contextWindow: 200_000 }, + { id: 'gemini-2.5-pro', name: 'Gemini 2.5 Pro', contextWindow: 1_000_000 }, + { id: 'gemini-3-flash', name: 'Gemini 3 Flash', contextWindow: 1_000_000 }, + { id: 'gemini-3-pro', name: 'Gemini 3 Pro', contextWindow: 1_000_000 }, + { id: 'gemini-3.1-pro', name: 'Gemini 3.1 Pro', contextWindow: 1_000_000 }, + { id: 'grok-code-fast-1', name: 'Grok Code Fast 1', contextWindow: 128_000 }, + { id: 'raptor-mini', name: 'Raptor mini', contextWindow: 128_000 }, + { id: 'goldeneye', name: 'Goldeneye', contextWindow: 128_000 }, +] as const diff --git a/src/llm/openai-common.ts b/src/llm/openai-common.ts index 08a77f0..ae74f21 100644 --- a/src/llm/openai-common.ts +++ b/src/llm/openai-common.ts @@ -1,15 +1,16 @@ /** - * @fileoverview Shared OpenAI wire-format helpers for Ollama and OpenAI adapters. + * @fileoverview Shared OpenAI wire-format conversion helpers. * - * These functions convert between the framework's internal types and the - * OpenAI/Ollama Chat Completions wire format. Both adapters should import - * from here rather than duplicating the conversion logic. + * Both the OpenAI and Copilot adapters use the OpenAI Chat Completions API + * format. This module contains the common conversion logic so it isn't + * duplicated across adapters. 
*/ +import OpenAI from 'openai' import type { ChatCompletion, - ChatCompletionAssistantMessageParam, ChatCompletionChunk, + ChatCompletionAssistantMessageParam, ChatCompletionMessageParam, ChatCompletionMessageToolCall, ChatCompletionTool, @@ -27,8 +28,12 @@ import type { ToolUseBlock, } from '../types.js' +// --------------------------------------------------------------------------- +// Framework → OpenAI +// --------------------------------------------------------------------------- + /** - * Convert a framework {@link LLMToolDef} to an OpenAI/Ollama {@link ChatCompletionTool}. + * Convert a framework {@link LLMToolDef} to an OpenAI {@link ChatCompletionTool}. */ export function toOpenAITool(tool: LLMToolDef): ChatCompletionTool { return { @@ -43,15 +48,19 @@ export function toOpenAITool(tool: LLMToolDef): ChatCompletionTool { /** * Determine whether a framework message contains any `tool_result` content - * blocks, which must be serialised as separate OpenAI/Ollama `tool`-role messages. + * blocks, which must be serialised as separate OpenAI `tool`-role messages. */ -export function hasToolResults(msg: LLMMessage): boolean { +function hasToolResults(msg: LLMMessage): boolean { return msg.content.some((b) => b.type === 'tool_result') } /** - * Convert a single framework {@link LLMMessage} into one or more OpenAI/Ollama + * Convert framework {@link LLMMessage}s into OpenAI * {@link ChatCompletionMessageParam} entries. + * + * `tool_result` blocks are expanded into top-level `tool`-role messages + * because OpenAI uses a dedicated role for tool results rather than embedding + * them inside user-content arrays. 
*/ export function toOpenAIMessages(messages: LLMMessage[]): ChatCompletionMessageParam[] { const result: ChatCompletionMessageParam[] = [] @@ -60,6 +69,7 @@ export function toOpenAIMessages(messages: LLMMessage[]): ChatCompletionMessageP if (msg.role === 'assistant') { result.push(toOpenAIAssistantMessage(msg)) } else { + // user role if (!hasToolResults(msg)) { result.push(toOpenAIUserMessage(msg)) } else { @@ -85,13 +95,18 @@ export function toOpenAIMessages(messages: LLMMessage[]): ChatCompletionMessageP return result } -export function toOpenAIUserMessage(msg: LLMMessage): ChatCompletionUserMessageParam { +/** + * Convert a `user`-role framework message into an OpenAI user message. + * Image blocks are converted to the OpenAI image_url content part format. + */ +function toOpenAIUserMessage(msg: LLMMessage): ChatCompletionUserMessageParam { if (msg.content.length === 1 && msg.content[0]?.type === 'text') { return { role: 'user', content: msg.content[0].text } } const parts: Array<{ type: 'text', text: string } | { type: 'image_url', image_url: { url: string } }> = [] - + type ContentPart = OpenAI.Chat.ChatCompletionContentPartText | OpenAI.Chat.ChatCompletionContentPartImage + for (const block of msg.content) { if (block.type === 'text') { parts.push({ type: 'text', text: block.text }) @@ -103,12 +118,17 @@ export function toOpenAIUserMessage(msg: LLMMessage): ChatCompletionUserMessageP }, }) } + // tool_result blocks are handled by the caller (toOpenAIMessages); skip here. } return { role: 'user', content: parts } } -export function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessageParam { +/** + * Convert an `assistant`-role framework message into an OpenAI assistant message. + * `tool_use` blocks become `tool_calls`; `text` blocks become message content. 
+ */ +function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessageParam { const toolCalls: ChatCompletionMessageToolCall[] = [] const textParts: string[] = [] @@ -139,8 +159,15 @@ export function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssista return assistantMsg } +// --------------------------------------------------------------------------- +// OpenAI → Framework +// --------------------------------------------------------------------------- + /** - * Convert an OpenAI/Ollama {@link ChatCompletion} into a framework {@link LLMResponse}. + * Convert an OpenAI {@link ChatCompletion} into a framework {@link LLMResponse}. + * + * Takes only the first choice (index 0), consistent with how the framework + * is designed for single-output agents. */ export function fromOpenAICompletion(completion: ChatCompletion): LLMResponse { const choice = completion.choices[0] @@ -191,8 +218,15 @@ export function fromOpenAICompletion(completion: ChatCompletion): LLMResponse { } /** - * Normalize an OpenAI/Ollama `finish_reason` string to the framework's canonical + * Normalize an OpenAI `finish_reason` string to the framework's canonical * stop-reason vocabulary. 
+ * + * Mapping: + * - `'stop'` → `'end_turn'` + * - `'tool_calls'` → `'tool_use'` + * - `'length'` → `'max_tokens'` + * - `'content_filter'` → `'content_filter'` + * - anything else → passed through unchanged */ export function normalizeFinishReason(reason: string): string { switch (reason) { diff --git a/src/llm/openai.ts b/src/llm/openai.ts index b99ddfd..568f94e 100644 --- a/src/llm/openai.ts +++ b/src/llm/openai.ts @@ -32,14 +32,7 @@ import OpenAI from 'openai' import type { - ChatCompletion, - ChatCompletionAssistantMessageParam, ChatCompletionChunk, - ChatCompletionMessageParam, - ChatCompletionMessageToolCall, - ChatCompletionTool, - ChatCompletionToolMessageParam, - ChatCompletionUserMessageParam, } from 'openai/resources/chat/completions/index.js' import type { @@ -55,231 +48,12 @@ import type { ToolUseBlock, } from '../types.js' -// --------------------------------------------------------------------------- -// Internal helpers — framework → OpenAI -// --------------------------------------------------------------------------- - -/** - * Convert a framework {@link LLMToolDef} to an OpenAI {@link ChatCompletionTool}. - * - * OpenAI wraps the function definition inside a `function` key and a `type` - * discriminant. The `inputSchema` is already a JSON Schema object. - */ -function toOpenAITool(tool: LLMToolDef): ChatCompletionTool { - return { - type: 'function', - function: { - name: tool.name, - description: tool.description, - parameters: tool.inputSchema as Record, - }, - } -} - -/** - * Determine whether a framework message contains any `tool_result` content - * blocks, which must be serialised as separate OpenAI `tool`-role messages. - */ -function hasToolResults(msg: LLMMessage): boolean { - return msg.content.some((b) => b.type === 'tool_result') -} - -/** - * Convert a single framework {@link LLMMessage} into one or more OpenAI - * {@link ChatCompletionMessageParam} entries. 
- * - * The expansion is necessary because OpenAI represents tool results as - * top-level messages with role `tool`, whereas in our model they are content - * blocks inside a `user` message. - * - * Expansion rules: - * - A `user` message containing only text/image blocks → single user message - * - A `user` message containing `tool_result` blocks → one `tool` message per - * tool_result block; any remaining text/image blocks are folded into an - * additional user message prepended to the group - * - An `assistant` message → single assistant message with optional tool_calls - */ -function toOpenAIMessages(messages: LLMMessage[]): ChatCompletionMessageParam[] { - const result: ChatCompletionMessageParam[] = [] - - for (const msg of messages) { - if (msg.role === 'assistant') { - result.push(toOpenAIAssistantMessage(msg)) - } else { - // user role - if (!hasToolResults(msg)) { - result.push(toOpenAIUserMessage(msg)) - } else { - // Split: text/image blocks become a user message (if any exist), then - // each tool_result block becomes an independent tool message. - const nonToolBlocks = msg.content.filter((b) => b.type !== 'tool_result') - if (nonToolBlocks.length > 0) { - result.push(toOpenAIUserMessage({ role: 'user', content: nonToolBlocks })) - } - - for (const block of msg.content) { - if (block.type === 'tool_result') { - const toolMsg: ChatCompletionToolMessageParam = { - role: 'tool', - tool_call_id: block.tool_use_id, - content: block.content, - } - result.push(toolMsg) - } - } - } - } - } - - return result -} - -/** - * Convert a `user`-role framework message into an OpenAI user message. - * Image blocks are converted to the OpenAI image_url content part format. - */ -function toOpenAIUserMessage(msg: LLMMessage): ChatCompletionUserMessageParam { - // If the entire content is a single text block, use the compact string form - // to keep the request payload smaller. 
- if (msg.content.length === 1 && msg.content[0]?.type === 'text') { - return { role: 'user', content: msg.content[0].text } - } - - type ContentPart = OpenAI.Chat.ChatCompletionContentPartText | OpenAI.Chat.ChatCompletionContentPartImage - const parts: ContentPart[] = [] - - for (const block of msg.content) { - if (block.type === 'text') { - parts.push({ type: 'text', text: block.text }) - } else if (block.type === 'image') { - parts.push({ - type: 'image_url', - image_url: { - url: `data:${block.source.media_type};base64,${block.source.data}`, - }, - }) - } - // tool_result blocks are handled by the caller (toOpenAIMessages); skip here. - } - - return { role: 'user', content: parts } -} - -/** - * Convert an `assistant`-role framework message into an OpenAI assistant message. - * - * Any `tool_use` blocks become `tool_calls`; `text` blocks become the message content. - */ -function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessageParam { - const toolCalls: ChatCompletionMessageToolCall[] = [] - const textParts: string[] = [] - - for (const block of msg.content) { - if (block.type === 'tool_use') { - toolCalls.push({ - id: block.id, - type: 'function', - function: { - name: block.name, - arguments: JSON.stringify(block.input), - }, - }) - } else if (block.type === 'text') { - textParts.push(block.text) - } - } - - const assistantMsg: ChatCompletionAssistantMessageParam = { - role: 'assistant', - content: textParts.length > 0 ? textParts.join('') : null, - } - - if (toolCalls.length > 0) { - assistantMsg.tool_calls = toolCalls - } - - return assistantMsg -} - -// --------------------------------------------------------------------------- -// Internal helpers — OpenAI → framework -// --------------------------------------------------------------------------- - -/** - * Convert an OpenAI {@link ChatCompletion} into a framework {@link LLMResponse}. 
- * - * We take only the first choice (index 0), consistent with how the framework - * is designed for single-output agents. - */ -function fromOpenAICompletion(completion: ChatCompletion): LLMResponse { - const choice = completion.choices[0] - if (choice === undefined) { - throw new Error('OpenAI returned a completion with no choices') - } - - const content: ContentBlock[] = [] - const message = choice.message - - if (message.content !== null && message.content !== undefined) { - const textBlock: TextBlock = { type: 'text', text: message.content } - content.push(textBlock) - } - - for (const toolCall of message.tool_calls ?? []) { - let parsedInput: Record = {} - try { - const parsed: unknown = JSON.parse(toolCall.function.arguments) - if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) { - parsedInput = parsed as Record - } - } catch { - // Malformed arguments from the model — surface as empty object. - } - - const toolUseBlock: ToolUseBlock = { - type: 'tool_use', - id: toolCall.id, - name: toolCall.function.name, - input: parsedInput, - } - content.push(toolUseBlock) - } - - const stopReason = normalizeFinishReason(choice.finish_reason ?? 'stop') - - return { - id: completion.id, - content, - model: completion.model, - stop_reason: stopReason, - usage: { - input_tokens: completion.usage?.prompt_tokens ?? 0, - output_tokens: completion.usage?.completion_tokens ?? 0, - }, - } -} - -/** - * Normalize an OpenAI `finish_reason` string to the framework's canonical - * stop-reason vocabulary so consumers never need to branch on provider-specific - * strings. 
- * - * Mapping: - * - `'stop'` → `'end_turn'` - * - `'tool_calls'` → `'tool_use'` - * - `'length'` → `'max_tokens'` - * - `'content_filter'` → `'content_filter'` - * - anything else → passed through unchanged - */ -function normalizeFinishReason(reason: string): string { - switch (reason) { - case 'stop': return 'end_turn' - case 'tool_calls': return 'tool_use' - case 'length': return 'max_tokens' - case 'content_filter': return 'content_filter' - default: return reason - } -} +import { + toOpenAITool, + fromOpenAICompletion, + normalizeFinishReason, + buildOpenAIMessageList, +} from './openai-common.js' // --------------------------------------------------------------------------- // Adapter implementation @@ -295,9 +69,10 @@ export class OpenAIAdapter implements LLMAdapter { readonly #client: OpenAI - constructor(apiKey?: string) { + constructor(apiKey?: string, baseURL?: string) { this.#client = new OpenAI({ apiKey: apiKey ?? process.env['OPENAI_API_KEY'], + baseURL, }) } @@ -484,31 +259,6 @@ export class OpenAIAdapter implements LLMAdapter { } } -// --------------------------------------------------------------------------- -// Private utility -// --------------------------------------------------------------------------- - -/** - * Prepend a system message when `systemPrompt` is provided, then append the - * converted conversation messages. - * - * OpenAI represents system instructions as a message with `role: 'system'` - * at the top of the array, not as a separate API parameter. - */ -function buildOpenAIMessageList( - messages: LLMMessage[], - systemPrompt: string | undefined, -): ChatCompletionMessageParam[] { - const result: ChatCompletionMessageParam[] = [] - - if (systemPrompt !== undefined && systemPrompt.length > 0) { - result.push({ role: 'system', content: systemPrompt }) - } - - result.push(...toOpenAIMessages(messages)) - return result -} - // Re-export types that consumers of this module commonly need alongside the adapter. 
export type { ContentBlock, diff --git a/src/orchestrator/orchestrator.ts b/src/orchestrator/orchestrator.ts index 0332969..86f16c0 100644 --- a/src/orchestrator/orchestrator.ts +++ b/src/orchestrator/orchestrator.ts @@ -52,8 +52,10 @@ import type { TeamRunResult, TokenUsage, } from '../types.js' +import type { RunOptions } from '../agent/runner.js' import { Agent } from '../agent/agent.js' import { AgentPool } from '../agent/pool.js' +import { emitTrace, generateRunId } from '../utils/trace.js' import { ToolRegistry } from '../tool/framework.js' import { ToolExecutor } from '../tool/executor.js' import { registerBuiltInTools } from '../tool/built-in/index.js' @@ -92,6 +94,105 @@ function buildAgent(config: AgentConfig): Agent { return new Agent(config, registry, executor) } +/** Promise-based delay. */ +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)) +} + +/** Maximum delay cap to prevent runaway exponential backoff (30 seconds). */ +const MAX_RETRY_DELAY_MS = 30_000 + +/** + * Compute the retry delay for a given attempt, capped at {@link MAX_RETRY_DELAY_MS}. + */ +export function computeRetryDelay( + baseDelay: number, + backoff: number, + attempt: number, +): number { + return Math.min(baseDelay * backoff ** (attempt - 1), MAX_RETRY_DELAY_MS) +} + +/** + * Execute an agent task with optional retry and exponential backoff. + * + * Exported for testability — called internally by {@link executeQueue}. + * + * @param run - The function that executes the task (typically `pool.run`). + * @param task - The task to execute (retry config read from its fields). + * @param onRetry - Called before each retry sleep with event data. + * @param delayFn - Injectable delay function (defaults to real `sleep`). + * @returns The final {@link AgentRunResult} from the last attempt. 
+ */ +export async function executeWithRetry( + run: () => Promise, + task: Task, + onRetry?: (data: { attempt: number; maxAttempts: number; error: string; nextDelayMs: number }) => void, + delayFn: (ms: number) => Promise = sleep, +): Promise { + const rawRetries = Number.isFinite(task.maxRetries) ? task.maxRetries! : 0 + const maxAttempts = Math.max(0, rawRetries) + 1 + const baseDelay = Math.max(0, Number.isFinite(task.retryDelayMs) ? task.retryDelayMs! : 1000) + const backoff = Math.max(1, Number.isFinite(task.retryBackoff) ? task.retryBackoff! : 2) + + let lastError: string = '' + // Accumulate token usage across all attempts so billing/observability + // reflects the true cost of retries. + let totalUsage: TokenUsage = { input_tokens: 0, output_tokens: 0 } + + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + try { + const result = await run() + totalUsage = { + input_tokens: totalUsage.input_tokens + result.tokenUsage.input_tokens, + output_tokens: totalUsage.output_tokens + result.tokenUsage.output_tokens, + } + + if (result.success) { + return { ...result, tokenUsage: totalUsage } + } + lastError = result.output + + // Failure — retry or give up + if (attempt < maxAttempts) { + const delay = computeRetryDelay(baseDelay, backoff, attempt) + onRetry?.({ attempt, maxAttempts, error: lastError, nextDelayMs: delay }) + await delayFn(delay) + continue + } + + return { ...result, tokenUsage: totalUsage } + } catch (err) { + lastError = err instanceof Error ? 
err.message : String(err) + + if (attempt < maxAttempts) { + const delay = computeRetryDelay(baseDelay, backoff, attempt) + onRetry?.({ attempt, maxAttempts, error: lastError, nextDelayMs: delay }) + await delayFn(delay) + continue + } + + // All retries exhausted — return a failure result + return { + success: false, + output: lastError, + messages: [], + tokenUsage: totalUsage, + toolCalls: [], + } + } + } + + // Should not be reached, but TypeScript needs a return + return { + success: false, + output: lastError, + messages: [], + tokenUsage: totalUsage, + toolCalls: [], + } +} + // --------------------------------------------------------------------------- // Parsed task spec (result of coordinator decomposition) // --------------------------------------------------------------------------- @@ -161,6 +262,8 @@ interface RunContext { readonly scheduler: Scheduler readonly agentResults: Map readonly config: OrchestratorConfig + /** Trace run ID, present when `onTrace` is configured. */ + readonly runId?: string } /** @@ -239,49 +342,76 @@ async function executeQueue( // Build the prompt: inject shared memory context + task description const prompt = await buildTaskPrompt(task, team) - try { - const result = await pool.run(assignee, prompt) - ctx.agentResults.set(`${assignee}:${task.id}`, result) + // Build trace context for this task's agent run + const traceOptions: Partial | undefined = config.onTrace + ? { onTrace: config.onTrace, runId: ctx.runId ?? '', taskId: task.id, traceAgent: assignee } + : undefined - if (result.success) { - // Persist result into shared memory so other agents can read it - const sharedMem = team.getSharedMemoryInstance() - if (sharedMem) { - await sharedMem.write(assignee, `task:${task.id}:result`, result.output) - } - - queue.complete(task.id, result.output) + const taskStartMs = config.onTrace ? 
Date.now() : 0 + let retryCount = 0 + const result = await executeWithRetry( + () => pool.run(assignee, prompt, traceOptions), + task, + (retryData) => { + retryCount++ config.onProgress?.({ - type: 'task_complete', + type: 'task_retry', task: task.id, agent: assignee, - data: result, + data: retryData, } satisfies OrchestratorEvent) + }, + ) - config.onProgress?.({ - type: 'agent_complete', - agent: assignee, - task: task.id, - data: result, - } satisfies OrchestratorEvent) - } else { - queue.fail(task.id, result.output) - config.onProgress?.({ - type: 'error', - task: task.id, - agent: assignee, - data: result, - } satisfies OrchestratorEvent) + // Emit task trace + if (config.onTrace) { + const taskEndMs = Date.now() + emitTrace(config.onTrace, { + type: 'task', + runId: ctx.runId ?? '', + taskId: task.id, + taskTitle: task.title, + agent: assignee, + success: result.success, + retries: retryCount, + startMs: taskStartMs, + endMs: taskEndMs, + durationMs: taskEndMs - taskStartMs, + }) + } + + ctx.agentResults.set(`${assignee}:${task.id}`, result) + + if (result.success) { + // Persist result into shared memory so other agents can read it + const sharedMem = team.getSharedMemoryInstance() + if (sharedMem) { + await sharedMem.write(assignee, `task:${task.id}:result`, result.output) } - } catch (err) { - const message = err instanceof Error ? 
err.message : String(err) - queue.fail(task.id, message) + + queue.complete(task.id, result.output) + + config.onProgress?.({ + type: 'task_complete', + task: task.id, + agent: assignee, + data: result, + } satisfies OrchestratorEvent) + + config.onProgress?.({ + type: 'agent_complete', + agent: assignee, + task: task.id, + data: result, + } satisfies OrchestratorEvent) + } else { + queue.fail(task.id, result.output) config.onProgress?.({ type: 'error', task: task.id, agent: assignee, - data: err, + data: result, } satisfies OrchestratorEvent) } }) @@ -341,8 +471,8 @@ async function buildTaskPrompt(task: Task, team: Team): Promise { */ export class OpenMultiAgent { private readonly config: Required< - Omit - > & Pick + Omit + > & Pick private readonly teams: Map = new Map() private completedTaskCount = 0 @@ -360,7 +490,10 @@ export class OpenMultiAgent { maxConcurrency: config.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY, defaultModel: config.defaultModel ?? DEFAULT_MODEL, defaultProvider: config.defaultProvider ?? 'anthropic', + defaultBaseURL: config.defaultBaseURL, + defaultApiKey: config.defaultApiKey, onProgress: config.onProgress, + onTrace: config.onTrace, } } @@ -405,14 +538,24 @@ export class OpenMultiAgent { * @param prompt - The user prompt to send. */ async runAgent(config: AgentConfig, prompt: string): Promise { - const agent = buildAgent(config) + const effective: AgentConfig = { + ...config, + provider: config.provider ?? this.config.defaultProvider, + baseURL: config.baseURL ?? this.config.defaultBaseURL, + apiKey: config.apiKey ?? this.config.defaultApiKey, + } + const agent = buildAgent(effective) this.config.onProgress?.({ type: 'agent_start', agent: config.name, data: { prompt }, }) - const result = await agent.run(prompt) + const traceOptions: Partial | undefined = this.config.onTrace + ? 
{ onTrace: this.config.onTrace, runId: generateRunId(), traceAgent: config.name } + : undefined + + const result = await agent.run(prompt, traceOptions) this.config.onProgress?.({ type: 'agent_complete', @@ -462,12 +605,15 @@ export class OpenMultiAgent { name: 'coordinator', model: this.config.defaultModel, provider: this.config.defaultProvider, + baseURL: this.config.defaultBaseURL, + apiKey: this.config.defaultApiKey, systemPrompt: this.buildCoordinatorSystemPrompt(agentConfigs), maxTurns: 3, } const decompositionPrompt = this.buildDecompositionPrompt(goal, agentConfigs) const coordinatorAgent = buildAgent(coordinatorConfig) + const runId = this.config.onTrace ? generateRunId() : undefined this.config.onProgress?.({ type: 'agent_start', @@ -475,7 +621,10 @@ export class OpenMultiAgent { data: { phase: 'decomposition', goal }, }) - const decompositionResult = await coordinatorAgent.run(decompositionPrompt) + const decompTraceOptions: Partial | undefined = this.config.onTrace + ? { onTrace: this.config.onTrace, runId: runId ?? '', traceAgent: 'coordinator' } + : undefined + const decompositionResult = await coordinatorAgent.run(decompositionPrompt, decompTraceOptions) const agentResults = new Map() agentResults.set('coordinator:decompose', decompositionResult) @@ -519,6 +668,7 @@ export class OpenMultiAgent { scheduler, agentResults, config: this.config, + runId, } await executeQueue(queue, ctx) @@ -527,7 +677,10 @@ export class OpenMultiAgent { // Step 5: Coordinator synthesises final result // ------------------------------------------------------------------ const synthesisPrompt = await this.buildSynthesisPrompt(goal, queue.list(), team) - const synthesisResult = await coordinatorAgent.run(synthesisPrompt) + const synthTraceOptions: Partial | undefined = this.config.onTrace + ? { onTrace: this.config.onTrace, runId: runId ?? 
'', traceAgent: 'coordinator' } + : undefined + const synthesisResult = await coordinatorAgent.run(synthesisPrompt, synthTraceOptions) agentResults.set('coordinator', synthesisResult) this.config.onProgress?.({ @@ -564,6 +717,9 @@ export class OpenMultiAgent { description: string assignee?: string dependsOn?: string[] + maxRetries?: number + retryDelayMs?: number + retryBackoff?: number }>, ): Promise { const agentConfigs = team.getAgents() @@ -576,6 +732,9 @@ export class OpenMultiAgent { description: t.description, assignee: t.assignee, dependsOn: t.dependsOn, + maxRetries: t.maxRetries, + retryDelayMs: t.retryDelayMs, + retryBackoff: t.retryBackoff, })), agentConfigs, queue, @@ -591,6 +750,7 @@ export class OpenMultiAgent { scheduler, agentResults, config: this.config, + runId: this.config.onTrace ? generateRunId() : undefined, } await executeQueue(queue, ctx) @@ -733,7 +893,11 @@ export class OpenMultiAgent { * then resolving them to real IDs before adding tasks to the queue. */ private loadSpecsIntoQueue( - specs: ReadonlyArray, + specs: ReadonlyArray, agentConfigs: AgentConfig[], queue: TaskQueue, ): void { @@ -750,6 +914,9 @@ export class OpenMultiAgent { assignee: spec.assignee && agentNames.has(spec.assignee) ? spec.assignee : undefined, + maxRetries: spec.maxRetries, + retryDelayMs: spec.retryDelayMs, + retryBackoff: spec.retryBackoff, }) titleToId.set(spec.title.toLowerCase().trim(), task.id) createdTasks.push(task) @@ -792,6 +959,8 @@ export class OpenMultiAgent { ...config, model: config.model, provider: config.provider ?? this.config.defaultProvider, + baseURL: config.baseURL ?? this.config.defaultBaseURL, + apiKey: config.apiKey ?? this.config.defaultApiKey, } pool.add(buildAgent(effective)) } @@ -825,13 +994,15 @@ export class OpenMultiAgent { if (!existing) { collapsed.set(agentName, result) } else { - // Merge multiple results for the same agent (multi-task case) + // Merge multiple results for the same agent (multi-task case). 
+ // Keep the latest `structured` value (last completed task wins). collapsed.set(agentName, { success: existing.success && result.success, output: [existing.output, result.output].filter(Boolean).join('\n\n---\n\n'), messages: [...existing.messages, ...result.messages], tokenUsage: addUsage(existing.tokenUsage, result.tokenUsage), toolCalls: [...existing.toolCalls, ...result.toolCalls], + structured: result.structured !== undefined ? result.structured : existing.structured, }) } diff --git a/src/task/queue.ts b/src/task/queue.ts index 60149ff..8888c09 100644 --- a/src/task/queue.ts +++ b/src/task/queue.ts @@ -356,7 +356,7 @@ export class TaskQueue { // Re-check against the current state of the whole task set. // Pass the pre-built map to avoid rebuilding it for every candidate task. - if (isTaskReady(task, allTasks, taskById)) { + if (isTaskReady({ ...task, status: 'pending' }, allTasks, taskById)) { const unblocked: Task = { ...task, status: 'pending', diff --git a/src/task/task.ts b/src/task/task.ts index a297100..d74e70b 100644 --- a/src/task/task.ts +++ b/src/task/task.ts @@ -6,6 +6,7 @@ * Stateful orchestration belongs in {@link TaskQueue}. 
*/ +import { randomUUID } from 'node:crypto' import type { Task, TaskStatus } from '../types.js' // --------------------------------------------------------------------------- @@ -30,10 +31,13 @@ export function createTask(input: { description: string assignee?: string dependsOn?: string[] + maxRetries?: number + retryDelayMs?: number + retryBackoff?: number }): Task { const now = new Date() return { - id: crypto.randomUUID(), + id: randomUUID(), title: input.title, description: input.description, status: 'pending' as TaskStatus, @@ -42,6 +46,9 @@ export function createTask(input: { result: undefined, createdAt: now, updatedAt: now, + maxRetries: input.maxRetries, + retryDelayMs: input.retryDelayMs, + retryBackoff: input.retryBackoff, } } diff --git a/src/team/messaging.ts b/src/team/messaging.ts index de4cdae..35a4c2e 100644 --- a/src/team/messaging.ts +++ b/src/team/messaging.ts @@ -6,6 +6,8 @@ * for replay and audit; read-state is tracked per recipient. */ +import { randomUUID } from 'node:crypto' + // --------------------------------------------------------------------------- // Message type // --------------------------------------------------------------------------- @@ -93,7 +95,7 @@ export class MessageBus { */ send(from: string, to: string, content: string): Message { const message: Message = { - id: crypto.randomUUID(), + id: randomUUID(), from, to, content, diff --git a/src/types.ts b/src/types.ts index ef6f005..418d54e 100644 --- a/src/types.ts +++ b/src/types.ts @@ -186,13 +186,27 @@ export interface ToolDefinition> { export interface AgentConfig { readonly name: string readonly model: string - readonly provider?: 'anthropic' | 'ollama' | 'openai' + readonly provider?: 'anthropic' | 'copilot' | 'openai' + /** + * Custom base URL for OpenAI-compatible APIs (Ollama, vLLM, LM Studio, etc.). + * Note: local servers that don't require auth still need `apiKey` set to a + * non-empty placeholder (e.g. `'ollama'`) because the OpenAI SDK validates it. 
+ */ + readonly baseURL?: string + /** API key override; falls back to the provider's standard env var. */ + readonly apiKey?: string readonly systemPrompt?: string /** Names of tools (from the tool registry) available to this agent. */ readonly tools?: readonly string[] readonly maxTurns?: number readonly maxTokens?: number readonly temperature?: number + /** + * Optional Zod schema for structured output. When set, the agent's final + * output is parsed as JSON and validated against this schema. A single + * retry with error feedback is attempted on validation failure. + */ + readonly outputSchema?: ZodSchema } /** Lifecycle state tracked during an agent run. */ @@ -219,6 +233,12 @@ export interface AgentRunResult { readonly messages: LLMMessage[] readonly tokenUsage: TokenUsage readonly toolCalls: ToolCallRecord[] + /** + * Parsed and validated structured output when `outputSchema` is set on the + * agent config. `undefined` when no schema is configured or validation + * failed after retry. + */ + readonly structured?: unknown } // --------------------------------------------------------------------------- @@ -261,6 +281,12 @@ export interface Task { result?: string readonly createdAt: Date updatedAt: Date + /** Maximum number of retry attempts on failure (default: 0 — no retry). */ + readonly maxRetries?: number + /** Base delay in ms before the first retry (default: 1000). */ + readonly retryDelayMs?: number + /** Exponential backoff multiplier (default: 2). 
*/ + readonly retryBackoff?: number } // --------------------------------------------------------------------------- @@ -274,6 +300,7 @@ export interface OrchestratorEvent { | 'agent_complete' | 'task_start' | 'task_complete' + | 'task_retry' | 'message' | 'error' readonly agent?: string @@ -285,10 +312,72 @@ export interface OrchestratorEvent { export interface OrchestratorConfig { readonly maxConcurrency?: number readonly defaultModel?: string - readonly defaultProvider?: 'anthropic' | 'ollama' | 'openai' - onProgress?: (event: OrchestratorEvent) => void + readonly defaultProvider?: 'anthropic' | 'copilot' | 'openai' + readonly defaultBaseURL?: string + readonly defaultApiKey?: string + readonly onProgress?: (event: OrchestratorEvent) => void + readonly onTrace?: (event: TraceEvent) => void | Promise } +// --------------------------------------------------------------------------- +// Trace events — lightweight observability spans +// --------------------------------------------------------------------------- + +/** Trace event type discriminants. */ +export type TraceEventType = 'llm_call' | 'tool_call' | 'task' | 'agent' + +/** Shared fields present on every trace event. */ +export interface TraceEventBase { + /** Unique identifier for the entire run (runTeam / runTasks / runAgent call). */ + readonly runId: string + readonly type: TraceEventType + /** Unix epoch ms when the span started. */ + readonly startMs: number + /** Unix epoch ms when the span ended. */ + readonly endMs: number + /** Wall-clock duration in milliseconds (`endMs - startMs`). */ + readonly durationMs: number + /** Agent name associated with this span. */ + readonly agent: string + /** Task ID associated with this span. */ + readonly taskId?: string +} + +/** Emitted for each LLM API call (one per agent turn). 
*/ +export interface LLMCallTrace extends TraceEventBase { + readonly type: 'llm_call' + readonly model: string + readonly turn: number + readonly tokens: TokenUsage +} + +/** Emitted for each tool execution. */ +export interface ToolCallTrace extends TraceEventBase { + readonly type: 'tool_call' + readonly tool: string + readonly isError: boolean +} + +/** Emitted when a task completes (wraps the full retry sequence). */ +export interface TaskTrace extends TraceEventBase { + readonly type: 'task' + readonly taskId: string + readonly taskTitle: string + readonly success: boolean + readonly retries: number +} + +/** Emitted when an agent run completes (wraps the full conversation loop). */ +export interface AgentTrace extends TraceEventBase { + readonly type: 'agent' + readonly turns: number + readonly tokens: TokenUsage + readonly toolCalls: number +} + +/** Discriminated union of all trace event types. */ +export type TraceEvent = LLMCallTrace | ToolCallTrace | TaskTrace | AgentTrace + // --------------------------------------------------------------------------- // Memory // --------------------------------------------------------------------------- diff --git a/src/utils/trace.ts b/src/utils/trace.ts new file mode 100644 index 0000000..4f01f5f --- /dev/null +++ b/src/utils/trace.ts @@ -0,0 +1,34 @@ +/** + * @fileoverview Trace emission utilities for the observability layer. + */ + +import { randomUUID } from 'node:crypto' +import type { TraceEvent } from '../types.js' + +/** + * Safely emit a trace event. Swallows callback errors so a broken + * subscriber never crashes agent execution. + */ +export function emitTrace( + fn: ((event: TraceEvent) => void | Promise) | undefined, + event: TraceEvent, +): void { + if (!fn) return + try { + // Guard async callbacks: if fn returns a Promise, swallow its rejection + // so an async onTrace never produces an unhandled promise rejection. 
+ const result = fn(event) as unknown + if (result && typeof (result as Promise<void>).catch === 'function') { + ;(result as Promise<void>).catch(noop) + } + } catch { + // Intentionally swallowed — observability must never break execution. + } +} + +function noop() {} + +/** Generate a unique run ID for trace correlation. */ +export function generateRunId(): string { + return randomUUID() +} diff --git a/tests/semaphore.test.ts b/tests/semaphore.test.ts new file mode 100644 index 0000000..ddc1b34 --- /dev/null +++ b/tests/semaphore.test.ts @@ -0,0 +1,57 @@ +import { describe, it, expect } from 'vitest' +import { Semaphore } from '../src/utils/semaphore.js' + +describe('Semaphore', () => { + it('throws on max < 1', () => { + expect(() => new Semaphore(0)).toThrow() + }) + + it('allows up to max concurrent holders', async () => { + const sem = new Semaphore(2) + let running = 0 + let peak = 0 + + const work = async () => { + await sem.acquire() + running++ + peak = Math.max(peak, running) + await new Promise((r) => setTimeout(r, 30)) + running-- + sem.release() + } + + await Promise.all([work(), work(), work(), work()]) + expect(peak).toBeLessThanOrEqual(2) + }) + + it('run() auto-releases on success', async () => { + const sem = new Semaphore(1) + const result = await sem.run(async () => 42) + expect(result).toBe(42) + expect(sem.active).toBe(0) + }) + + it('run() auto-releases on error', async () => { + const sem = new Semaphore(1) + await expect(sem.run(async () => { throw new Error('oops') })).rejects.toThrow('oops') + expect(sem.active).toBe(0) + }) + + it('tracks active and pending counts', async () => { + const sem = new Semaphore(1) + await sem.acquire() + expect(sem.active).toBe(1) + + // This will queue + const p = sem.acquire() + expect(sem.pending).toBe(1) + + sem.release() + await p + expect(sem.active).toBe(1) + expect(sem.pending).toBe(0) + + sem.release() + expect(sem.active).toBe(0) + }) +}) diff --git a/tests/shared-memory.test.ts b/tests/shared-memory.test.ts 
new file mode 100644 index 0000000..1467c95 --- /dev/null +++ b/tests/shared-memory.test.ts @@ -0,0 +1,122 @@ +import { describe, it, expect } from 'vitest' +import { SharedMemory } from '../src/memory/shared.js' + +describe('SharedMemory', () => { + // ------------------------------------------------------------------------- + // Write & read + // ------------------------------------------------------------------------- + + it('writes and reads a value under a namespaced key', async () => { + const mem = new SharedMemory() + await mem.write('researcher', 'findings', 'TS 5.5 ships const type params') + + const entry = await mem.read('researcher/findings') + expect(entry).not.toBeNull() + expect(entry!.value).toBe('TS 5.5 ships const type params') + }) + + it('returns null for a non-existent key', async () => { + const mem = new SharedMemory() + expect(await mem.read('nope/nothing')).toBeNull() + }) + + // ------------------------------------------------------------------------- + // Namespace isolation + // ------------------------------------------------------------------------- + + it('isolates writes between agents', async () => { + const mem = new SharedMemory() + await mem.write('alice', 'plan', 'plan A') + await mem.write('bob', 'plan', 'plan B') + + const alice = await mem.read('alice/plan') + const bob = await mem.read('bob/plan') + expect(alice!.value).toBe('plan A') + expect(bob!.value).toBe('plan B') + }) + + it('listByAgent returns only that agent\'s entries', async () => { + const mem = new SharedMemory() + await mem.write('alice', 'a1', 'v1') + await mem.write('alice', 'a2', 'v2') + await mem.write('bob', 'b1', 'v3') + + const aliceEntries = await mem.listByAgent('alice') + expect(aliceEntries).toHaveLength(2) + expect(aliceEntries.every((e) => e.key.startsWith('alice/'))).toBe(true) + }) + + // ------------------------------------------------------------------------- + // Overwrite + // 
------------------------------------------------------------------------- + + it('overwrites a value and preserves createdAt', async () => { + const mem = new SharedMemory() + await mem.write('agent', 'key', 'first') + const first = await mem.read('agent/key') + + await mem.write('agent', 'key', 'second') + const second = await mem.read('agent/key') + + expect(second!.value).toBe('second') + expect(second!.createdAt.getTime()).toBe(first!.createdAt.getTime()) + }) + + // ------------------------------------------------------------------------- + // Metadata + // ------------------------------------------------------------------------- + + it('stores metadata alongside the value', async () => { + const mem = new SharedMemory() + await mem.write('agent', 'key', 'val', { priority: 'high' }) + + const entry = await mem.read('agent/key') + expect(entry!.metadata).toMatchObject({ priority: 'high', agent: 'agent' }) + }) + + // ------------------------------------------------------------------------- + // Summary + // ------------------------------------------------------------------------- + + it('returns empty string for an empty store', async () => { + const mem = new SharedMemory() + expect(await mem.getSummary()).toBe('') + }) + + it('produces a markdown summary grouped by agent', async () => { + const mem = new SharedMemory() + await mem.write('researcher', 'findings', 'result A') + await mem.write('coder', 'plan', 'implement X') + + const summary = await mem.getSummary() + expect(summary).toContain('## Shared Team Memory') + expect(summary).toContain('### researcher') + expect(summary).toContain('### coder') + expect(summary).toContain('findings: result A') + expect(summary).toContain('plan: implement X') + }) + + it('truncates long values in the summary', async () => { + const mem = new SharedMemory() + const longValue = 'x'.repeat(300) + await mem.write('agent', 'big', longValue) + + const summary = await mem.getSummary() + // Summary truncates at 200 chars → 197 
+ '…' + expect(summary.length).toBeLessThan(longValue.length) + expect(summary).toContain('…') + }) + + // ------------------------------------------------------------------------- + // listAll + // ------------------------------------------------------------------------- + + it('listAll returns entries from all agents', async () => { + const mem = new SharedMemory() + await mem.write('a', 'k1', 'v1') + await mem.write('b', 'k2', 'v2') + + const all = await mem.listAll() + expect(all).toHaveLength(2) + }) +}) diff --git a/tests/structured-output.test.ts b/tests/structured-output.test.ts new file mode 100644 index 0000000..27f9201 --- /dev/null +++ b/tests/structured-output.test.ts @@ -0,0 +1,331 @@ +import { describe, it, expect } from 'vitest' +import { z } from 'zod' +import { + buildStructuredOutputInstruction, + extractJSON, + validateOutput, +} from '../src/agent/structured-output.js' +import { Agent } from '../src/agent/agent.js' +import { AgentRunner } from '../src/agent/runner.js' +import { ToolRegistry } from '../src/tool/framework.js' +import { ToolExecutor } from '../src/tool/executor.js' +import type { AgentConfig, LLMAdapter, LLMResponse } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Mock LLM adapter factory +// --------------------------------------------------------------------------- + +function mockAdapter(responses: string[]): LLMAdapter { + let callIndex = 0 + return { + name: 'mock', + async chat() { + const text = responses[callIndex++] ?? 
'' + return { + id: `mock-${callIndex}`, + content: [{ type: 'text' as const, text }], + model: 'mock-model', + stop_reason: 'end_turn', + usage: { input_tokens: 10, output_tokens: 20 }, + } satisfies LLMResponse + }, + async *stream() { + /* unused in these tests */ + }, + } +} + +// --------------------------------------------------------------------------- +// extractJSON +// --------------------------------------------------------------------------- + +describe('extractJSON', () => { + it('parses clean JSON', () => { + expect(extractJSON('{"a":1}')).toEqual({ a: 1 }) + }) + + it('parses JSON wrapped in ```json fence', () => { + const raw = 'Here is the result:\n```json\n{"a":1}\n```\nDone.' + expect(extractJSON(raw)).toEqual({ a: 1 }) + }) + + it('parses JSON wrapped in bare ``` fence', () => { + const raw = '```\n{"a":1}\n```' + expect(extractJSON(raw)).toEqual({ a: 1 }) + }) + + it('extracts embedded JSON object from surrounding text', () => { + const raw = 'The answer is {"summary":"hello","score":5} as shown above.' 
+ expect(extractJSON(raw)).toEqual({ summary: 'hello', score: 5 }) + }) + + it('extracts JSON array', () => { + expect(extractJSON('[1,2,3]')).toEqual([1, 2, 3]) + }) + + it('extracts embedded JSON array from surrounding text', () => { + const raw = 'Here: [{"a":1},{"a":2}] end' + expect(extractJSON(raw)).toEqual([{ a: 1 }, { a: 2 }]) + }) + + it('throws on non-JSON text', () => { + expect(() => extractJSON('just plain text')).toThrow('Failed to extract JSON') + }) + + it('throws on empty string', () => { + expect(() => extractJSON('')).toThrow('Failed to extract JSON') + }) +}) + +// --------------------------------------------------------------------------- +// validateOutput +// --------------------------------------------------------------------------- + +describe('validateOutput', () => { + const schema = z.object({ + summary: z.string(), + score: z.number().min(0).max(10), + }) + + it('returns validated data on success', () => { + const data = { summary: 'hello', score: 5 } + expect(validateOutput(schema, data)).toEqual(data) + }) + + it('throws on missing field', () => { + expect(() => validateOutput(schema, { summary: 'hello' })).toThrow( + 'Output validation failed', + ) + }) + + it('throws on wrong type', () => { + expect(() => + validateOutput(schema, { summary: 'hello', score: 'not a number' }), + ).toThrow('Output validation failed') + }) + + it('throws on value out of range', () => { + expect(() => + validateOutput(schema, { summary: 'hello', score: 99 }), + ).toThrow('Output validation failed') + }) + + it('applies Zod transforms', () => { + const transformSchema = z.object({ + name: z.string().transform(s => s.toUpperCase()), + }) + const result = validateOutput(transformSchema, { name: 'alice' }) + expect(result).toEqual({ name: 'ALICE' }) + }) + + it('strips unknown keys with strict schema', () => { + const strictSchema = z.object({ a: z.number() }).strict() + expect(() => + validateOutput(strictSchema, { a: 1, b: 2 }), + ).toThrow('Output 
validation failed') + }) + + it('shows (root) for root-level errors', () => { + const stringSchema = z.string() + expect(() => validateOutput(stringSchema, 42)).toThrow('(root)') + }) +}) + +// --------------------------------------------------------------------------- +// buildStructuredOutputInstruction +// --------------------------------------------------------------------------- + +describe('buildStructuredOutputInstruction', () => { + it('includes the JSON Schema representation', () => { + const schema = z.object({ + summary: z.string(), + score: z.number(), + }) + const instruction = buildStructuredOutputInstruction(schema) + + expect(instruction).toContain('Output Format (REQUIRED)') + expect(instruction).toContain('"type": "object"') + expect(instruction).toContain('"summary"') + expect(instruction).toContain('"score"') + expect(instruction).toContain('ONLY valid JSON') + }) + + it('includes description from Zod schema', () => { + const schema = z.object({ + name: z.string().describe('The person name'), + }) + const instruction = buildStructuredOutputInstruction(schema) + expect(instruction).toContain('The person name') + }) +}) + +// --------------------------------------------------------------------------- +// Agent integration (mocked LLM) +// --------------------------------------------------------------------------- + +/** + * Build an Agent with a mocked LLM adapter by injecting an AgentRunner + * directly into the Agent's private `runner` field, bypassing `createAdapter`. + */ +function buildMockAgent(config: AgentConfig, responses: string[]): Agent { + const adapter = mockAdapter(responses) + const registry = new ToolRegistry() + const executor = new ToolExecutor(registry) + const agent = new Agent(config, registry, executor) + + // Inject a pre-built runner so `getRunner()` returns it without calling createAdapter. 
+ const runner = new AgentRunner(adapter, registry, executor, { + model: config.model, + systemPrompt: config.systemPrompt, + maxTurns: config.maxTurns, + maxTokens: config.maxTokens, + temperature: config.temperature, + agentName: config.name, + }) + ;(agent as any).runner = runner + + return agent +} + +describe('Agent structured output (end-to-end)', () => { + const schema = z.object({ + summary: z.string(), + sentiment: z.enum(['positive', 'negative', 'neutral']), + confidence: z.number().min(0).max(1), + }) + + const baseConfig: AgentConfig = { + name: 'test-agent', + model: 'mock-model', + systemPrompt: 'You are a test agent.', + outputSchema: schema, + } + + it('happy path: valid JSON on first attempt', async () => { + const validJSON = JSON.stringify({ + summary: 'Great product', + sentiment: 'positive', + confidence: 0.95, + }) + + const agent = buildMockAgent(baseConfig, [validJSON]) + const result = await agent.run('Analyze this review') + + expect(result.success).toBe(true) + expect(result.structured).toEqual({ + summary: 'Great product', + sentiment: 'positive', + confidence: 0.95, + }) + }) + + it('retry: invalid first attempt, valid second attempt', async () => { + const invalidJSON = JSON.stringify({ + summary: 'Great product', + sentiment: 'INVALID_VALUE', + confidence: 0.95, + }) + const validJSON = JSON.stringify({ + summary: 'Great product', + sentiment: 'positive', + confidence: 0.95, + }) + + const agent = buildMockAgent(baseConfig, [invalidJSON, validJSON]) + const result = await agent.run('Analyze this review') + + expect(result.success).toBe(true) + expect(result.structured).toEqual({ + summary: 'Great product', + sentiment: 'positive', + confidence: 0.95, + }) + // Token usage should reflect both attempts + expect(result.tokenUsage.input_tokens).toBe(20) // 10 + 10 + expect(result.tokenUsage.output_tokens).toBe(40) // 20 + 20 + }) + + it('both attempts fail: success=false, structured=undefined', async () => { + const bad1 = '{"summary": 
"ok", "sentiment": "WRONG"}' + const bad2 = '{"summary": "ok", "sentiment": "ALSO_WRONG"}' + + const agent = buildMockAgent(baseConfig, [bad1, bad2]) + const result = await agent.run('Analyze this review') + + expect(result.success).toBe(false) + expect(result.structured).toBeUndefined() + }) + + it('no outputSchema: original behavior, structured is undefined', async () => { + const configNoSchema: AgentConfig = { + name: 'plain-agent', + model: 'mock-model', + systemPrompt: 'You are a test agent.', + } + + const agent = buildMockAgent(configNoSchema, ['Just plain text output']) + const result = await agent.run('Hello') + + expect(result.success).toBe(true) + expect(result.output).toBe('Just plain text output') + expect(result.structured).toBeUndefined() + }) + + it('handles JSON wrapped in markdown fence', async () => { + const fenced = '```json\n{"summary":"ok","sentiment":"neutral","confidence":0.5}\n```' + + const agent = buildMockAgent(baseConfig, [fenced]) + const result = await agent.run('Analyze') + + expect(result.success).toBe(true) + expect(result.structured).toEqual({ + summary: 'ok', + sentiment: 'neutral', + confidence: 0.5, + }) + }) + + it('non-JSON output triggers retry, valid JSON on retry succeeds', async () => { + const nonJSON = 'I am not sure how to analyze this.' 
+ const validJSON = JSON.stringify({ + summary: 'Uncertain', + sentiment: 'neutral', + confidence: 0.1, + }) + + const agent = buildMockAgent(baseConfig, [nonJSON, validJSON]) + const result = await agent.run('Analyze this review') + + expect(result.success).toBe(true) + expect(result.structured).toEqual({ + summary: 'Uncertain', + sentiment: 'neutral', + confidence: 0.1, + }) + }) + + it('non-JSON output on both attempts: success=false', async () => { + const agent = buildMockAgent(baseConfig, [ + 'Sorry, I cannot do that.', + 'Still cannot do it.', + ]) + const result = await agent.run('Analyze this review') + + expect(result.success).toBe(false) + expect(result.structured).toBeUndefined() + }) + + it('token usage on first-attempt success reflects single call only', async () => { + const validJSON = JSON.stringify({ + summary: 'Good', + sentiment: 'positive', + confidence: 0.9, + }) + + const agent = buildMockAgent(baseConfig, [validJSON]) + const result = await agent.run('Analyze') + + expect(result.tokenUsage.input_tokens).toBe(10) + expect(result.tokenUsage.output_tokens).toBe(20) + }) +}) diff --git a/tests/task-queue.test.ts b/tests/task-queue.test.ts new file mode 100644 index 0000000..87a2500 --- /dev/null +++ b/tests/task-queue.test.ts @@ -0,0 +1,244 @@ +import { describe, it, expect, vi } from 'vitest' +import { TaskQueue } from '../src/task/queue.js' +import { createTask } from '../src/task/task.js' + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Create a simple task with a predictable id. */ +function task(id: string, opts: { dependsOn?: string[]; assignee?: string } = {}) { + const t = createTask({ title: id, description: `task ${id}`, assignee: opts.assignee }) + // Override the random UUID so tests can reference tasks by name. 
+ return { ...t, id, dependsOn: opts.dependsOn } as ReturnType<typeof createTask> +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('TaskQueue', () => { + // ------------------------------------------------------------------------- + // Basic add & query + // ------------------------------------------------------------------------- + + it('adds a task and lists it', () => { + const q = new TaskQueue() + q.add(task('a')) + expect(q.list()).toHaveLength(1) + expect(q.list()[0].id).toBe('a') + }) + + it('fires task:ready for a task with no dependencies', () => { + const q = new TaskQueue() + const handler = vi.fn() + q.on('task:ready', handler) + + q.add(task('a')) + expect(handler).toHaveBeenCalledTimes(1) + expect(handler.mock.calls[0][0].id).toBe('a') + }) + + it('blocks a task whose dependency is not yet completed', () => { + const q = new TaskQueue() + q.add(task('a')) + q.add(task('b', { dependsOn: ['a'] })) + + const b = q.list().find((t) => t.id === 'b')! 
+ expect(b.status).toBe('blocked') + }) + + // ------------------------------------------------------------------------- + // Dependency resolution + // ------------------------------------------------------------------------- + + it('unblocks a dependent task when its dependency completes', () => { + const q = new TaskQueue() + const readyHandler = vi.fn() + q.on('task:ready', readyHandler) + + q.add(task('a')) + q.add(task('b', { dependsOn: ['a'] })) + + // 'a' fires task:ready, 'b' is blocked + expect(readyHandler).toHaveBeenCalledTimes(1) + + q.complete('a', 'done') + + // 'b' should now be unblocked → fires task:ready + expect(readyHandler).toHaveBeenCalledTimes(2) + expect(readyHandler.mock.calls[1][0].id).toBe('b') + expect(q.list().find((t) => t.id === 'b')!.status).toBe('pending') + }) + + it('keeps a task blocked until ALL dependencies complete', () => { + const q = new TaskQueue() + q.add(task('a')) + q.add(task('b')) + q.add(task('c', { dependsOn: ['a', 'b'] })) + + q.complete('a') + + const cAfterA = q.list().find((t) => t.id === 'c')! + expect(cAfterA.status).toBe('blocked') + + q.complete('b') + + const cAfterB = q.list().find((t) => t.id === 'c')! 
+ expect(cAfterB.status).toBe('pending') + }) + + // ------------------------------------------------------------------------- + // Cascade failure + // ------------------------------------------------------------------------- + + it('cascades failure to direct dependents', () => { + const q = new TaskQueue() + const failHandler = vi.fn() + q.on('task:failed', failHandler) + + q.add(task('a')) + q.add(task('b', { dependsOn: ['a'] })) + + q.fail('a', 'boom') + + expect(failHandler).toHaveBeenCalledTimes(2) // a + b + expect(q.list().find((t) => t.id === 'b')!.status).toBe('failed') + expect(q.list().find((t) => t.id === 'b')!.result).toContain('dependency') + }) + + it('cascades failure transitively (a → b → c)', () => { + const q = new TaskQueue() + q.add(task('a')) + q.add(task('b', { dependsOn: ['a'] })) + q.add(task('c', { dependsOn: ['b'] })) + + q.fail('a', 'boom') + + expect(q.list().every((t) => t.status === 'failed')).toBe(true) + }) + + it('does not cascade failure to independent tasks', () => { + const q = new TaskQueue() + q.add(task('a')) + q.add(task('b')) + q.add(task('c', { dependsOn: ['a'] })) + + q.fail('a', 'boom') + + expect(q.list().find((t) => t.id === 'b')!.status).toBe('pending') + expect(q.list().find((t) => t.id === 'c')!.status).toBe('failed') + }) + + // ------------------------------------------------------------------------- + // Completion + // ------------------------------------------------------------------------- + + it('fires all:complete when every task reaches a terminal state', () => { + const q = new TaskQueue() + const allComplete = vi.fn() + q.on('all:complete', allComplete) + + q.add(task('a')) + q.add(task('b')) + + q.complete('a') + expect(allComplete).not.toHaveBeenCalled() + + q.complete('b') + expect(allComplete).toHaveBeenCalledTimes(1) + }) + + it('fires all:complete when mix of completed and failed', () => { + const q = new TaskQueue() + const allComplete = vi.fn() + q.on('all:complete', allComplete) + + 
q.add(task('a')) + q.add(task('b', { dependsOn: ['a'] })) + + q.fail('a', 'err') // cascades to b + expect(allComplete).toHaveBeenCalledTimes(1) + }) + + it('isComplete returns true for an empty queue', () => { + const q = new TaskQueue() + expect(q.isComplete()).toBe(true) + }) + + // ------------------------------------------------------------------------- + // Query: next / nextAvailable + // ------------------------------------------------------------------------- + + it('next() returns a pending task for the given assignee', () => { + const q = new TaskQueue() + q.add(task('a', { assignee: 'alice' })) + q.add(task('b', { assignee: 'bob' })) + + expect(q.next('bob')?.id).toBe('b') + }) + + it('next() returns undefined when no pending task matches', () => { + const q = new TaskQueue() + q.add(task('a', { assignee: 'alice' })) + expect(q.next('bob')).toBeUndefined() + }) + + it('nextAvailable() prefers unassigned tasks', () => { + const q = new TaskQueue() + q.add(task('assigned', { assignee: 'alice' })) + q.add(task('unassigned')) + + expect(q.nextAvailable()?.id).toBe('unassigned') + }) + + // ------------------------------------------------------------------------- + // Progress + // ------------------------------------------------------------------------- + + it('getProgress() returns correct counts', () => { + const q = new TaskQueue() + q.add(task('a')) + q.add(task('b')) + q.add(task('c', { dependsOn: ['a'] })) + + q.complete('a') + + const p = q.getProgress() + expect(p.total).toBe(3) + expect(p.completed).toBe(1) + expect(p.pending).toBe(2) // b + c (unblocked) + expect(p.blocked).toBe(0) + }) + + // ------------------------------------------------------------------------- + // Event unsubscribe + // ------------------------------------------------------------------------- + + it('unsubscribe stops receiving events', () => { + const q = new TaskQueue() + const handler = vi.fn() + const off = q.on('task:ready', handler) + + q.add(task('a')) + 
expect(handler).toHaveBeenCalledTimes(1) + + off() + q.add(task('b')) + expect(handler).toHaveBeenCalledTimes(1) // no new call + }) + + // ------------------------------------------------------------------------- + // Error cases + // ------------------------------------------------------------------------- + + it('throws when completing a non-existent task', () => { + const q = new TaskQueue() + expect(() => q.complete('ghost')).toThrow('not found') + }) + + it('throws when failing a non-existent task', () => { + const q = new TaskQueue() + expect(() => q.fail('ghost', 'err')).toThrow('not found') + }) +}) diff --git a/tests/task-retry.test.ts b/tests/task-retry.test.ts new file mode 100644 index 0000000..56bdb76 --- /dev/null +++ b/tests/task-retry.test.ts @@ -0,0 +1,368 @@ +import { describe, it, expect, vi } from 'vitest' +import { createTask } from '../src/task/task.js' +import { executeWithRetry, computeRetryDelay } from '../src/orchestrator/orchestrator.js' +import type { AgentRunResult } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +const SUCCESS_RESULT: AgentRunResult = { + success: true, + output: 'done', + messages: [], + tokenUsage: { input_tokens: 10, output_tokens: 20 }, + toolCalls: [], +} + +const FAILURE_RESULT: AgentRunResult = { + success: false, + output: 'agent failed', + messages: [], + tokenUsage: { input_tokens: 10, output_tokens: 20 }, + toolCalls: [], +} + +/** No-op delay for tests. 
*/ +const noDelay = () => Promise.resolve() + +// --------------------------------------------------------------------------- +// computeRetryDelay +// --------------------------------------------------------------------------- + +describe('computeRetryDelay', () => { + it('computes exponential backoff', () => { + expect(computeRetryDelay(1000, 2, 1)).toBe(1000) // 1000 * 2^0 + expect(computeRetryDelay(1000, 2, 2)).toBe(2000) // 1000 * 2^1 + expect(computeRetryDelay(1000, 2, 3)).toBe(4000) // 1000 * 2^2 + }) + + it('caps at 30 seconds', () => { + // 1000 * 2^20 = 1,048,576,000 — way over cap + expect(computeRetryDelay(1000, 2, 21)).toBe(30_000) + }) + + it('handles backoff of 1 (constant delay)', () => { + expect(computeRetryDelay(500, 1, 1)).toBe(500) + expect(computeRetryDelay(500, 1, 5)).toBe(500) + }) +}) + +// --------------------------------------------------------------------------- +// createTask: retry fields +// --------------------------------------------------------------------------- + +describe('createTask with retry fields', () => { + it('passes through retry config', () => { + const t = createTask({ + title: 'Retry task', + description: 'test', + maxRetries: 3, + retryDelayMs: 500, + retryBackoff: 1.5, + }) + expect(t.maxRetries).toBe(3) + expect(t.retryDelayMs).toBe(500) + expect(t.retryBackoff).toBe(1.5) + }) + + it('defaults retry fields to undefined', () => { + const t = createTask({ title: 'No retry', description: 'test' }) + expect(t.maxRetries).toBeUndefined() + expect(t.retryDelayMs).toBeUndefined() + expect(t.retryBackoff).toBeUndefined() + }) +}) + +// --------------------------------------------------------------------------- +// executeWithRetry — tests the real exported function +// --------------------------------------------------------------------------- + +describe('executeWithRetry', () => { + it('succeeds on first attempt with no retry config', async () => { + const run = vi.fn().mockResolvedValue(SUCCESS_RESULT) + const task = 
createTask({ title: 'Simple', description: 'test' }) + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(true) + expect(result.output).toBe('done') + expect(run).toHaveBeenCalledTimes(1) + }) + + it('succeeds on first attempt even when maxRetries > 0', async () => { + const run = vi.fn().mockResolvedValue(SUCCESS_RESULT) + const task = createTask({ + title: 'Has retries', + description: 'test', + maxRetries: 3, + }) + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(true) + expect(run).toHaveBeenCalledTimes(1) + }) + + it('retries on exception and succeeds on second attempt', async () => { + const run = vi.fn() + .mockRejectedValueOnce(new Error('transient error')) + .mockResolvedValueOnce(SUCCESS_RESULT) + + const task = createTask({ + title: 'Retry task', + description: 'test', + maxRetries: 2, + retryDelayMs: 100, + retryBackoff: 2, + }) + + const retryEvents: unknown[] = [] + const result = await executeWithRetry( + run, + task, + (data) => retryEvents.push(data), + noDelay, + ) + + expect(result.success).toBe(true) + expect(run).toHaveBeenCalledTimes(2) + expect(retryEvents).toHaveLength(1) + expect(retryEvents[0]).toEqual({ + attempt: 1, + maxAttempts: 3, + error: 'transient error', + nextDelayMs: 100, // 100 * 2^0 + }) + }) + + it('retries on success:false and succeeds on second attempt', async () => { + const run = vi.fn() + .mockResolvedValueOnce(FAILURE_RESULT) + .mockResolvedValueOnce(SUCCESS_RESULT) + + const task = createTask({ + title: 'Retry task', + description: 'test', + maxRetries: 1, + retryDelayMs: 50, + }) + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(true) + expect(run).toHaveBeenCalledTimes(2) + }) + + it('exhausts all retries on persistent exception', async () => { + const run = vi.fn().mockRejectedValue(new Error('persistent error')) + + const task = createTask({ + title: 
'Always fails', + description: 'test', + maxRetries: 2, + retryDelayMs: 10, + retryBackoff: 1, + }) + + const retryEvents: unknown[] = [] + const result = await executeWithRetry( + run, + task, + (data) => retryEvents.push(data), + noDelay, + ) + + expect(result.success).toBe(false) + expect(result.output).toBe('persistent error') + expect(run).toHaveBeenCalledTimes(3) // 1 initial + 2 retries + expect(retryEvents).toHaveLength(2) + }) + + it('exhausts all retries on persistent success:false', async () => { + const run = vi.fn().mockResolvedValue(FAILURE_RESULT) + + const task = createTask({ + title: 'Always fails', + description: 'test', + maxRetries: 1, + }) + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(false) + expect(result.output).toBe('agent failed') + expect(run).toHaveBeenCalledTimes(2) + }) + + it('emits correct exponential backoff delays', async () => { + const run = vi.fn().mockRejectedValue(new Error('error')) + + const task = createTask({ + title: 'Backoff test', + description: 'test', + maxRetries: 3, + retryDelayMs: 100, + retryBackoff: 2, + }) + + const retryEvents: Array<{ nextDelayMs: number }> = [] + await executeWithRetry( + run, + task, + (data) => retryEvents.push(data), + noDelay, + ) + + expect(retryEvents).toHaveLength(3) + expect(retryEvents[0]!.nextDelayMs).toBe(100) // 100 * 2^0 + expect(retryEvents[1]!.nextDelayMs).toBe(200) // 100 * 2^1 + expect(retryEvents[2]!.nextDelayMs).toBe(400) // 100 * 2^2 + }) + + it('no retry events when maxRetries is 0 (default)', async () => { + const run = vi.fn().mockRejectedValue(new Error('fail')) + const task = createTask({ title: 'No retry', description: 'test' }) + + const retryEvents: unknown[] = [] + const result = await executeWithRetry( + run, + task, + (data) => retryEvents.push(data), + noDelay, + ) + + expect(result.success).toBe(false) + expect(run).toHaveBeenCalledTimes(1) + expect(retryEvents).toHaveLength(0) + }) + + it('calls the 
delay function with computed delay', async () => { + const run = vi.fn() + .mockRejectedValueOnce(new Error('error')) + .mockResolvedValueOnce(SUCCESS_RESULT) + + const task = createTask({ + title: 'Delay test', + description: 'test', + maxRetries: 1, + retryDelayMs: 250, + retryBackoff: 3, + }) + + const mockDelay = vi.fn().mockResolvedValue(undefined) + await executeWithRetry(run, task, undefined, mockDelay) + + expect(mockDelay).toHaveBeenCalledTimes(1) + expect(mockDelay).toHaveBeenCalledWith(250) // 250 * 3^0 + }) + + it('caps delay at 30 seconds', async () => { + const run = vi.fn() + .mockRejectedValueOnce(new Error('error')) + .mockResolvedValueOnce(SUCCESS_RESULT) + + const task = createTask({ + title: 'Cap test', + description: 'test', + maxRetries: 1, + retryDelayMs: 50_000, + retryBackoff: 2, + }) + + const mockDelay = vi.fn().mockResolvedValue(undefined) + await executeWithRetry(run, task, undefined, mockDelay) + + expect(mockDelay).toHaveBeenCalledWith(30_000) // capped + }) + + it('accumulates token usage across retry attempts', async () => { + const failResult: AgentRunResult = { + ...FAILURE_RESULT, + tokenUsage: { input_tokens: 100, output_tokens: 50 }, + } + const successResult: AgentRunResult = { + ...SUCCESS_RESULT, + tokenUsage: { input_tokens: 200, output_tokens: 80 }, + } + + const run = vi.fn() + .mockResolvedValueOnce(failResult) + .mockResolvedValueOnce(failResult) + .mockResolvedValueOnce(successResult) + + const task = createTask({ + title: 'Token test', + description: 'test', + maxRetries: 2, + retryDelayMs: 10, + }) + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(true) + // 100+100+200 input, 50+50+80 output + expect(result.tokenUsage.input_tokens).toBe(400) + expect(result.tokenUsage.output_tokens).toBe(180) + }) + + it('accumulates token usage even when all retries fail', async () => { + const failResult: AgentRunResult = { + ...FAILURE_RESULT, + tokenUsage: { input_tokens: 
50, output_tokens: 30 }, + } + + const run = vi.fn().mockResolvedValue(failResult) + + const task = createTask({ + title: 'Token fail test', + description: 'test', + maxRetries: 1, + }) + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(false) + // 50+50 input, 30+30 output (2 attempts) + expect(result.tokenUsage.input_tokens).toBe(100) + expect(result.tokenUsage.output_tokens).toBe(60) + }) + + it('clamps negative maxRetries to 0 (single attempt)', async () => { + const run = vi.fn().mockRejectedValue(new Error('fail')) + + const task = createTask({ + title: 'Negative retry', + description: 'test', + maxRetries: -5, + }) + // Manually set negative value since createTask doesn't validate + ;(task as any).maxRetries = -5 + + const result = await executeWithRetry(run, task, undefined, noDelay) + + expect(result.success).toBe(false) + expect(run).toHaveBeenCalledTimes(1) // exactly 1 attempt, no retries + }) + + it('clamps backoff below 1 to 1 (constant delay)', async () => { + const run = vi.fn() + .mockRejectedValueOnce(new Error('error')) + .mockResolvedValueOnce(SUCCESS_RESULT) + + const task = createTask({ + title: 'Bad backoff', + description: 'test', + maxRetries: 1, + retryDelayMs: 100, + retryBackoff: -2, + }) + ;(task as any).retryBackoff = -2 + + const mockDelay = vi.fn().mockResolvedValue(undefined) + await executeWithRetry(run, task, undefined, mockDelay) + + // backoff clamped to 1, so delay = 100 * 1^0 = 100 + expect(mockDelay).toHaveBeenCalledWith(100) + }) +}) diff --git a/tests/task-utils.test.ts b/tests/task-utils.test.ts new file mode 100644 index 0000000..7c3a8f5 --- /dev/null +++ b/tests/task-utils.test.ts @@ -0,0 +1,155 @@ +import { describe, it, expect } from 'vitest' +import { + createTask, + isTaskReady, + getTaskDependencyOrder, + validateTaskDependencies, +} from '../src/task/task.js' +import type { Task } from '../src/types.js' + +// 
--------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function task(id: string, opts: { dependsOn?: string[]; status?: Task['status'] } = {}): Task { + const t = createTask({ title: id, description: `task ${id}` }) + return { ...t, id, dependsOn: opts.dependsOn, status: opts.status ?? 'pending' } +} + +// --------------------------------------------------------------------------- +// createTask +// --------------------------------------------------------------------------- + +describe('createTask', () => { + it('creates a task with pending status and timestamps', () => { + const t = createTask({ title: 'Test', description: 'A test task' }) + expect(t.id).toBeDefined() + expect(t.status).toBe('pending') + expect(t.createdAt).toBeInstanceOf(Date) + expect(t.updatedAt).toBeInstanceOf(Date) + }) + + it('copies dependsOn array (no shared reference)', () => { + const deps = ['a'] + const t = createTask({ title: 'T', description: 'D', dependsOn: deps }) + deps.push('b') + expect(t.dependsOn).toEqual(['a']) + }) +}) + +// --------------------------------------------------------------------------- +// isTaskReady +// --------------------------------------------------------------------------- + +describe('isTaskReady', () => { + it('returns true for a pending task with no dependencies', () => { + const t = task('a') + expect(isTaskReady(t, [t])).toBe(true) + }) + + it('returns false for a non-pending task', () => { + const t = task('a', { status: 'blocked' }) + expect(isTaskReady(t, [t])).toBe(false) + }) + + it('returns true when all dependencies are completed', () => { + const dep = task('dep', { status: 'completed' }) + const t = task('a', { dependsOn: ['dep'] }) + expect(isTaskReady(t, [dep, t])).toBe(true) + }) + + it('returns false when a dependency is not yet completed', () => { + const dep = task('dep', { status: 'in_progress' }) + const t = task('a', 
{ dependsOn: ['dep'] }) + expect(isTaskReady(t, [dep, t])).toBe(false) + }) + + it('returns false when a dependency is missing from the task set', () => { + const t = task('a', { dependsOn: ['ghost'] }) + expect(isTaskReady(t, [t])).toBe(false) + }) +}) + +// --------------------------------------------------------------------------- +// getTaskDependencyOrder +// --------------------------------------------------------------------------- + +describe('getTaskDependencyOrder', () => { + it('returns empty array for empty input', () => { + expect(getTaskDependencyOrder([])).toEqual([]) + }) + + it('returns tasks with no deps first', () => { + const a = task('a') + const b = task('b', { dependsOn: ['a'] }) + const ordered = getTaskDependencyOrder([b, a]) + expect(ordered[0].id).toBe('a') + expect(ordered[1].id).toBe('b') + }) + + it('handles a diamond dependency (a → b,c → d)', () => { + const a = task('a') + const b = task('b', { dependsOn: ['a'] }) + const c = task('c', { dependsOn: ['a'] }) + const d = task('d', { dependsOn: ['b', 'c'] }) + + const ordered = getTaskDependencyOrder([d, c, b, a]) + const ids = ordered.map((t) => t.id) + + // a must come before b and c; b and c must come before d + expect(ids.indexOf('a')).toBeLessThan(ids.indexOf('b')) + expect(ids.indexOf('a')).toBeLessThan(ids.indexOf('c')) + expect(ids.indexOf('b')).toBeLessThan(ids.indexOf('d')) + expect(ids.indexOf('c')).toBeLessThan(ids.indexOf('d')) + }) + + it('returns partial result when a cycle exists', () => { + const a = task('a', { dependsOn: ['b'] }) + const b = task('b', { dependsOn: ['a'] }) + const ordered = getTaskDependencyOrder([a, b]) + // Neither can be ordered — result should be empty (or partial) + expect(ordered.length).toBeLessThan(2) + }) +}) + +// --------------------------------------------------------------------------- +// validateTaskDependencies +// --------------------------------------------------------------------------- + +describe('validateTaskDependencies', () => 
{ + it('returns valid for tasks with no deps', () => { + const result = validateTaskDependencies([task('a'), task('b')]) + expect(result.valid).toBe(true) + expect(result.errors).toHaveLength(0) + }) + + it('detects self-dependency', () => { + const t = task('a', { dependsOn: ['a'] }) + const result = validateTaskDependencies([t]) + expect(result.valid).toBe(false) + expect(result.errors[0]).toContain('depends on itself') + }) + + it('detects unknown dependency', () => { + const t = task('a', { dependsOn: ['ghost'] }) + const result = validateTaskDependencies([t]) + expect(result.valid).toBe(false) + expect(result.errors[0]).toContain('unknown dependency') + }) + + it('detects a cycle (a → b → a)', () => { + const a = task('a', { dependsOn: ['b'] }) + const b = task('b', { dependsOn: ['a'] }) + const result = validateTaskDependencies([a, b]) + expect(result.valid).toBe(false) + expect(result.errors.some((e) => e.toLowerCase().includes('cyclic'))).toBe(true) + }) + + it('detects a longer cycle (a → b → c → a)', () => { + const a = task('a', { dependsOn: ['c'] }) + const b = task('b', { dependsOn: ['a'] }) + const c = task('c', { dependsOn: ['b'] }) + const result = validateTaskDependencies([a, b, c]) + expect(result.valid).toBe(false) + }) +}) diff --git a/tests/tool-executor.test.ts b/tests/tool-executor.test.ts new file mode 100644 index 0000000..afa7cb6 --- /dev/null +++ b/tests/tool-executor.test.ts @@ -0,0 +1,193 @@ +import { describe, it, expect, vi } from 'vitest' +import { z } from 'zod' +import { ToolRegistry, defineTool } from '../src/tool/framework.js' +import { ToolExecutor } from '../src/tool/executor.js' +import type { ToolUseContext } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +const dummyContext: ToolUseContext = { + agent: { name: 'test-agent', role: 'tester', model: 'test-model' }, +} + 
+function echoTool() { + return defineTool({ + name: 'echo', + description: 'Echoes the message.', + inputSchema: z.object({ message: z.string() }), + execute: async ({ message }) => ({ data: message, isError: false }), + }) +} + +function failTool() { + return defineTool({ + name: 'fail', + description: 'Always throws.', + inputSchema: z.object({}), + execute: async () => { + throw new Error('intentional failure') + }, + }) +} + +function makeExecutor(...tools: ReturnType<typeof echoTool>[]) { + const registry = new ToolRegistry() + for (const t of tools) registry.register(t) + return { executor: new ToolExecutor(registry), registry } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('ToolExecutor', () => { + // ------------------------------------------------------------------------- + // Single execution + // ------------------------------------------------------------------------- + + it('executes a tool and returns its result', async () => { + const { executor } = makeExecutor(echoTool()) + const result = await executor.execute('echo', { message: 'hello' }, dummyContext) + expect(result.data).toBe('hello') + expect(result.isError).toBeFalsy() + }) + + it('returns an error result for an unknown tool', async () => { + const { executor } = makeExecutor() + const result = await executor.execute('ghost', {}, dummyContext) + expect(result.isError).toBe(true) + expect(result.data).toContain('not registered') + }) + + it('returns an error result when Zod validation fails', async () => { + const { executor } = makeExecutor(echoTool()) + // 'message' is required but missing + const result = await executor.execute('echo', {}, dummyContext) + expect(result.isError).toBe(true) + expect(result.data).toContain('Invalid input') + }) + + it('catches tool execution errors and returns them as error results', async () => { + const { executor } = 
makeExecutor(failTool()) + const result = await executor.execute('fail', {}, dummyContext) + expect(result.isError).toBe(true) + expect(result.data).toContain('intentional failure') + }) + + it('returns an error result when aborted before execution', async () => { + const { executor } = makeExecutor(echoTool()) + const controller = new AbortController() + controller.abort() + + const result = await executor.execute( + 'echo', + { message: 'hi' }, + { ...dummyContext, abortSignal: controller.signal }, + ) + expect(result.isError).toBe(true) + expect(result.data).toContain('aborted') + }) + + // ------------------------------------------------------------------------- + // Batch execution + // ------------------------------------------------------------------------- + + it('executeBatch runs multiple tools and returns a map of results', async () => { + const { executor } = makeExecutor(echoTool()) + const results = await executor.executeBatch( + [ + { id: 'c1', name: 'echo', input: { message: 'a' } }, + { id: 'c2', name: 'echo', input: { message: 'b' } }, + ], + dummyContext, + ) + + expect(results.size).toBe(2) + expect(results.get('c1')!.data).toBe('a') + expect(results.get('c2')!.data).toBe('b') + }) + + it('executeBatch isolates errors — one failure does not affect others', async () => { + const { executor } = makeExecutor(echoTool(), failTool()) + const results = await executor.executeBatch( + [ + { id: 'ok', name: 'echo', input: { message: 'fine' } }, + { id: 'bad', name: 'fail', input: {} }, + ], + dummyContext, + ) + + expect(results.get('ok')!.isError).toBeFalsy() + expect(results.get('bad')!.isError).toBe(true) + }) + + // ------------------------------------------------------------------------- + // Concurrency control + // ------------------------------------------------------------------------- + + it('respects maxConcurrency limit', async () => { + let peak = 0 + let running = 0 + + const trackTool = defineTool({ + name: 'track', + description: 'Tracks 
concurrency.', + inputSchema: z.object({}), + execute: async () => { + running++ + peak = Math.max(peak, running) + await new Promise((r) => setTimeout(r, 50)) + running-- + return { data: 'ok', isError: false } + }, + }) + + const registry = new ToolRegistry() + registry.register(trackTool) + const executor = new ToolExecutor(registry, { maxConcurrency: 2 }) + + await executor.executeBatch( + Array.from({ length: 5 }, (_, i) => ({ id: `t${i}`, name: 'track', input: {} })), + dummyContext, + ) + + expect(peak).toBeLessThanOrEqual(2) + }) +}) + +// --------------------------------------------------------------------------- +// ToolRegistry +// --------------------------------------------------------------------------- + +describe('ToolRegistry', () => { + it('registers and retrieves a tool', () => { + const registry = new ToolRegistry() + registry.register(echoTool()) + expect(registry.get('echo')).toBeDefined() + expect(registry.has('echo')).toBe(true) + }) + + it('throws on duplicate registration', () => { + const registry = new ToolRegistry() + registry.register(echoTool()) + expect(() => registry.register(echoTool())).toThrow('already registered') + }) + + it('unregister removes the tool', () => { + const registry = new ToolRegistry() + registry.register(echoTool()) + registry.unregister('echo') + expect(registry.has('echo')).toBe(false) + }) + + it('toToolDefs produces JSON schema representations', () => { + const registry = new ToolRegistry() + registry.register(echoTool()) + const defs = registry.toToolDefs() + expect(defs).toHaveLength(1) + expect(defs[0].name).toBe('echo') + expect(defs[0].inputSchema).toHaveProperty('properties') + }) +}) diff --git a/tests/trace.test.ts b/tests/trace.test.ts new file mode 100644 index 0000000..fbeb78c --- /dev/null +++ b/tests/trace.test.ts @@ -0,0 +1,453 @@ +import { describe, it, expect, vi } from 'vitest' +import { z } from 'zod' +import { Agent } from '../src/agent/agent.js' +import { AgentRunner, type RunOptions } 
from '../src/agent/runner.js' +import { ToolRegistry, defineTool } from '../src/tool/framework.js' +import { ToolExecutor } from '../src/tool/executor.js' +import { executeWithRetry } from '../src/orchestrator/orchestrator.js' +import { emitTrace, generateRunId } from '../src/utils/trace.js' +import { createTask } from '../src/task/task.js' +import type { + AgentConfig, + AgentRunResult, + LLMAdapter, + LLMResponse, + TraceEvent, +} from '../src/types.js' + +// --------------------------------------------------------------------------- +// Mock adapters +// --------------------------------------------------------------------------- + +function mockAdapter(responses: LLMResponse[]): LLMAdapter { + let callIndex = 0 + return { + name: 'mock', + async chat() { + return responses[callIndex++]! + }, + async *stream() { + /* unused */ + }, + } +} + +function textResponse(text: string): LLMResponse { + return { + id: `resp-${Math.random().toString(36).slice(2)}`, + content: [{ type: 'text' as const, text }], + model: 'mock-model', + stop_reason: 'end_turn', + usage: { input_tokens: 10, output_tokens: 20 }, + } +} + +function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse { + return { + id: `resp-${Math.random().toString(36).slice(2)}`, + content: [ + { + type: 'tool_use' as const, + id: `tu-${Math.random().toString(36).slice(2)}`, + name: toolName, + input, + }, + ], + model: 'mock-model', + stop_reason: 'tool_use', + usage: { input_tokens: 15, output_tokens: 25 }, + } +} + +function buildMockAgent( + config: AgentConfig, + responses: LLMResponse[], + registry?: ToolRegistry, + executor?: ToolExecutor, +): Agent { + const reg = registry ?? new ToolRegistry() + const exec = executor ?? 
new ToolExecutor(reg) + const adapter = mockAdapter(responses) + const agent = new Agent(config, reg, exec) + + const runner = new AgentRunner(adapter, reg, exec, { + model: config.model, + systemPrompt: config.systemPrompt, + maxTurns: config.maxTurns, + maxTokens: config.maxTokens, + temperature: config.temperature, + agentName: config.name, + }) + ;(agent as any).runner = runner + + return agent +} + +// --------------------------------------------------------------------------- +// emitTrace helper +// --------------------------------------------------------------------------- + +describe('emitTrace', () => { + it('does nothing when fn is undefined', () => { + // Should not throw + emitTrace(undefined, { + type: 'agent', + runId: 'r1', + agent: 'a', + turns: 1, + tokens: { input_tokens: 0, output_tokens: 0 }, + toolCalls: 0, + startMs: 0, + endMs: 0, + durationMs: 0, + }) + }) + + it('calls fn with the event', () => { + const fn = vi.fn() + const event: TraceEvent = { + type: 'agent', + runId: 'r1', + agent: 'a', + turns: 1, + tokens: { input_tokens: 0, output_tokens: 0 }, + toolCalls: 0, + startMs: 0, + endMs: 0, + durationMs: 0, + } + emitTrace(fn, event) + expect(fn).toHaveBeenCalledWith(event) + }) + + it('swallows errors thrown by callback', () => { + const fn = () => { throw new Error('boom') } + expect(() => + emitTrace(fn, { + type: 'agent', + runId: 'r1', + agent: 'a', + turns: 1, + tokens: { input_tokens: 0, output_tokens: 0 }, + toolCalls: 0, + startMs: 0, + endMs: 0, + durationMs: 0, + }), + ).not.toThrow() + }) + + it('swallows rejected promises from async callbacks', async () => { + // An async onTrace that rejects should not produce unhandled rejection + const fn = async () => { throw new Error('async boom') } + emitTrace(fn as unknown as (event: TraceEvent) => void, { + type: 'agent', + runId: 'r1', + agent: 'a', + turns: 1, + tokens: { input_tokens: 0, output_tokens: 0 }, + toolCalls: 0, + startMs: 0, + endMs: 0, + durationMs: 0, + }) + // If 
the rejection is not caught, vitest will fail with unhandled rejection. + // Give the microtask queue a tick to surface any unhandled rejection. + await new Promise(resolve => setTimeout(resolve, 10)) + }) +}) + +describe('generateRunId', () => { + it('returns a UUID string', () => { + const id = generateRunId() + expect(id).toMatch(/^[0-9a-f-]{36}$/) + }) + + it('returns unique IDs', () => { + const ids = new Set(Array.from({ length: 100 }, generateRunId)) + expect(ids.size).toBe(100) + }) +}) + +// --------------------------------------------------------------------------- +// AgentRunner trace events +// --------------------------------------------------------------------------- + +describe('AgentRunner trace events', () => { + it('emits llm_call trace for each LLM turn', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + const executor = new ToolExecutor(registry) + const adapter = mockAdapter([textResponse('Hello!')]) + + const runner = new AgentRunner(adapter, registry, executor, { + model: 'test-model', + agentName: 'test-agent', + }) + + const runOptions: RunOptions = { + onTrace: (e) => traces.push(e), + runId: 'run-1', + traceAgent: 'test-agent', + } + + await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'hi' }] }], + runOptions, + ) + + const llmTraces = traces.filter(t => t.type === 'llm_call') + expect(llmTraces).toHaveLength(1) + + const llm = llmTraces[0]! 
+ expect(llm.type).toBe('llm_call') + expect(llm.runId).toBe('run-1') + expect(llm.agent).toBe('test-agent') + expect(llm.model).toBe('test-model') + expect(llm.turn).toBe(1) + expect(llm.tokens).toEqual({ input_tokens: 10, output_tokens: 20 }) + expect(llm.durationMs).toBeGreaterThanOrEqual(0) + expect(llm.startMs).toBeLessThanOrEqual(llm.endMs) + }) + + it('emits tool_call trace with correct fields', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + registry.register( + defineTool({ + name: 'echo', + description: 'echoes', + inputSchema: z.object({ msg: z.string() }), + execute: async ({ msg }) => ({ data: msg }), + }), + ) + const executor = new ToolExecutor(registry) + const adapter = mockAdapter([ + toolUseResponse('echo', { msg: 'hello' }), + textResponse('Done'), + ]) + + const runner = new AgentRunner(adapter, registry, executor, { + model: 'test-model', + agentName: 'tooler', + }) + + await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'test' }] }], + { onTrace: (e) => traces.push(e), runId: 'run-2', traceAgent: 'tooler' }, + ) + + const toolTraces = traces.filter(t => t.type === 'tool_call') + expect(toolTraces).toHaveLength(1) + + const tool = toolTraces[0]! 
+ expect(tool.type).toBe('tool_call') + expect(tool.runId).toBe('run-2') + expect(tool.agent).toBe('tooler') + expect(tool.tool).toBe('echo') + expect(tool.isError).toBe(false) + expect(tool.durationMs).toBeGreaterThanOrEqual(0) + }) + + it('tool_call trace has isError: true on tool failure', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + registry.register( + defineTool({ + name: 'boom', + description: 'fails', + inputSchema: z.object({}), + execute: async () => { throw new Error('fail') }, + }), + ) + const executor = new ToolExecutor(registry) + const adapter = mockAdapter([ + toolUseResponse('boom', {}), + textResponse('Handled'), + ]) + + const runner = new AgentRunner(adapter, registry, executor, { + model: 'test-model', + agentName: 'err-agent', + }) + + await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'test' }] }], + { onTrace: (e) => traces.push(e), runId: 'run-3', traceAgent: 'err-agent' }, + ) + + const toolTraces = traces.filter(t => t.type === 'tool_call') + expect(toolTraces).toHaveLength(1) + expect(toolTraces[0]!.isError).toBe(true) + }) + + it('does not call Date.now for LLM timing when onTrace is absent', async () => { + // This test just verifies no errors occur when onTrace is not provided + const registry = new ToolRegistry() + const executor = new ToolExecutor(registry) + const adapter = mockAdapter([textResponse('hi')]) + + const runner = new AgentRunner(adapter, registry, executor, { + model: 'test-model', + }) + + const result = await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'test' }] }], + {}, + ) + + expect(result.output).toBe('hi') + }) +}) + +// --------------------------------------------------------------------------- +// Agent-level trace events +// --------------------------------------------------------------------------- + +describe('Agent trace events', () => { + it('emits agent trace with turns, tokens, and toolCalls', async () => { + const traces: 
TraceEvent[] = [] + const config: AgentConfig = { + name: 'my-agent', + model: 'mock-model', + systemPrompt: 'You are a test.', + } + + const agent = buildMockAgent(config, [textResponse('Hello world')]) + + const runOptions: Partial<RunOptions> = { + onTrace: (e) => traces.push(e), + runId: 'run-agent-1', + traceAgent: 'my-agent', + } + + const result = await agent.run('Say hello', runOptions) + expect(result.success).toBe(true) + + const agentTraces = traces.filter(t => t.type === 'agent') + expect(agentTraces).toHaveLength(1) + + const at = agentTraces[0]! + expect(at.type).toBe('agent') + expect(at.runId).toBe('run-agent-1') + expect(at.agent).toBe('my-agent') + expect(at.turns).toBe(1) // one assistant message + expect(at.tokens).toEqual({ input_tokens: 10, output_tokens: 20 }) + expect(at.toolCalls).toBe(0) + expect(at.durationMs).toBeGreaterThanOrEqual(0) + }) + + it('all traces share the same runId', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + registry.register( + defineTool({ + name: 'greet', + description: 'greets', + inputSchema: z.object({ name: z.string() }), + execute: async ({ name }) => ({ data: `Hi ${name}` }), + }), + ) + const executor = new ToolExecutor(registry) + const config: AgentConfig = { + name: 'multi-trace-agent', + model: 'mock-model', + tools: ['greet'], + } + + const agent = buildMockAgent( + config, + [ + toolUseResponse('greet', { name: 'world' }), + textResponse('Done'), + ], + registry, + executor, + ) + + const runId = 'shared-run-id' + await agent.run('test', { + onTrace: (e) => traces.push(e), + runId, + traceAgent: 'multi-trace-agent', + }) + + // Should have: 2 llm_call, 1 tool_call, 1 agent + expect(traces.length).toBeGreaterThanOrEqual(4) + + for (const trace of traces) { + expect(trace.runId).toBe(runId) + } + }) + + it('onTrace error does not break agent execution', async () => { + const config: AgentConfig = { + name: 'resilient-agent', + model: 'mock-model', + } + + const agent = 
buildMockAgent(config, [textResponse('OK')]) + + const result = await agent.run('test', { + onTrace: () => { throw new Error('callback exploded') }, + runId: 'run-err', + traceAgent: 'resilient-agent', + }) + + // The run should still succeed despite the broken callback + expect(result.success).toBe(true) + expect(result.output).toBe('OK') + }) + + it('per-turn token usage in llm_call traces', async () => { + const traces: TraceEvent[] = [] + const registry = new ToolRegistry() + registry.register( + defineTool({ + name: 'noop', + description: 'noop', + inputSchema: z.object({}), + execute: async () => ({ data: 'ok' }), + }), + ) + const executor = new ToolExecutor(registry) + + // Two LLM calls: first triggers a tool, second is the final response + const resp1: LLMResponse = { + id: 'r1', + content: [{ type: 'tool_use', id: 'tu1', name: 'noop', input: {} }], + model: 'mock-model', + stop_reason: 'tool_use', + usage: { input_tokens: 100, output_tokens: 50 }, + } + const resp2: LLMResponse = { + id: 'r2', + content: [{ type: 'text', text: 'Final answer' }], + model: 'mock-model', + stop_reason: 'end_turn', + usage: { input_tokens: 200, output_tokens: 100 }, + } + + const adapter = mockAdapter([resp1, resp2]) + const runner = new AgentRunner(adapter, registry, executor, { + model: 'mock-model', + agentName: 'token-agent', + }) + + await runner.run( + [{ role: 'user', content: [{ type: 'text', text: 'go' }] }], + { onTrace: (e) => traces.push(e), runId: 'run-tok', traceAgent: 'token-agent' }, + ) + + const llmTraces = traces.filter(t => t.type === 'llm_call') + expect(llmTraces).toHaveLength(2) + + // Each trace carries its own turn's token usage, not the aggregate + expect(llmTraces[0]!.tokens).toEqual({ input_tokens: 100, output_tokens: 50 }) + expect(llmTraces[1]!.tokens).toEqual({ input_tokens: 200, output_tokens: 100 }) + + // Turn numbers should be sequential + expect(llmTraces[0]!.turn).toBe(1) + expect(llmTraces[1]!.turn).toBe(2) + }) +})