Merge branch 'main' into Ollama-update

Masteromanlol 2026-04-03 10:15:24 -06:00, committed by GitHub
commit d84765f553
45 changed files with 5347 additions and 631 deletions

.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,40 @@
---
name: Bug Report
about: Report a bug to help us improve
title: "[Bug] "
labels: bug
assignees: ''
---
## Describe the bug
A clear and concise description of what the bug is.
## To Reproduce
Steps to reproduce the behavior:
1. Configure agent with '...'
2. Call `runTeam(...)` with '...'
3. See error
## Expected behavior
A clear description of what you expected to happen.
## Error output
```
Paste any error messages or logs here
```
## Environment
- OS: [e.g. macOS 14, Ubuntu 22.04]
- Node.js version: [e.g. 20.11]
- Package version: [e.g. 0.1.0]
- LLM provider: [e.g. Anthropic, OpenAI]
## Additional context
Add any other context about the problem here.

.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,23 @@
---
name: Feature Request
about: Suggest an idea for this project
title: "[Feature] "
labels: enhancement
assignees: ''
---
## Problem
A clear description of the problem or limitation you're experiencing.
## Proposed Solution
Describe what you'd like to happen.
## Alternatives Considered
Any alternative solutions or features you've considered.
## Additional context
Add any other context, code examples, or screenshots about the feature request here.

.github/pull_request_template.md
@@ -0,0 +1,14 @@
## What
<!-- What does this PR do? One or two sentences. -->
## Why
<!-- Why is this change needed? Link to an issue if applicable: Fixes #123 -->
## Checklist
- [ ] `npm run lint` passes
- [ ] `npm test` passes
- [ ] Added/updated tests for changed behavior
- [ ] No new runtime dependencies (or justified in the PR description)

.github/workflows/ci.yml
@@ -0,0 +1,23 @@
name: CI
on:
push:
branches: [main]
pull_request:
branches: [main]
jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
node-version: [18, 20, 22]
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}
cache: npm
- run: npm ci
- run: npm run lint
- run: npm test

.gitignore
@@ -1,5 +1,6 @@
node_modules/
dist/
coverage/
*.tgz
.DS_Store
promo-*.md

CLAUDE.md
@@ -0,0 +1,80 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Commands
```bash
npm run build # Compile TypeScript (src/ → dist/)
npm run dev # Watch mode compilation
npm run lint # Type-check only (tsc --noEmit)
npm test # Run all tests (vitest run)
npm run test:watch # Vitest watch mode
```
Tests live in `tests/` (vitest). Examples in `examples/` are standalone scripts requiring API keys (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`).
## Architecture
ES module TypeScript framework for multi-agent orchestration. Three runtime dependencies: `@anthropic-ai/sdk`, `openai`, `zod`.
### Core Execution Flow
**`OpenMultiAgent`** (`src/orchestrator/orchestrator.ts`) is the top-level public API with three execution modes:
1. **`runAgent(config, prompt)`** — single agent, one-shot
2. **`runTeam(team, goal)`** — automatic orchestration: a temporary "coordinator" agent decomposes the goal into a task DAG via LLM call, then tasks execute in dependency order
3. **`runTasks(team, tasks)`** — explicit task pipeline with user-defined dependencies
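A minimal sketch of all three calls (configs abbreviated; see the README and `examples/` for complete versions):

```typescript
// Sketch only: team construction and task shapes are abbreviated.
const r1 = await orchestrator.runAgent({ name: 'coder', model: 'claude-sonnet-4-6' }, 'Reverse a string in TS.')
const r2 = await orchestrator.runTeam(team, 'Build a REST API')
const r3 = await orchestrator.runTasks(team, [
  { title: 'Design', description: 'Write the spec', assignee: 'architect' },
  { title: 'Build', description: 'Implement the spec', assignee: 'developer', dependsOn: ['Design'] },
])
```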
### The Coordinator Pattern (runTeam)
This is the framework's key feature. When `runTeam()` is called:
1. A coordinator agent receives the goal + agent roster and produces a JSON task array (title, description, assignee, dependsOn)
2. `TaskQueue` resolves dependencies topologically — independent tasks run in parallel, dependent tasks wait
3. `Scheduler` auto-assigns any unassigned tasks (strategies: `dependency-first` default, `round-robin`, `least-busy`, `capability-match`)
4. Each task result is written to `SharedMemory` so subsequent agents see prior results
5. The coordinator synthesizes all task results into a final output
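The decomposition from step 1 is a plain task array. A hypothetical plan for a "Build a REST API" goal (illustrative values; field names from the list above):

```typescript
const plan = [
  { title: 'Design endpoints', description: 'Write the API spec', assignee: 'architect', dependsOn: [] },
  { title: 'Implement routes', description: 'Build the routes from the spec', assignee: 'developer', dependsOn: ['Design endpoints'] },
  { title: 'Review code', description: 'Review the implementation', assignee: 'reviewer', dependsOn: ['Implement routes'] },
]
```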
### Layer Map
| Layer | Files | Responsibility |
|-------|-------|----------------|
| Orchestrator | `orchestrator/orchestrator.ts`, `orchestrator/scheduler.ts` | Top-level API, task decomposition, coordinator pattern |
| Team | `team/team.ts`, `team/messaging.ts` | Agent roster, MessageBus (point-to-point + broadcast), SharedMemory binding |
| Agent | `agent/agent.ts`, `agent/runner.ts`, `agent/pool.ts`, `agent/structured-output.ts` | Agent lifecycle (idle→running→completed/error), conversation loop, concurrency pool with Semaphore, structured output validation |
| Task | `task/queue.ts`, `task/task.ts` | Dependency-aware queue, auto-unblock on completion, cascade failure to dependents |
| Tool | `tool/framework.ts`, `tool/executor.ts`, `tool/built-in/` | `defineTool()` with Zod schemas, ToolRegistry, parallel batch execution with concurrency semaphore |
| LLM | `llm/adapter.ts`, `llm/anthropic.ts`, `llm/openai.ts` | `LLMAdapter` interface (`chat` + `stream`), factory `createAdapter()` |
| Memory | `memory/shared.ts`, `memory/store.ts` | Namespaced key-value store (`agentName/key`), markdown summary injection into prompts |
| Types | `types.ts` | All interfaces in one file to avoid circular deps |
| Exports | `index.ts` | Public API surface |
### Agent Conversation Loop (AgentRunner)
`AgentRunner.run()`: send messages → extract tool-use blocks → execute tools in parallel batch → append results → loop until `end_turn` or `maxTurns` exhausted. Accumulates `TokenUsage` across all turns.
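Paraphrased as pseudocode (the helper names here are invented for clarity, not the real internals):

```typescript
// Illustrative pseudocode of AgentRunner.run(), not the actual source.
let turn = 0
while (turn++ < config.maxTurns) {
  const response = await adapter.chat(messages, options)   // one LLM turn
  addUsage(totalUsage, response.usage)                     // accumulate TokenUsage
  const toolUses = extractToolUseBlocks(response)
  if (response.stopReason === 'end_turn' || toolUses.length === 0) break
  const results = await executor.executeBatch(toolUses)    // parallel tool batch
  messages.push(assistantTurn(response), toolResultsTurn(results))
}
```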
### Concurrency Control
Two independent semaphores: `AgentPool` (max concurrent agent runs, default 5) and `ToolExecutor` (max concurrent tool calls, default 4).
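Raising both limits might look like this; `AgentPool(n)` matches its use in example 07, while the `ToolExecutor` concurrency argument is an assumption based on the defaults noted above:

```typescript
// Assumption: ToolExecutor accepts a concurrency limit alongside the registry.
const pool = new AgentPool(10)                  // up to 10 concurrent agent runs
const executor = new ToolExecutor(registry, 8)  // assumed: up to 8 concurrent tool calls
```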
### Structured Output
Optional `outputSchema` (Zod) on `AgentConfig`. When set, the agent's final output is parsed as JSON and validated. On validation failure, one retry with error feedback is attempted. Validated data is available via `result.structured`. Logic lives in `agent/structured-output.ts`, wired into `Agent.executeRun()`.
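A minimal sketch, assuming the schema below (the `outputSchema` field and `result.structured` are as documented above):

```typescript
import { z } from 'zod'

const reviewer: AgentConfig = {
  name: 'reviewer',
  model: 'claude-sonnet-4-6',
  outputSchema: z.object({
    verdict: z.enum(['SHIP', 'NEEDS WORK']),
    issues: z.array(z.string()),
  }),
}
const result = await orchestrator.runAgent(reviewer, 'Review /tmp/src and answer in JSON.')
if (result.structured) console.log(result.structured.verdict)  // typed, validated data
```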
### Task Retry
Optional `maxRetries`, `retryDelayMs`, `retryBackoff` on task config (used via `runTasks()`). `executeWithRetry()` in `orchestrator.ts` handles the retry loop with exponential backoff (capped at 30s). Token usage is accumulated across all attempts. Emits `task_retry` event via `onProgress`.
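A sketch of the retry fields on a task config (the numeric choices are arbitrary, and whether `retryBackoff` is a multiplier is an assumption):

```typescript
const result = await orchestrator.runTasks(team, [
  {
    title: 'Fetch flaky data',
    description: 'Call the external API and save the response to /tmp/data.json.',
    assignee: 'researcher',
    maxRetries: 3,       // retry up to 3 times on failure
    retryDelayMs: 1000,  // delay before the first retry
    retryBackoff: 2,     // assumed multiplier; delays are capped at 30s per the note above
  },
])
```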
### Error Handling
- Tool errors → caught, returned as `ToolResult(isError: true)`, never thrown (see the sketch after this list)
- Task failures → retry if `maxRetries > 0`, then cascade to all dependents; independent tasks continue
- LLM API errors → propagate to caller
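A sketch of the first rule using the `defineTool()` shape from the README; the tool itself is illustrative:

```typescript
import { z } from 'zod'
import { defineTool } from '@jackchen_me/open-multi-agent'

// Failures are reported as ToolResult(isError: true), never thrown to the caller.
const fetchPage = defineTool({
  name: 'fetch_page',
  description: 'Fetch a URL and return the response body.',
  inputSchema: z.object({ url: z.string().describe('The URL to fetch.') }),
  execute: async ({ url }) => {
    try {
      const res = await fetch(url)
      return { data: await res.text(), isError: false }
    } catch (err) {
      return { data: String(err), isError: true }  // surfaced to the model as an error result
    }
  },
})
```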
### Built-in Tools
`bash`, `file_read`, `file_write`, `file_edit`, `grep` — registered via `registerBuiltInTools(registry)`.
### Adding an LLM Adapter
Implement `LLMAdapter` interface with `chat(messages, options)` and `stream(messages, options)`, then register in `createAdapter()` factory in `src/llm/adapter.ts`.
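A skeleton under assumed shapes (the real `LLMAdapter` member types live in `src/llm/adapter.ts`; the response and event objects below are placeholders):

```typescript
// Skeleton only: message, option, and response shapes are assumptions, not the real types.
class EchoAdapter /* implements LLMAdapter */ {
  async chat(messages: unknown[], options?: unknown) {
    return { content: 'stub reply', usage: { input_tokens: 0, output_tokens: 0 } }
  }
  async *stream(messages: unknown[], options?: unknown) {
    yield { type: 'text', data: 'stub reply' }  // assumed streaming event shape
  }
}
// Then add a case for it in the createAdapter() factory in src/llm/adapter.ts.
```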

CODE_OF_CONDUCT.md
@@ -0,0 +1,48 @@
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a positive experience for everyone, regardless of background or
identity.
## Our Standards
Examples of behavior that contributes to a positive environment:
- Using welcoming and inclusive language
- Being respectful of differing viewpoints and experiences
- Gracefully accepting constructive feedback
- Focusing on what is best for the community
- Showing empathy towards other community members
Examples of unacceptable behavior:
- Trolling, insulting or derogatory comments, and personal attacks
- Public or private unwelcome conduct
- Publishing others' private information without explicit permission
- Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate or harmful.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
## Enforcement
Instances of unacceptable behavior may be reported to the community leaders
responsible for enforcement at **jack@yuanasi.com**. All complaints will be
reviewed and investigated promptly and fairly.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org),
version 2.1, available at
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html).

CONTRIBUTING.md
@@ -0,0 +1,72 @@
# Contributing
Thanks for your interest in contributing to Open Multi-Agent! This guide covers the basics to get you started.
## Setup
```bash
git clone https://github.com/JackChen-me/open-multi-agent.git
cd open-multi-agent
npm install
```
Requires Node.js >= 18.
## Development Commands
```bash
npm run build # Compile TypeScript (src/ → dist/)
npm run dev # Watch mode compilation
npm run lint # Type-check (tsc --noEmit)
npm test # Run all tests (vitest)
npm run test:watch # Vitest watch mode
```
## Running Tests
All tests live in `tests/`. They test core modules (TaskQueue, SharedMemory, ToolExecutor, Semaphore) without requiring API keys or network access.
```bash
npm test
```
Every PR must pass `npm run lint && npm test`. CI runs both automatically on Node 18, 20, and 22.
## Making a Pull Request
1. Fork the repo and create a branch from `main`
2. Make your changes
3. Add or update tests if you changed behavior
4. Run `npm run lint && npm test` locally
5. Open a PR against `main`
### PR Checklist
- [ ] `npm run lint` passes
- [ ] `npm test` passes
- [ ] New behavior has test coverage
- [ ] Linked to a relevant issue (if one exists)
## Code Style
- TypeScript strict mode, ES modules (`.js` extensions in imports)
- No additional linter/formatter configured — follow existing patterns
- Keep dependencies minimal (currently 3 runtime deps: `@anthropic-ai/sdk`, `openai`, `zod`)
## Architecture Overview
See the [README](./README.md#architecture) for an architecture diagram. Key entry points:
- **Orchestrator**: `src/orchestrator/orchestrator.ts` — top-level API
- **Task system**: `src/task/queue.ts`, `src/task/task.ts` — dependency DAG
- **Agent**: `src/agent/runner.ts` — conversation loop
- **Tools**: `src/tool/framework.ts`, `src/tool/executor.ts` — tool registry and execution
- **LLM adapters**: `src/llm/` — Anthropic, OpenAI, Copilot
## Where to Contribute
Check the [issues](https://github.com/JackChen-me/open-multi-agent/issues) page. Issues labeled `good first issue` are scoped and approachable. Issues labeled `help wanted` are larger but well-defined.
## License
By contributing, you agree that your contributions will be licensed under the MIT License.

DECISIONS.md
@@ -0,0 +1,43 @@
# Architecture Decisions
This document records deliberate "won't do" decisions for the project. These are features we evaluated and chose NOT to implement — not because they're bad ideas, but because they conflict with our positioning as the **simplest multi-agent framework**.
If you're considering a PR in any of these areas, please open a discussion first.
## Won't Do
### 1. Agent Handoffs
**What**: Agent A transfers an in-progress conversation to Agent B (like OpenAI Agents SDK `handoff()`).
**Why not**: Handoffs are a different paradigm from our task-based model. Our tasks have clear boundaries — one agent, one task, one result. Handoffs blur those boundaries and add state-transfer complexity. Users who need handoffs likely need a different framework (OpenAI Agents SDK is purpose-built for this).
### 2. State Persistence / Checkpointing
**What**: Save workflow state to a database so long-running workflows can resume after crashes (like LangGraph checkpointing).
**Why not**: Requires a storage backend (SQLite, Redis, Postgres), schema migrations, and serialization logic. This is enterprise infrastructure — it triples the complexity surface. Our target users run workflows that complete in seconds to minutes, not hours. If you need checkpointing, LangGraph is the right tool.
**Related**: Closing #20 with this rationale.
### 3. A2A Protocol (Agent-to-Agent)
**What**: Google's open protocol for agents on different servers to discover and communicate with each other.
**Why not**: Too early — the spec is still evolving and adoption is minimal. Our users run agents in a single process, not across distributed services. If A2A matures and there's real demand, we can revisit. Today it would add complexity for zero practical benefit.
### 4. MCP Integration (Model Context Protocol)
**What**: Anthropic's protocol for connecting LLMs to external tools and data sources.
**Why not**: MCP is valuable but targets a different layer. Our `defineTool()` API already lets users wrap any external service as a tool in ~10 lines of code. Adding MCP would mean maintaining protocol compatibility, transport layers, and tool discovery — complexity that serves tool platform builders, not our target users who just want to run agent teams.
### 5. Dashboard / Visualization
**What**: Built-in web UI to visualize task DAGs, agent activity, and token usage.
**Why not**: We expose data, we don't build UI. The `onProgress` callback and upcoming `onTrace` (#18) give users all the raw data. They can pipe it into Grafana, build a custom dashboard, or use console logs. Shipping a web UI means owning a frontend stack, which is outside our scope.
---
*Last updated: 2026-04-03*

README.md
@@ -1,6 +1,8 @@
# Open Multi-Agent
Build AI agent teams that work together. One agent plans, another implements, a third reviews — the framework handles task scheduling, dependencies, and communication automatically.
TypeScript framework for multi-agent orchestration. One `runTeam()` call from goal to result — the framework decomposes it into tasks, resolves dependencies, and runs agents in parallel.
3 runtime dependencies · 27 source files · Deploys anywhere Node.js runs · Mentioned in [Latent Space](https://www.latent.space/p/ainews-a-quiet-april-fools) AI News
[![GitHub stars](https://img.shields.io/github/stars/JackChen-me/open-multi-agent)](https://github.com/JackChen-me/open-multi-agent/stargazers)
[![license](https://img.shields.io/github/license/JackChen-me/open-multi-agent)](./LICENSE)
@@ -10,40 +12,26 @@ Build AI agent teams that work together. One agent plans, another implements, a
## Why Open Multi-Agent?
- **Multi-Agent Teams** — Define agents with different roles, tools, and even different models. They collaborate through a message bus and shared memory.
- **Task DAG Scheduling** — Tasks have dependencies. The framework resolves them topologically — dependent tasks wait, independent tasks run in parallel.
- **Model Agnostic** — Claude and GPT in the same team. Swap models per agent. Bring your own adapter for any LLM.
- **In-Process Execution** — No subprocess overhead. Everything runs in one Node.js process. Deploy to serverless, Docker, CI/CD.
- **Goal In, Result Out** — `runTeam(team, "Build a REST API")`. A coordinator agent auto-decomposes the goal into a task DAG with dependencies and assignees, runs independent tasks in parallel, and synthesizes the final output. No manual task definitions or graph wiring required.
- **TypeScript-Native** — Built for the Node.js ecosystem. `npm install`, import, run. No Python runtime, no subprocess bridge, no sidecar services. Embed in Express, Next.js, serverless functions, or CI/CD pipelines.
- **Auditable and Lightweight** — 3 runtime dependencies (`@anthropic-ai/sdk`, `openai`, `zod`). 27 source files. The entire codebase is readable in an afternoon.
- **Model Agnostic** — Claude, GPT, Gemma 4, and local models (Ollama, vLLM, LM Studio) in the same team. Swap models per agent via `baseURL`.
- **Multi-Agent Collaboration** — Agents with different roles, tools, and models collaborate through a message bus and shared memory.
- **Structured Output** — Add `outputSchema` (Zod) to any agent. Output is parsed as JSON, validated, and auto-retried once on failure. Access typed results via `result.structured`.
- **Task Retry** — Set `maxRetries` on tasks for automatic retry with exponential backoff. Failed attempts accumulate token usage for accurate billing.
- **Observability** — Optional `onTrace` callback emits structured spans for every LLM call, tool execution, task, and agent run — with timing, token usage, and a shared `runId` for correlation. Zero overhead when not subscribed, zero extra dependencies.
## Quick Start
Requires Node.js >= 18.
```bash
npm install @jackchen_me/open-multi-agent
```
Set `ANTHROPIC_API_KEY` (and optionally `OPENAI_API_KEY`) in your environment.
Set `ANTHROPIC_API_KEY` (and optionally `OPENAI_API_KEY` or `GITHUB_TOKEN` for Copilot) in your environment. Local models via Ollama require no API key — see [example 06](examples/06-local-model.ts).
```typescript
import { OpenMultiAgent } from '@jackchen_me/open-multi-agent'
const orchestrator = new OpenMultiAgent({ defaultModel: 'claude-sonnet-4-6' })
// One agent, one task
const result = await orchestrator.runAgent(
{
name: 'coder',
model: 'claude-sonnet-4-6',
tools: ['bash', 'file_write'],
},
'Write a TypeScript function that reverses a string, save it to /tmp/reverse.ts, and run it.',
)
console.log(result.output)
```
## Multi-Agent Team
This is where it gets interesting. Three agents, one goal:
Three agents, one goal — the framework handles the rest:
```typescript
import { OpenMultiAgent } from '@jackchen_me/open-multi-agent'
@@ -88,132 +76,52 @@ console.log(`Success: ${result.success}`)
console.log(`Tokens: ${result.totalTokenUsage.output_tokens} output tokens`)
```
## More Examples
What happens under the hood:
<details>
<summary><b>Task Pipeline</b> — explicit control over task graph and assignments</summary>
```typescript
const result = await orchestrator.runTasks(team, [
{
title: 'Design the data model',
description: 'Write a TypeScript interface spec to /tmp/spec.md',
assignee: 'architect',
},
{
title: 'Implement the module',
description: 'Read /tmp/spec.md and implement the module in /tmp/src/',
assignee: 'developer',
dependsOn: ['Design the data model'], // blocked until design completes
},
{
title: 'Write tests',
description: 'Read the implementation and write Vitest tests.',
assignee: 'developer',
dependsOn: ['Implement the module'],
},
{
title: 'Review code',
description: 'Review /tmp/src/ and produce a structured code review.',
assignee: 'reviewer',
dependsOn: ['Implement the module'], // can run in parallel with tests
},
])
```
agent_start coordinator
task_start architect
task_complete architect
task_start developer
task_start developer // independent tasks run in parallel
task_complete developer
task_start reviewer // unblocked after implementation
task_complete developer
task_complete reviewer
agent_complete coordinator // synthesizes final result
Success: true
Tokens: 12847 output tokens
```
</details>
## Three Ways to Run
<details>
<summary><b>Custom Tools</b> — define tools with Zod schemas</summary>
| Mode | Method | When to use |
|------|--------|-------------|
| Single agent | `runAgent()` | One agent, one prompt — simplest entry point |
| Auto-orchestrated team | `runTeam()` | Give a goal, framework plans and executes |
| Explicit pipeline | `runTasks()` | You define the task graph and assignments |
```typescript
import { z } from 'zod'
import { defineTool, Agent, ToolRegistry, ToolExecutor, registerBuiltInTools } from '@jackchen_me/open-multi-agent'
## Examples
const searchTool = defineTool({
name: 'web_search',
description: 'Search the web and return the top results.',
inputSchema: z.object({
query: z.string().describe('The search query.'),
maxResults: z.number().optional().describe('Number of results (default 5).'),
}),
execute: async ({ query, maxResults = 5 }) => {
const results = await mySearchProvider(query, maxResults)
return { data: JSON.stringify(results), isError: false }
},
})
All examples are runnable scripts in [`examples/`](./examples/). Run any of them with `npx tsx`:
const registry = new ToolRegistry()
registerBuiltInTools(registry)
registry.register(searchTool)
const executor = new ToolExecutor(registry)
const agent = new Agent(
{ name: 'researcher', model: 'claude-sonnet-4-6', tools: ['web_search'] },
registry,
executor,
)
const result = await agent.run('Find the three most recent TypeScript releases.')
```bash
npx tsx examples/01-single-agent.ts
```
</details>
<details>
<summary><b>Multi-Model Teams</b> — mix Claude and GPT in one workflow</summary>
```typescript
const claudeAgent: AgentConfig = {
name: 'strategist',
model: 'claude-opus-4-6',
provider: 'anthropic',
systemPrompt: 'You plan high-level approaches.',
tools: ['file_write'],
}
const gptAgent: AgentConfig = {
name: 'implementer',
model: 'gpt-5.4',
provider: 'openai',
systemPrompt: 'You implement plans as working code.',
tools: ['bash', 'file_read', 'file_write'],
}
const team = orchestrator.createTeam('mixed-team', {
name: 'mixed-team',
agents: [claudeAgent, gptAgent],
sharedMemory: true,
})
const result = await orchestrator.runTeam(team, 'Build a CLI tool that converts JSON to CSV.')
```
</details>
<details>
<summary><b>Streaming Output</b></summary>
```typescript
import { Agent, ToolRegistry, ToolExecutor, registerBuiltInTools } from '@jackchen_me/open-multi-agent'
const registry = new ToolRegistry()
registerBuiltInTools(registry)
const executor = new ToolExecutor(registry)
const agent = new Agent(
{ name: 'writer', model: 'claude-sonnet-4-6', maxTurns: 3 },
registry,
executor,
)
for await (const event of agent.stream('Explain monads in two sentences.')) {
if (event.type === 'text' && typeof event.data === 'string') {
process.stdout.write(event.data)
}
}
```
</details>
| Example | What it shows |
|---------|---------------|
| [01 — Single Agent](examples/01-single-agent.ts) | `runAgent()` one-shot, `stream()` streaming, `prompt()` multi-turn |
| [02 — Team Collaboration](examples/02-team-collaboration.ts) | `runTeam()` auto-orchestration with coordinator pattern |
| [03 — Task Pipeline](examples/03-task-pipeline.ts) | `runTasks()` explicit dependency graph (design → implement → test + review) |
| [04 — Multi-Model Team](examples/04-multi-model-team.ts) | `defineTool()` custom tools, mixed Anthropic + OpenAI providers, `AgentPool` |
| [05 — Copilot](examples/05-copilot-test.ts) | GitHub Copilot as an LLM provider |
| [06 — Local Model](examples/06-local-model.ts) | Ollama + Claude in one pipeline via `baseURL` (works with vLLM, LM Studio, etc.) |
| [07 — Fan-Out / Aggregate](examples/07-fan-out-aggregate.ts) | `runParallel()` MapReduce — 3 analysts in parallel, then synthesize |
| [08 — Gemma 4 Local](examples/08-gemma4-local.ts) | `runTasks()` + `runTeam()` with local Gemma 4 via Ollama — zero API cost |
| [09 — Structured Output](examples/09-structured-output.ts) | `outputSchema` (Zod) on AgentConfig — validated JSON via `result.structured` |
| [10 — Task Retry](examples/10-task-retry.ts) | `maxRetries` / `retryDelayMs` / `retryBackoff` with `task_retry` progress events |
| [11 — Trace Observability](examples/11-trace-observability.ts) | `onTrace` callback — structured spans for LLM calls, tools, tasks, and agents |
## Architecture
@@ -246,6 +154,7 @@ for await (const event of agent.stream('Explain monads in two sentences.')) {
│ - prompt() │───►│ LLMAdapter │
│ - stream() │ │ - AnthropicAdapter │
└────────┬──────────┘ │ - OpenAIAdapter │
│ │ - CopilotAdapter │
│ └──────────────────────┘
┌────────▼──────────┐
│ AgentRunner │ ┌──────────────────────┐
@@ -265,17 +174,46 @@ for await (const event of agent.stream('Explain monads in two sentences.')) {
| `file_edit` | Edit a file by replacing an exact string match. |
| `grep` | Search file contents with regex. Uses ripgrep when available, falls back to Node.js. |
## Supported Providers
| Provider | Config | Env var | Status |
|----------|--------|---------|--------|
| Anthropic (Claude) | `provider: 'anthropic'` | `ANTHROPIC_API_KEY` | Verified |
| OpenAI (GPT) | `provider: 'openai'` | `OPENAI_API_KEY` | Verified |
| GitHub Copilot | `provider: 'copilot'` | `GITHUB_TOKEN` | Verified |
| Ollama / vLLM / LM Studio | `provider: 'openai'` + `baseURL` | — | Verified |
Verified local models with tool-calling: **Gemma 4** (see [example 08](examples/08-gemma4-local.ts)).
Any OpenAI-compatible API should work via `provider: 'openai'` + `baseURL` (DeepSeek, Groq, Mistral, Qwen, MiniMax, etc.). These providers have not been fully verified yet — contributions welcome via [#25](https://github.com/JackChen-me/open-multi-agent/issues/25).
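For instance, a hypothetical DeepSeek configuration might look like the following; the `baseURL`, model name, and env var here are assumptions, not verified values:

```typescript
// Unverified sketch: an OpenAI-compatible endpoint wired up via baseURL.
const analyst: AgentConfig = {
  name: 'analyst',
  model: 'deepseek-chat',                  // hypothetical model id
  provider: 'openai',                      // "OpenAI-compatible protocol", not the OpenAI cloud
  baseURL: 'https://api.deepseek.com/v1',  // hypothetical endpoint
  apiKey: process.env.DEEPSEEK_API_KEY,
}
```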
## Contributing
Issues, feature requests, and PRs are welcome. Some areas where contributions would be especially valuable:
- **LLM Adapters** — Ollama, llama.cpp, vLLM, Gemini. The `LLMAdapter` interface requires just two methods: `chat()` and `stream()`.
- **Provider integrations** — Verify and document OpenAI-compatible providers (DeepSeek, Groq, Qwen, MiniMax, etc.) via `baseURL`. See [#25](https://github.com/JackChen-me/open-multi-agent/issues/25). For providers that are NOT OpenAI-compatible (e.g. Gemini), a new `LLMAdapter` implementation is welcome — the interface requires just two methods: `chat()` and `stream()`.
- **Examples** — Real-world workflows and use cases.
- **Documentation** — Guides, tutorials, and API docs.
## Author
> JackChen — Ex PM (¥100M+ revenue), now indie builder. Follow on [X](https://x.com/JackChen_x) for AI Agent insights.
## Contributors
<a href="https://github.com/JackChen-me/open-multi-agent/graphs/contributors">
<img src="https://contrib.rocks/image?repo=JackChen-me/open-multi-agent" />
</a>
## Star History
[![Star History Chart](https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260402)](https://star-history.com/#JackChen-me/open-multi-agent&Date)
<a href="https://star-history.com/#JackChen-me/open-multi-agent&Date">
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&theme=dark&v=20260403" />
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260403" />
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260403" />
</picture>
</a>
## License

@@ -1,6 +1,8 @@
# Open Multi-Agent
Build AI agent teams that work together. One agent plans, another implements, a third reviews — the framework handles task scheduling, dependencies, and inter-agent communication automatically.
TypeScript framework for multi-agent orchestration. One `runTeam()` call from goal to result — the framework decomposes the goal into tasks, resolves dependencies, and runs agents in parallel.
3 runtime dependencies · 27 source files · Deploys anywhere Node.js runs · Mentioned in [Latent Space](https://www.latent.space/p/ainews-a-quiet-april-fools) AI News (a leading AI-engineering newsletter, 170k+ subscribers)
[![GitHub stars](https://img.shields.io/github/stars/JackChen-me/open-multi-agent)](https://github.com/JackChen-me/open-multi-agent/stargazers)
[![license](https://img.shields.io/github/license/JackChen-me/open-multi-agent)](./LICENSE)
@@ -10,40 +12,26 @@
## Why Open Multi-Agent?
- **Multi-Agent Teams** — Define agents with different roles, tools, and even different models. They collaborate through a message bus and shared memory.
- **Task DAG Scheduling** — Tasks have dependencies. The framework resolves them topologically — dependent tasks wait, independent tasks run in parallel.
- **Model Agnostic** — Claude and GPT can be used in the same team. Each agent can be configured with its own model. You can also write your own adapter for any LLM.
- **In-Process Execution** — No subprocess overhead. Everything runs in one Node.js process. Deploy to serverless, Docker, CI/CD.
- **Goal In, Result Out** — `runTeam(team, "Build a REST API")`. A coordinator agent auto-decomposes the goal into a task DAG with dependencies, assigns tasks to the right agents, runs independent tasks in parallel, and synthesizes the final output. No manual task definitions or graph wiring required.
- **TypeScript-Native** — Built for the Node.js ecosystem. `npm install` and go: no Python runtime, no subprocess bridge, no extra infrastructure. Embed in Express, Next.js, serverless functions, or CI/CD pipelines.
- **Auditable and Lightweight** — 3 runtime dependencies (`@anthropic-ai/sdk`, `openai`, `zod`), 27 source files. The entire codebase is readable in an afternoon.
- **Model Agnostic** — Claude, GPT, Gemma 4, and local models (Ollama, vLLM, LM Studio) can be used in the same team. Any OpenAI-compatible service plugs in via `baseURL`.
- **Multi-Agent Collaboration** — Define agents with different roles, tools, and models that collaborate through a message bus and shared memory.
- **Structured Output** — Add `outputSchema` (Zod) to any agent. Output is automatically parsed as JSON and validated, with one automatic retry on validation failure. Access typed results via `result.structured`.
- **Task Retry** — Set `maxRetries` on tasks for automatic retry with exponential backoff. Token usage from all attempts accumulates for accurate billing.
- **Observability** — Optional `onTrace` callback emits structured span events for every LLM call, tool execution, task, and agent run — with timing, token usage, and a shared `runId` for correlation. Zero overhead when not subscribed, zero extra dependencies.
## Quick Start
Requires Node.js >= 18.
```bash
npm install @jackchen_me/open-multi-agent
```
Set `ANTHROPIC_API_KEY` (and optionally `OPENAI_API_KEY`) in your environment.
Set `ANTHROPIC_API_KEY` (and optionally `OPENAI_API_KEY`, or `GITHUB_TOKEN` for Copilot) in your environment. Local models via Ollama require no API key — see [example 06](examples/06-local-model.ts).
```typescript
import { OpenMultiAgent } from '@jackchen_me/open-multi-agent'
const orchestrator = new OpenMultiAgent({ defaultModel: 'claude-sonnet-4-6' })
// One agent, one task
const result = await orchestrator.runAgent(
{
name: 'coder',
model: 'claude-sonnet-4-6',
tools: ['bash', 'file_write'],
},
'Write a TypeScript function that reverses a string, save it to /tmp/reverse.ts, and run it.',
)
console.log(result.output)
```
## Multi-Agent Team
This is where it gets interesting. Three agents, one goal:
Three agents, one goal — the framework handles the rest:
```typescript
import { OpenMultiAgent } from '@jackchen_me/open-multi-agent'
@@ -88,132 +76,52 @@ console.log(`Success: ${result.success}`)
console.log(`Tokens: ${result.totalTokenUsage.output_tokens} output tokens`)
```
## More Examples
What happens under the hood:
<details>
<summary><b>Task Pipeline</b> — explicit control over task graph and assignments</summary>
```typescript
const result = await orchestrator.runTasks(team, [
{
title: 'Design the data model',
description: 'Write a TypeScript interface spec to /tmp/spec.md',
assignee: 'architect',
},
{
title: 'Implement the module',
description: 'Read /tmp/spec.md and implement the module in /tmp/src/',
assignee: 'developer',
dependsOn: ['Design the data model'], // blocked until design completes
},
{
title: 'Write tests',
description: 'Read the implementation and write Vitest tests.',
assignee: 'developer',
dependsOn: ['Implement the module'],
},
{
title: 'Review code',
description: 'Review /tmp/src/ and produce a structured code review.',
assignee: 'reviewer',
dependsOn: ['Implement the module'], // can run in parallel with tests
},
])
```
agent_start coordinator
task_start architect
task_complete architect
task_start developer
task_start developer // independent tasks run in parallel
task_complete developer
task_start reviewer // unblocked after implementation
task_complete developer
task_complete reviewer
agent_complete coordinator // synthesizes the final result
Success: true
Tokens: 12847 output tokens
```
</details>
## Three Ways to Run
<details>
<summary><b>Custom Tools</b> — define tools with Zod schemas</summary>
| Mode | Method | When to use |
|------|--------|-------------|
| Single agent | `runAgent()` | One agent, one prompt — the simplest entry point |
| Auto-orchestrated team | `runTeam()` | Give it a goal; the framework plans and executes |
| Explicit pipeline | `runTasks()` | You define the task graph and assignments |
```typescript
import { z } from 'zod'
import { defineTool, Agent, ToolRegistry, ToolExecutor, registerBuiltInTools } from '@jackchen_me/open-multi-agent'
## Examples
const searchTool = defineTool({
name: 'web_search',
description: 'Search the web and return the top results.',
inputSchema: z.object({
query: z.string().describe('The search query.'),
maxResults: z.number().optional().describe('Number of results (default 5).'),
}),
execute: async ({ query, maxResults = 5 }) => {
const results = await mySearchProvider(query, maxResults)
return { data: JSON.stringify(results), isError: false }
},
})
All examples are runnable scripts in [`examples/`](./examples/). Run any of them with `npx tsx`:
const registry = new ToolRegistry()
registerBuiltInTools(registry)
registry.register(searchTool)
const executor = new ToolExecutor(registry)
const agent = new Agent(
{ name: 'researcher', model: 'claude-sonnet-4-6', tools: ['web_search'] },
registry,
executor,
)
const result = await agent.run('Find the three most recent TypeScript releases.')
```bash
npx tsx examples/01-single-agent.ts
```
</details>
<details>
<summary><b>Multi-Model Teams</b> — mix Claude and GPT in one workflow</summary>
```typescript
const claudeAgent: AgentConfig = {
name: 'strategist',
model: 'claude-opus-4-6',
provider: 'anthropic',
systemPrompt: 'You plan high-level approaches.',
tools: ['file_write'],
}
const gptAgent: AgentConfig = {
name: 'implementer',
model: 'gpt-5.4',
provider: 'openai',
systemPrompt: 'You implement plans as working code.',
tools: ['bash', 'file_read', 'file_write'],
}
const team = orchestrator.createTeam('mixed-team', {
name: 'mixed-team',
agents: [claudeAgent, gptAgent],
sharedMemory: true,
})
const result = await orchestrator.runTeam(team, 'Build a CLI tool that converts JSON to CSV.')
```
</details>
<details>
<summary><b>Streaming Output</b></summary>
```typescript
import { Agent, ToolRegistry, ToolExecutor, registerBuiltInTools } from '@jackchen_me/open-multi-agent'
const registry = new ToolRegistry()
registerBuiltInTools(registry)
const executor = new ToolExecutor(registry)
const agent = new Agent(
{ name: 'writer', model: 'claude-sonnet-4-6', maxTurns: 3 },
registry,
executor,
)
for await (const event of agent.stream('Explain monads in two sentences.')) {
if (event.type === 'text' && typeof event.data === 'string') {
process.stdout.write(event.data)
}
}
```
</details>
| Example | What it shows |
|---------|---------------|
| [01 — Single Agent](examples/01-single-agent.ts) | `runAgent()` one-shot, `stream()` streaming, `prompt()` multi-turn |
| [02 — Team Collaboration](examples/02-team-collaboration.ts) | `runTeam()` auto-orchestration with the coordinator pattern |
| [03 — Task Pipeline](examples/03-task-pipeline.ts) | `runTasks()` explicit dependency graph (design → implement → test + review) |
| [04 — Multi-Model Team](examples/04-multi-model-team.ts) | `defineTool()` custom tools, mixed Anthropic + OpenAI providers, `AgentPool` |
| [05 — Copilot](examples/05-copilot-test.ts) | GitHub Copilot as an LLM provider |
| [06 — Local Model](examples/06-local-model.ts) | Ollama + Claude in one pipeline via `baseURL` (works with vLLM, LM Studio, etc.) |
| [07 — Fan-Out / Aggregate](examples/07-fan-out-aggregate.ts) | `runParallel()` MapReduce — 3 analysts in parallel, then synthesize |
| [08 — Gemma 4 Local](examples/08-gemma4-local.ts) | `runTasks()` + `runTeam()` with local Gemma 4 via Ollama — zero API cost |
| [09 — Structured Output](examples/09-structured-output.ts) | `outputSchema` (Zod) — validated JSON output via `result.structured` |
| [10 — Task Retry](examples/10-task-retry.ts) | `maxRetries` / `retryDelayMs` / `retryBackoff` with `task_retry` progress events |
| [11 — Trace Observability](examples/11-trace-observability.ts) | `onTrace` callback — structured spans for LLM calls, tools, tasks, and agents |
## Architecture
@@ -246,6 +154,7 @@ for await (const event of agent.stream('Explain monads in two sentences.')) {
│ - prompt() │───►│ LLMAdapter │
│ - stream() │ │ - AnthropicAdapter │
└────────┬──────────┘ │ - OpenAIAdapter │
│ │ - CopilotAdapter │
│ └──────────────────────┘
┌────────▼──────────┐
│ AgentRunner │ ┌──────────────────────┐
@@ -265,17 +174,46 @@ for await (const event of agent.stream('Explain monads in two sentences.')) {
| `file_edit` | Edit a file by replacing an exact string match. |
| `grep` | Search file contents with regex. Uses ripgrep when available, falls back to a Node.js implementation. |
## Supported Providers
| Provider | Config | Env var | Status |
|----------|--------|---------|--------|
| Anthropic (Claude) | `provider: 'anthropic'` | `ANTHROPIC_API_KEY` | Verified |
| OpenAI (GPT) | `provider: 'openai'` | `OPENAI_API_KEY` | Verified |
| GitHub Copilot | `provider: 'copilot'` | `GITHUB_TOKEN` | Verified |
| Ollama / vLLM / LM Studio | `provider: 'openai'` + `baseURL` | — | Verified |
Verified local models with tool-calling: **Gemma 4** (see [example 08](examples/08-gemma4-local.ts)).
Any OpenAI-compatible API can be plugged in via `provider: 'openai'` + `baseURL` (DeepSeek, Groq, Mistral, Qwen, MiniMax, etc.). These providers have not been fully verified yet — verification contributions are welcome via [#25](https://github.com/JackChen-me/open-multi-agent/issues/25).
## Contributing
Issues, feature requests, and PRs are welcome. Contributions in the following areas are especially valuable:
- **LLM Adapters** — Ollama, llama.cpp, vLLM, Gemini. The `LLMAdapter` interface requires just two methods: `chat()` and `stream()`.
- **Provider integrations** — Verify and document OpenAI-compatible providers (DeepSeek, Groq, Qwen, MiniMax, etc.) via `baseURL`. See [#25](https://github.com/JackChen-me/open-multi-agent/issues/25). For providers that are NOT OpenAI-compatible (e.g. Gemini), a new `LLMAdapter` implementation is welcome — the interface requires just two methods: `chat()` and `stream()`.
- **Examples** — Real-world workflows and use cases.
- **Documentation** — Guides, tutorials, and API docs.
## Author
> JackChen — former WPS product manager, now indie builder. Follow [「杰克西|硅基杠杆」](https://www.xiaohongshu.com/user/profile/5a1bdc1e4eacab4aa39ea6d6) on Xiaohongshu for my ongoing thoughts on AI agents.
## Contributors
<a href="https://github.com/JackChen-me/open-multi-agent/graphs/contributors">
<img src="https://contrib.rocks/image?repo=JackChen-me/open-multi-agent" />
</a>
## Star History
[![Star History Chart](https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260402)](https://star-history.com/#JackChen-me/open-multi-agent&Date)
<a href="https://star-history.com/#JackChen-me/open-multi-agent&Date">
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&theme=dark&v=20260403" />
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260403" />
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260403" />
</picture>
</a>
## License

SECURITY.md
@@ -0,0 +1,17 @@
# Security Policy
## Supported Versions
| Version | Supported |
|---------|-----------|
| latest | Yes |
## Reporting a Vulnerability
If you discover a security vulnerability, please report it responsibly via email:
**jack@yuanasi.com**
Please do **not** open a public GitHub issue for security vulnerabilities.
We will acknowledge receipt within 48 hours and aim to provide a fix or mitigation plan within 7 days.

examples/05-copilot-test.ts
@@ -0,0 +1,49 @@
/**
* Quick smoke test for the Copilot adapter.
*
* Run:
* npx tsx examples/05-copilot-test.ts
*
* If GITHUB_COPILOT_TOKEN is not set, the adapter will start an interactive
* OAuth2 device flow — you'll be prompted to sign in via your browser.
*/
import { OpenMultiAgent } from '../src/index.js'
import type { OrchestratorEvent } from '../src/types.js'
const orchestrator = new OpenMultiAgent({
defaultModel: 'gpt-4o',
defaultProvider: 'copilot',
onProgress: (event: OrchestratorEvent) => {
if (event.type === 'agent_start') {
console.log(`[start] agent=${event.agent}`)
} else if (event.type === 'agent_complete') {
console.log(`[complete] agent=${event.agent}`)
}
},
})
console.log('Testing Copilot adapter with gpt-4o...\n')
const result = await orchestrator.runAgent(
{
name: 'assistant',
model: 'gpt-4o',
provider: 'copilot',
systemPrompt: 'You are a helpful assistant. Keep answers brief.',
maxTurns: 1,
maxTokens: 256,
},
'What is 2 + 2? Reply in one sentence.',
)
if (result.success) {
console.log('\nAgent output:')
console.log('─'.repeat(60))
console.log(result.output)
console.log('─'.repeat(60))
console.log(`\nTokens: input=${result.tokenUsage.input_tokens}, output=${result.tokenUsage.output_tokens}`)
} else {
console.error('Agent failed:', result.output)
process.exit(1)
}

examples/06-local-model.ts
@@ -0,0 +1,199 @@
/**
* Example 06 — Local Model + Cloud Model Team (Ollama + Claude)
*
* Demonstrates mixing a local model served by Ollama with a cloud model
* (Claude) in the same task pipeline. The key technique is using
* `provider: 'openai'` with a custom `baseURL` pointing at Ollama's
* OpenAI-compatible endpoint.
*
* This pattern works with ANY OpenAI-compatible local server:
* - Ollama http://localhost:11434/v1
* - vLLM http://localhost:8000/v1
* - LM Studio http://localhost:1234/v1
* - llama.cpp http://localhost:8080/v1
* Just change the baseURL and model name below.
*
* Run:
* npx tsx examples/06-local-model.ts
*
* Prerequisites:
* 1. Ollama installed and running: https://ollama.com
* 2. Pull the model: ollama pull llama3.1
* 3. ANTHROPIC_API_KEY env var must be set.
*/
import { OpenMultiAgent } from '../src/index.js'
import type { AgentConfig, OrchestratorEvent, Task } from '../src/types.js'
// ---------------------------------------------------------------------------
// Agents
// ---------------------------------------------------------------------------
/**
* Coder uses Claude (Anthropic) for high-quality code generation.
*/
const coder: AgentConfig = {
name: 'coder',
model: 'claude-sonnet-4-6',
provider: 'anthropic',
systemPrompt: `You are a senior TypeScript developer. Write clean, well-typed,
production-quality code. Use the tools to write files to /tmp/local-model-demo/.
Always include brief JSDoc comments on exported functions.`,
tools: ['bash', 'file_write'],
maxTurns: 6,
}
/**
* Reviewer uses a local Ollama model via the OpenAI-compatible API.
* The apiKey is required by the OpenAI SDK but Ollama ignores it,
* so we pass the placeholder string 'ollama'.
*/
const reviewer: AgentConfig = {
name: 'reviewer',
model: 'llama3.1',
provider: 'openai', // 'openai' here means "OpenAI-compatible protocol", not the OpenAI cloud
baseURL: 'http://localhost:11434/v1',
apiKey: 'ollama',
systemPrompt: `You are a code reviewer. You read source files and produce a structured review.
Your review MUST include these sections:
- Summary (2-3 sentences)
- Strengths (bullet list)
- Issues (bullet list or "None found" if the code is clean)
- Verdict: SHIP or NEEDS WORK
Be specific and constructive. Reference line numbers or function names when possible.`,
tools: ['file_read'],
maxTurns: 4,
}
// ---------------------------------------------------------------------------
// Progress handler
// ---------------------------------------------------------------------------
const taskTimes = new Map<string, number>()
function handleProgress(event: OrchestratorEvent): void {
const ts = new Date().toISOString().slice(11, 23)
switch (event.type) {
case 'task_start': {
taskTimes.set(event.task ?? '', Date.now())
const task = event.data as Task | undefined
console.log(`[${ts}] TASK READY "${task?.title ?? event.task}" → ${task?.assignee ?? '?'}`)
break
}
case 'task_complete': {
const elapsed = Date.now() - (taskTimes.get(event.task ?? '') ?? Date.now())
console.log(`[${ts}] TASK DONE task=${event.task} in ${elapsed}ms`)
break
}
case 'agent_start':
console.log(`[${ts}] AGENT START ${event.agent}`)
break
case 'agent_complete':
console.log(`[${ts}] AGENT DONE ${event.agent}`)
break
case 'error':
console.error(`[${ts}] ERROR ${event.agent ?? ''} task=${event.task ?? '?'}`)
break
}
}
// ---------------------------------------------------------------------------
// Orchestrator + Team
// ---------------------------------------------------------------------------
const orchestrator = new OpenMultiAgent({
defaultModel: 'claude-sonnet-4-6',
maxConcurrency: 2,
onProgress: handleProgress,
})
const team = orchestrator.createTeam('local-cloud-team', {
name: 'local-cloud-team',
agents: [coder, reviewer],
sharedMemory: true,
})
// ---------------------------------------------------------------------------
// Task pipeline: code → review
// ---------------------------------------------------------------------------
const OUTPUT_DIR = '/tmp/local-model-demo'
const tasks: Array<{
title: string
description: string
assignee?: string
dependsOn?: string[]
}> = [
{
title: 'Write: retry utility',
description: `Write a small but complete TypeScript utility to ${OUTPUT_DIR}/retry.ts.
The module should export:
1. A \`RetryOptions\` interface with: maxRetries (number), delayMs (number),
backoffFactor (optional number, default 2), shouldRetry (optional predicate
taking the error and returning boolean).
2. An async \`retry<T>(fn: () => Promise<T>, options: RetryOptions): Promise<T>\`
function that retries \`fn\` with exponential backoff.
3. A convenience \`withRetry\` wrapper that returns a new function with retry
behaviour baked in.
Include JSDoc comments. No external dependencies — use only Node built-ins.
After writing the file, also create a small test script at ${OUTPUT_DIR}/retry-test.ts
that exercises the happy path and a failure case, then run it with \`npx tsx\`.`,
assignee: 'coder',
},
{
title: 'Review: retry utility',
description: `Read the files at ${OUTPUT_DIR}/retry.ts and ${OUTPUT_DIR}/retry-test.ts.
Produce a structured code review covering:
- Summary (2-3 sentences describing the module)
- Strengths (bullet list)
- Issues (bullet list — be specific about what and why)
- Verdict: SHIP or NEEDS WORK`,
assignee: 'reviewer',
dependsOn: ['Write: retry utility'],
},
]
// ---------------------------------------------------------------------------
// Run
// ---------------------------------------------------------------------------
console.log('Local + Cloud model team')
console.log(` coder → Claude (${coder.model}) via Anthropic API`)
console.log(` reviewer → Ollama (${reviewer.model}) at ${reviewer.baseURL}`)
console.log()
console.log('Pipeline: coder writes code → local model reviews it')
console.log('='.repeat(60))
const result = await orchestrator.runTasks(team, tasks)
// ---------------------------------------------------------------------------
// Summary
// ---------------------------------------------------------------------------
console.log('\n' + '='.repeat(60))
console.log('Pipeline complete.\n')
console.log(`Overall success: ${result.success}`)
console.log(`Tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`)
console.log('\nPer-agent summary:')
for (const [name, r] of result.agentResults) {
const icon = r.success ? 'OK ' : 'FAIL'
const provider = name === 'coder' ? 'anthropic' : 'ollama (local)'
const tools = r.toolCalls.map(c => c.toolName).join(', ')
console.log(` [${icon}] ${name.padEnd(10)} (${provider.padEnd(16)}) tools: ${tools || '(none)'}`)
}
// Print the reviewer's output
const review = result.agentResults.get('reviewer')
if (review?.success) {
console.log('\nCode review (from local model):')
console.log('─'.repeat(60))
console.log(review.output)
console.log('─'.repeat(60))
}

examples/07-fan-out-aggregate.ts
@@ -0,0 +1,209 @@
/**
* Example 07 — Fan-Out / Aggregate (MapReduce) Pattern
*
* Demonstrates:
* - Fan-out: send the same question to N "analyst" agents in parallel
* - Aggregate: a "synthesizer" agent reads all analyst outputs and produces
* a balanced final report
* - AgentPool with runParallel() for concurrent fan-out
* - No tools needed — pure LLM reasoning to keep the focus on the pattern
*
* Run:
* npx tsx examples/07-fan-out-aggregate.ts
*
* Prerequisites:
* ANTHROPIC_API_KEY env var must be set.
*/
import { Agent, AgentPool, ToolRegistry, ToolExecutor, registerBuiltInTools } from '../src/index.js'
import type { AgentConfig, AgentRunResult } from '../src/types.js'
// ---------------------------------------------------------------------------
// Analysis topic
// ---------------------------------------------------------------------------
const TOPIC = `Should a solo developer build a SaaS product that uses AI agents
for automated customer support? Consider the current state of AI technology,
market demand, competition, costs, and the unique constraints of being a solo
founder with limited time (~6 hours/day of productive work).`
// ---------------------------------------------------------------------------
// Analyst agent configs — three perspectives on the same question
// ---------------------------------------------------------------------------
const optimistConfig: AgentConfig = {
name: 'optimist',
model: 'claude-sonnet-4-6',
systemPrompt: `You are an optimistic technology analyst who focuses on
opportunities, upside potential, and emerging trends. You see possibilities
where others see obstacles. Back your optimism with concrete reasoning —
cite market trends, cost curves, and real capabilities. Keep your analysis
to 200-300 words.`,
maxTurns: 1,
temperature: 0.4,
}
const skepticConfig: AgentConfig = {
name: 'skeptic',
model: 'claude-sonnet-4-6',
systemPrompt: `You are a skeptical technology analyst who focuses on risks,
challenges, failure modes, and hidden costs. You stress-test assumptions and
ask "what could go wrong?" Back your skepticism with concrete reasoning
cite failure rates, technical limitations, and market realities. Keep your
analysis to 200-300 words.`,
maxTurns: 1,
temperature: 0.4,
}
const pragmatistConfig: AgentConfig = {
name: 'pragmatist',
model: 'claude-sonnet-4-6',
systemPrompt: `You are a pragmatic technology analyst who focuses on practical
feasibility, execution complexity, and resource requirements. You care about
what works today, not what might work someday. You think in terms of MVPs,
timelines, and concrete tradeoffs. Keep your analysis to 200-300 words.`,
maxTurns: 1,
temperature: 0.4,
}
const synthesizerConfig: AgentConfig = {
name: 'synthesizer',
model: 'claude-sonnet-4-6',
systemPrompt: `You are a senior strategy advisor who synthesizes multiple
perspectives into a balanced, actionable recommendation. You do not simply
summarise — you weigh the arguments, identify where they agree and disagree,
and produce a clear verdict with next steps. Structure your output as:
1. Key agreements across perspectives
2. Key disagreements and how you weigh them
3. Verdict (go / no-go / conditional go)
4. Recommended next steps (3-5 bullet points)
Keep the final report to 300-400 words.`,
maxTurns: 1,
temperature: 0.3,
}
// ---------------------------------------------------------------------------
// Build agents — no tools needed for pure reasoning
// ---------------------------------------------------------------------------
function buildAgent(config: AgentConfig): Agent {
const registry = new ToolRegistry()
registerBuiltInTools(registry) // not needed here, but safe if tools are added later
const executor = new ToolExecutor(registry)
return new Agent(config, registry, executor)
}
const optimist = buildAgent(optimistConfig)
const skeptic = buildAgent(skepticConfig)
const pragmatist = buildAgent(pragmatistConfig)
const synthesizer = buildAgent(synthesizerConfig)
// ---------------------------------------------------------------------------
// Set up the pool
// ---------------------------------------------------------------------------
const pool = new AgentPool(3) // 3 analysts can run simultaneously
pool.add(optimist)
pool.add(skeptic)
pool.add(pragmatist)
pool.add(synthesizer)
console.log('Fan-Out / Aggregate (MapReduce) Pattern')
console.log('='.repeat(60))
console.log(`\nTopic: ${TOPIC.replace(/\n/g, ' ').trim()}\n`)
// ---------------------------------------------------------------------------
// Step 1: Fan-out — run all 3 analysts in parallel
// ---------------------------------------------------------------------------
console.log('[Step 1] Fan-out: 3 analysts running in parallel...\n')
const analystResults: Map<string, AgentRunResult> = await pool.runParallel([
{ agent: 'optimist', prompt: TOPIC },
{ agent: 'skeptic', prompt: TOPIC },
{ agent: 'pragmatist', prompt: TOPIC },
])
// Print each analyst's output (truncated)
const analysts = ['optimist', 'skeptic', 'pragmatist'] as const
for (const name of analysts) {
const result = analystResults.get(name)!
const status = result.success ? 'OK' : 'FAILED'
console.log(` ${name} [${status}] — ${result.tokenUsage.output_tokens} output tokens`)
console.log(` ${result.output.slice(0, 150).replace(/\n/g, ' ')}...`)
console.log()
}
// Check all analysts succeeded
for (const name of analysts) {
if (!analystResults.get(name)!.success) {
console.error(`Analyst '${name}' failed: ${analystResults.get(name)!.output}`)
process.exit(1)
}
}
// ---------------------------------------------------------------------------
// Step 2: Aggregate — synthesizer reads all 3 analyses
// ---------------------------------------------------------------------------
console.log('[Step 2] Aggregate: synthesizer producing final report...\n')
const synthesizerPrompt = `Three analysts have independently evaluated the same question.
Read their analyses below and produce your synthesis report.
--- OPTIMIST ---
${analystResults.get('optimist')!.output}
--- SKEPTIC ---
${analystResults.get('skeptic')!.output}
--- PRAGMATIST ---
${analystResults.get('pragmatist')!.output}
Now synthesize these three perspectives into a balanced recommendation.`
const synthResult = await pool.run('synthesizer', synthesizerPrompt)
if (!synthResult.success) {
console.error('Synthesizer failed:', synthResult.output)
process.exit(1)
}
// ---------------------------------------------------------------------------
// Final output
// ---------------------------------------------------------------------------
console.log('='.repeat(60))
console.log('SYNTHESIZED REPORT')
console.log('='.repeat(60))
console.log()
console.log(synthResult.output)
console.log()
console.log('-'.repeat(60))
// ---------------------------------------------------------------------------
// Token usage comparison
// ---------------------------------------------------------------------------
console.log('\nToken Usage Summary:')
console.log('-'.repeat(60))
let totalInput = 0
let totalOutput = 0
for (const name of analysts) {
const r = analystResults.get(name)!
totalInput += r.tokenUsage.input_tokens
totalOutput += r.tokenUsage.output_tokens
console.log(` ${name.padEnd(12)} — input: ${r.tokenUsage.input_tokens}, output: ${r.tokenUsage.output_tokens}`)
}
totalInput += synthResult.tokenUsage.input_tokens
totalOutput += synthResult.tokenUsage.output_tokens
console.log(` ${'synthesizer'.padEnd(12)} — input: ${synthResult.tokenUsage.input_tokens}, output: ${synthResult.tokenUsage.output_tokens}`)
console.log('-'.repeat(60))
console.log(` ${'TOTAL'.padEnd(12)} — input: ${totalInput}, output: ${totalOutput}`)
console.log('\nDone.')

examples/08-gemma4-local.ts
@@ -0,0 +1,192 @@
/**
* Example 08 — Gemma 4 Local (100% Local, Zero API Cost)
*
* Demonstrates both execution modes with a fully local Gemma 4 model via
* Ollama. No cloud API keys needed — everything runs on your machine.
*
* Part 1 — runTasks(): explicit task pipeline (researcher → summarizer)
* Part 2 — runTeam(): auto-orchestration where Gemma 4 acts as coordinator,
* decomposes the goal into tasks, and synthesises the final result
*
* This is the hardest test for a local model — runTeam() requires it to
* produce valid JSON for task decomposition AND do tool-calling for execution.
* Gemma 4 e2b (5.1B params) handles both reliably.
*
* Run:
* no_proxy=localhost npx tsx examples/08-gemma4-local.ts
*
* Prerequisites:
* 1. Ollama >= 0.20.0 installed and running: https://ollama.com
* 2. Pull the model: ollama pull gemma4:e2b
* (or gemma4:e4b for better quality on machines with more RAM)
* 3. No API keys needed!
*
* Note: The no_proxy=localhost prefix is needed if you have an HTTP proxy
* configured, since the OpenAI SDK would otherwise route Ollama requests
* through the proxy.
*/
import { OpenMultiAgent } from '../src/index.js'
import type { AgentConfig, OrchestratorEvent, Task } from '../src/types.js'
// ---------------------------------------------------------------------------
// Configuration — change this to match your Ollama setup
// ---------------------------------------------------------------------------
// See available tags at https://ollama.com/library/gemma4
const OLLAMA_MODEL = 'gemma4:e2b' // or 'gemma4:e4b', 'gemma4:26b'
const OLLAMA_BASE_URL = 'http://localhost:11434/v1'
const OUTPUT_DIR = '/tmp/gemma4-demo'
// ---------------------------------------------------------------------------
// Agents
// ---------------------------------------------------------------------------
const researcher: AgentConfig = {
name: 'researcher',
model: OLLAMA_MODEL,
provider: 'openai',
baseURL: OLLAMA_BASE_URL,
apiKey: 'ollama', // placeholder — Ollama ignores this, but the OpenAI SDK requires a non-empty value
systemPrompt: `You are a system researcher. Use bash to run non-destructive,
read-only commands (uname -a, sw_vers, df -h, uptime, etc.) and report results.
Use file_write to save reports when asked.`,
tools: ['bash', 'file_write'],
maxTurns: 8,
}
const summarizer: AgentConfig = {
name: 'summarizer',
model: OLLAMA_MODEL,
provider: 'openai',
baseURL: OLLAMA_BASE_URL,
apiKey: 'ollama',
systemPrompt: `You are a technical writer. Read files and produce concise,
structured Markdown summaries. Use file_write to save reports when asked.`,
tools: ['file_read', 'file_write'],
maxTurns: 4,
}
// ---------------------------------------------------------------------------
// Progress handler
// ---------------------------------------------------------------------------
function handleProgress(event: OrchestratorEvent): void {
const ts = new Date().toISOString().slice(11, 23)
switch (event.type) {
case 'task_start': {
const task = event.data as Task | undefined
console.log(`[${ts}] TASK START "${task?.title ?? event.task}" → ${task?.assignee ?? '?'}`)
break
}
case 'task_complete':
console.log(`[${ts}] TASK DONE "${event.task}"`)
break
case 'agent_start':
console.log(`[${ts}] AGENT START ${event.agent}`)
break
case 'agent_complete':
console.log(`[${ts}] AGENT DONE ${event.agent}`)
break
case 'error':
console.error(`[${ts}] ERROR ${event.agent ?? ''} task=${event.task ?? '?'}`)
break
}
}
// ═══════════════════════════════════════════════════════════════════════════
// Part 1: runTasks() — Explicit task pipeline
// ═══════════════════════════════════════════════════════════════════════════
console.log('Part 1: runTasks() — Explicit Pipeline')
console.log('='.repeat(60))
console.log(` model → ${OLLAMA_MODEL} via Ollama`)
console.log(` pipeline → researcher gathers info → summarizer writes summary`)
console.log()
const orchestrator1 = new OpenMultiAgent({
defaultModel: OLLAMA_MODEL,
maxConcurrency: 1, // local model serves one request at a time
onProgress: handleProgress,
})
const team1 = orchestrator1.createTeam('explicit', {
name: 'explicit',
agents: [researcher, summarizer],
sharedMemory: true,
})
const tasks = [
{
title: 'Gather system information',
description: `Use bash to run system info commands (uname -a, sw_vers, sysctl, df -h, uptime).
Then write a structured Markdown report to ${OUTPUT_DIR}/system-report.md with sections:
OS, Hardware, Disk, and Uptime.`,
assignee: 'researcher',
},
{
title: 'Summarize the report',
description: `Read the file at ${OUTPUT_DIR}/system-report.md.
Produce a concise one-paragraph executive summary of the system information.`,
assignee: 'summarizer',
dependsOn: ['Gather system information'],
},
]
const start1 = Date.now()
const result1 = await orchestrator1.runTasks(team1, tasks)
console.log(`\nSuccess: ${result1.success} Time: ${((Date.now() - start1) / 1000).toFixed(1)}s`)
console.log(`Tokens — input: ${result1.totalTokenUsage.input_tokens}, output: ${result1.totalTokenUsage.output_tokens}`)
const summary = result1.agentResults.get('summarizer')
if (summary?.success) {
console.log('\nSummary (from local Gemma 4):')
console.log('-'.repeat(60))
console.log(summary.output)
console.log('-'.repeat(60))
}
// ═══════════════════════════════════════════════════════════════════════════
// Part 2: runTeam() — Auto-orchestration (Gemma 4 as coordinator)
// ═══════════════════════════════════════════════════════════════════════════
console.log('\n\nPart 2: runTeam() — Auto-Orchestration')
console.log('='.repeat(60))
console.log(` coordinator → auto-created by runTeam(), also Gemma 4`)
console.log(` goal → given in natural language, framework plans everything`)
console.log()
const orchestrator2 = new OpenMultiAgent({
defaultModel: OLLAMA_MODEL,
defaultProvider: 'openai',
defaultBaseURL: OLLAMA_BASE_URL,
defaultApiKey: 'ollama',
maxConcurrency: 1,
onProgress: handleProgress,
})
const team2 = orchestrator2.createTeam('auto', {
name: 'auto',
agents: [researcher, summarizer],
sharedMemory: true,
})
const goal = `Check this machine's Node.js version, npm version, and OS info,
then write a short Markdown summary report to /tmp/gemma4-auto/report.md`
const start2 = Date.now()
const result2 = await orchestrator2.runTeam(team2, goal)
console.log(`\nSuccess: ${result2.success} Time: ${((Date.now() - start2) / 1000).toFixed(1)}s`)
console.log(`Tokens — input: ${result2.totalTokenUsage.input_tokens}, output: ${result2.totalTokenUsage.output_tokens}`)
const coordResult = result2.agentResults.get('coordinator')
if (coordResult?.success) {
console.log('\nFinal synthesis (from local Gemma 4 coordinator):')
console.log('-'.repeat(60))
console.log(coordResult.output)
console.log('-'.repeat(60))
}
console.log('\nAll processing done locally. $0 API cost.')
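If the script fails immediately, the usual cause is that Ollama is not listening at `OLLAMA_BASE_URL`. A minimal preflight sketch, assuming Ollama's OpenAI-compatible `GET /v1/models` route and Node 18+'s global `fetch`, that could sit near the top of the script:

```ts
// Preflight (sketch, not part of the example above): fail fast if the
// Ollama endpoint is unreachable before any agent work starts.
const ping = await fetch(`${OLLAMA_BASE_URL}/models`).catch(() => undefined)
if (!ping?.ok) {
  console.error(`Ollama is not reachable at ${OLLAMA_BASE_URL}. Start it with "ollama serve".`)
  process.exit(1)
}
```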

73
examples/09-structured-output.ts Normal file
View File

@ -0,0 +1,73 @@
/**
 * Example 09 — Structured Output
*
* Demonstrates `outputSchema` on AgentConfig. The agent's response is
* automatically parsed as JSON and validated against a Zod schema.
* On validation failure, the framework retries once with error feedback.
*
* The validated result is available via `result.structured`.
*
* Run:
* npx tsx examples/09-structured-output.ts
*
* Prerequisites:
* ANTHROPIC_API_KEY env var must be set.
*/
import { z } from 'zod'
import { OpenMultiAgent } from '../src/index.js'
import type { AgentConfig } from '../src/types.js'
// ---------------------------------------------------------------------------
// Define a Zod schema for the expected output
// ---------------------------------------------------------------------------
const ReviewAnalysis = z.object({
summary: z.string().describe('One-sentence summary of the review'),
sentiment: z.enum(['positive', 'negative', 'neutral']),
confidence: z.number().min(0).max(1).describe('How confident the analysis is'),
keyTopics: z.array(z.string()).describe('Main topics mentioned in the review'),
})
type ReviewAnalysis = z.infer<typeof ReviewAnalysis>
// ---------------------------------------------------------------------------
// Agent with outputSchema
// ---------------------------------------------------------------------------
const analyst: AgentConfig = {
name: 'analyst',
model: 'claude-sonnet-4-6',
systemPrompt: 'You are a product review analyst. Analyze the given review and extract structured insights.',
outputSchema: ReviewAnalysis,
}
// ---------------------------------------------------------------------------
// Run
// ---------------------------------------------------------------------------
const orchestrator = new OpenMultiAgent({ defaultModel: 'claude-sonnet-4-6' })
const reviews = [
'This keyboard is amazing! The mechanical switches feel incredible and the RGB lighting is stunning. Build quality is top-notch. Only downside is the price.',
'Terrible experience. The product arrived broken, customer support was unhelpful, and the return process took 3 weeks.',
'It works fine. Nothing special, nothing bad. Does what it says on the box.',
]
console.log('Analyzing product reviews with structured output...\n')
for (const review of reviews) {
const result = await orchestrator.runAgent(analyst, `Analyze this review: "${review}"`)
if (result.structured) {
const data = result.structured as ReviewAnalysis
console.log(`Sentiment: ${data.sentiment} (confidence: ${data.confidence})`)
console.log(`Summary: ${data.summary}`)
console.log(`Topics: ${data.keyTopics.join(', ')}`)
} else {
console.log(`Validation failed. Raw output: ${result.output.slice(0, 100)}`)
}
console.log(`Tokens: ${result.tokenUsage.input_tokens} in / ${result.tokenUsage.output_tokens} out`)
console.log('---')
}
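
Because `result.structured` is typed `unknown`, a thin generic wrapper can recover static typing from the same schema. A sketch under that assumption; `runStructured` is an illustrative helper (not framework API) and reuses `orchestrator`, `analyst`, `reviews`, and `ReviewAnalysis` from the example above:

```ts
import type { ZodSchema } from 'zod'

async function runStructured<S extends ZodSchema>(
  schema: S,
  prompt: string,
): Promise<z.infer<S> | undefined> {
  const result = await orchestrator.runAgent(analyst, prompt)
  // `structured` was already validated by the framework; parsing again here
  // only narrows the static type.
  return result.structured === undefined
    ? undefined
    : (schema.parse(result.structured) as z.infer<S>)
}

const first = await runStructured(ReviewAnalysis, `Analyze this review: "${reviews[0]}"`)
if (first) console.log(first.sentiment) // typed as 'positive' | 'negative' | 'neutral'
```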

132
examples/10-task-retry.ts Normal file
View File

@ -0,0 +1,132 @@
/**
 * Example 10 — Task Retry with Exponential Backoff
*
* Demonstrates `maxRetries`, `retryDelayMs`, and `retryBackoff` on task config.
* When a task fails, the framework automatically retries with exponential
* backoff. The `onProgress` callback receives `task_retry` events so you can
* log retry attempts in real time.
*
* Scenario: a two-step pipeline where the first task (data fetch) is configured
* to retry on failure, and the second task (analysis) depends on it.
*
* Run:
* npx tsx examples/10-task-retry.ts
*
* Prerequisites:
* ANTHROPIC_API_KEY env var must be set.
*/
import { OpenMultiAgent } from '../src/index.js'
import type { AgentConfig, OrchestratorEvent } from '../src/types.js'
// ---------------------------------------------------------------------------
// Agents
// ---------------------------------------------------------------------------
const fetcher: AgentConfig = {
name: 'fetcher',
model: 'claude-sonnet-4-6',
systemPrompt: `You are a data-fetching agent. When given a topic, produce a short
JSON summary with 3-5 key facts. Output ONLY valid JSON, no markdown fences.
Example: {"topic":"...", "facts":["fact1","fact2","fact3"]}`,
maxTurns: 2,
}
const analyst: AgentConfig = {
name: 'analyst',
model: 'claude-sonnet-4-6',
systemPrompt: `You are a data analyst. Read the fetched data from shared memory
and produce a brief analysis (3-4 sentences) highlighting trends or insights.`,
maxTurns: 2,
}
// ---------------------------------------------------------------------------
// Progress handler — watch for task_retry events
// ---------------------------------------------------------------------------
function handleProgress(event: OrchestratorEvent): void {
const ts = new Date().toISOString().slice(11, 23)
switch (event.type) {
case 'task_start':
console.log(`[${ts}] TASK START "${event.task}" (agent: ${event.agent})`)
break
case 'task_complete':
console.log(`[${ts}] TASK DONE "${event.task}"`)
break
case 'task_retry': {
const d = event.data as { attempt: number; maxAttempts: number; error: string; nextDelayMs: number }
console.log(`[${ts}] TASK RETRY "${event.task}" — attempt ${d.attempt}/${d.maxAttempts}, next in ${d.nextDelayMs}ms`)
console.log(` error: ${d.error.slice(0, 120)}`)
break
}
case 'error':
console.log(`[${ts}] ERROR "${event.task}" agent=${event.agent}`)
break
}
}
// ---------------------------------------------------------------------------
// Orchestrator + team
// ---------------------------------------------------------------------------
const orchestrator = new OpenMultiAgent({
defaultModel: 'claude-sonnet-4-6',
onProgress: handleProgress,
})
const team = orchestrator.createTeam('retry-demo', {
name: 'retry-demo',
agents: [fetcher, analyst],
sharedMemory: true,
})
// ---------------------------------------------------------------------------
// Tasks — fetcher has retry config, analyst depends on it
// ---------------------------------------------------------------------------
const tasks = [
{
title: 'Fetch data',
description: 'Fetch key facts about the adoption of TypeScript in open-source projects as of 2024. Output a JSON object with a "topic" and "facts" array.',
assignee: 'fetcher',
// Retry config: up to 2 retries, 500ms base delay, 2x backoff (500ms, 1000ms)
maxRetries: 2,
retryDelayMs: 500,
retryBackoff: 2,
},
{
title: 'Analyze data',
description: 'Read the fetched data from shared memory and produce a 3-4 sentence analysis of TypeScript adoption trends.',
assignee: 'analyst',
dependsOn: ['Fetch data'],
// No retry — if analysis fails, just report the error
},
]
// ---------------------------------------------------------------------------
// Run
// ---------------------------------------------------------------------------
console.log('Task Retry Example')
console.log('='.repeat(60))
console.log('Pipeline: fetch (with retry) → analyze')
console.log(`Retry config: maxRetries=2, delay=500ms, backoff=2x`)
console.log('='.repeat(60))
console.log()
const result = await orchestrator.runTasks(team, tasks)
// ---------------------------------------------------------------------------
// Summary
// ---------------------------------------------------------------------------
console.log('\n' + '='.repeat(60))
console.log(`Overall success: ${result.success}`)
console.log(`Tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`)
for (const [name, r] of result.agentResults) {
const icon = r.success ? 'OK ' : 'FAIL'
console.log(` [${icon}] ${name}`)
console.log(` ${r.output.slice(0, 200)}`)
}
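
For reference, the framework also exports `computeRetryDelay` (see the `src/index.ts` diff below). A sketch of the schedule this config produces, assuming the conventional exponential formula implied by the `(500ms, 1000ms)` comment in the task config:

```ts
// Assumed formula: delay = retryDelayMs * retryBackoff ** (attempt - 1)
//   attempt 1 fails → wait 500ms
//   attempt 2 fails → wait 1000ms
//   attempt 3 fails → task reported as failed (maxRetries = 2 exhausted)
function expectedDelay(attempt: number, retryDelayMs = 500, retryBackoff = 2): number {
  return retryDelayMs * retryBackoff ** (attempt - 1)
}
```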

133
examples/11-trace-observability.ts Normal file
View File

@ -0,0 +1,133 @@
/**
 * Example 11 — Trace Observability
*
* Demonstrates the `onTrace` callback for lightweight observability. Every LLM
* call, tool execution, task lifecycle, and agent run emits a structured trace
 * event with timing data and token usage — giving you full visibility into
* what's happening inside a multi-agent run.
*
* Trace events share a `runId` for correlation, so you can reconstruct the
* full execution timeline. Pipe them into your own logging, OpenTelemetry, or
* dashboard.
*
* Run:
* npx tsx examples/11-trace-observability.ts
*
* Prerequisites:
* ANTHROPIC_API_KEY env var must be set.
*/
import { OpenMultiAgent } from '../src/index.js'
import type { AgentConfig, TraceEvent } from '../src/types.js'
// ---------------------------------------------------------------------------
// Agents
// ---------------------------------------------------------------------------
const researcher: AgentConfig = {
name: 'researcher',
model: 'claude-sonnet-4-6',
systemPrompt: 'You are a research assistant. Provide concise, factual answers.',
maxTurns: 2,
}
const writer: AgentConfig = {
name: 'writer',
model: 'claude-sonnet-4-6',
systemPrompt: 'You are a technical writer. Summarize research into clear prose.',
maxTurns: 2,
}
// ---------------------------------------------------------------------------
// Trace handler — log every span with timing
// ---------------------------------------------------------------------------
function handleTrace(event: TraceEvent): void {
const dur = `${event.durationMs}ms`.padStart(7)
switch (event.type) {
case 'llm_call':
console.log(
` [LLM] ${dur} agent=${event.agent} model=${event.model} turn=${event.turn}` +
` tokens=${event.tokens.input_tokens}in/${event.tokens.output_tokens}out`,
)
break
case 'tool_call':
console.log(
` [TOOL] ${dur} agent=${event.agent} tool=${event.tool}` +
` error=${event.isError}`,
)
break
case 'task':
console.log(
` [TASK] ${dur} task="${event.taskTitle}" agent=${event.agent}` +
` success=${event.success} retries=${event.retries}`,
)
break
case 'agent':
console.log(
` [AGENT] ${dur} agent=${event.agent} turns=${event.turns}` +
` tools=${event.toolCalls} tokens=${event.tokens.input_tokens}in/${event.tokens.output_tokens}out`,
)
break
}
}
// ---------------------------------------------------------------------------
// Orchestrator + team
// ---------------------------------------------------------------------------
const orchestrator = new OpenMultiAgent({
defaultModel: 'claude-sonnet-4-6',
onTrace: handleTrace,
})
const team = orchestrator.createTeam('trace-demo', {
name: 'trace-demo',
agents: [researcher, writer],
sharedMemory: true,
})
// ---------------------------------------------------------------------------
// Tasks — researcher first, then writer summarizes
// ---------------------------------------------------------------------------
const tasks = [
{
title: 'Research topic',
description: 'List 5 key benefits of TypeScript for large codebases. Be concise.',
assignee: 'researcher',
},
{
title: 'Write summary',
description: 'Read the research from shared memory and write a 3-sentence summary.',
assignee: 'writer',
dependsOn: ['Research topic'],
},
]
// ---------------------------------------------------------------------------
// Run
// ---------------------------------------------------------------------------
console.log('Trace Observability Example')
console.log('='.repeat(60))
console.log('Pipeline: research → write (with full trace output)')
console.log('='.repeat(60))
console.log()
const result = await orchestrator.runTasks(team, tasks)
// ---------------------------------------------------------------------------
// Summary
// ---------------------------------------------------------------------------
console.log('\n' + '='.repeat(60))
console.log(`Overall success: ${result.success}`)
console.log(`Tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`)
for (const [name, r] of result.agentResults) {
const icon = r.success ? 'OK ' : 'FAIL'
console.log(` [${icon}] ${name}`)
console.log(` ${r.output.slice(0, 200)}`)
}
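
Because every span carries a `runId` plus `startMs`/`durationMs`, reconstructing a per-run timeline is a filter-and-sort. A minimal sketch using only the fields the events above already emit:

```ts
// Collect events instead of (or in addition to) logging them.
const spans: TraceEvent[] = []
const collector = (event: TraceEvent): void => { spans.push(event) }
// ...pass `collector` as `onTrace`, run the pipeline, then:
function timelineFor(runId: string): TraceEvent[] {
  return spans.filter(s => s.runId === runId).sort((a, b) => a.startMs - b.startMs)
}
```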

528
package-lock.json generated
View File

@ -16,6 +16,7 @@
},
"devDependencies": {
"@types/node": "^22.0.0",
"tsx": "^4.21.0",
"typescript": "^5.6.0",
"vitest": "^2.1.0"
},
@ -321,6 +322,23 @@
"node": ">=12"
}
},
"node_modules/@esbuild/netbsd-arm64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.7.tgz",
"integrity": "sha512-b6pqtrQdigZBwZxAn1UpazEisvwaIDvdbMbmrly7cDTMFnw/+3lVxxCTGOrkPVnsYIosJJXAsILG9XcQS+Yu6w==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"netbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/netbsd-x64": {
"version": "0.21.5",
"resolved": "https://registry.npmmirror.com/@esbuild/netbsd-x64/-/netbsd-x64-0.21.5.tgz",
@ -338,6 +356,23 @@
"node": ">=12"
}
},
"node_modules/@esbuild/openbsd-arm64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.7.tgz",
"integrity": "sha512-AFuojMQTxAz75Fo8idVcqoQWEHIXFRbOc1TrVcFSgCZtQfSdc1RXgB3tjOn/krRHENUB4j00bfGjyl2mJrU37A==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"openbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/openbsd-x64": {
"version": "0.21.5",
"resolved": "https://registry.npmmirror.com/@esbuild/openbsd-x64/-/openbsd-x64-0.21.5.tgz",
@ -355,6 +390,23 @@
"node": ">=12"
}
},
"node_modules/@esbuild/openharmony-arm64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.7.tgz",
"integrity": "sha512-+KrvYb/C8zA9CU/g0sR6w2RBw7IGc5J2BPnc3dYc5VJxHCSF1yNMxTV5LQ7GuKteQXZtspjFbiuW5/dOj7H4Yw==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"openharmony"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/sunos-x64": {
"version": "0.21.5",
"resolved": "https://registry.npmmirror.com/@esbuild/sunos-x64/-/sunos-x64-0.21.5.tgz",
@ -1288,6 +1340,19 @@
"node": ">= 0.4"
}
},
"node_modules/get-tsconfig": {
"version": "4.13.7",
"resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.7.tgz",
"integrity": "sha512-7tN6rFgBlMgpBML5j8typ92BKFi2sFQvIdpAqLA2beia5avZDrMs0FLZiM5etShWq5irVyGcGMEA1jcDaK7A/Q==",
"dev": true,
"license": "MIT",
"dependencies": {
"resolve-pkg-maps": "^1.0.0"
},
"funding": {
"url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
}
},
"node_modules/gopd": {
"version": "1.2.0",
"resolved": "https://registry.npmmirror.com/gopd/-/gopd-1.2.0.tgz",
@ -1564,6 +1629,16 @@
"node": "^10 || ^12 || >=14"
}
},
"node_modules/resolve-pkg-maps": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
"integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
"dev": true,
"license": "MIT",
"funding": {
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
}
},
"node_modules/rollup": {
"version": "4.60.1",
"resolved": "https://registry.npmmirror.com/rollup/-/rollup-4.60.1.tgz",
@ -1690,6 +1765,459 @@
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
"license": "MIT"
},
"node_modules/tsx": {
"version": "4.21.0",
"resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz",
"integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==",
"dev": true,
"license": "MIT",
"dependencies": {
"esbuild": "~0.27.0",
"get-tsconfig": "^4.7.5"
},
"bin": {
"tsx": "dist/cli.mjs"
},
"engines": {
"node": ">=18.0.0"
},
"optionalDependencies": {
"fsevents": "~2.3.3"
}
},
"node_modules/tsx/node_modules/@esbuild/aix-ppc64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.7.tgz",
"integrity": "sha512-EKX3Qwmhz1eMdEJokhALr0YiD0lhQNwDqkPYyPhiSwKrh7/4KRjQc04sZ8db+5DVVnZ1LmbNDI1uAMPEUBnQPg==",
"cpu": [
"ppc64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"aix"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/android-arm": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.7.tgz",
"integrity": "sha512-jbPXvB4Yj2yBV7HUfE2KHe4GJX51QplCN1pGbYjvsyCZbQmies29EoJbkEc+vYuU5o45AfQn37vZlyXy4YJ8RQ==",
"cpu": [
"arm"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/android-arm64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.7.tgz",
"integrity": "sha512-62dPZHpIXzvChfvfLJow3q5dDtiNMkwiRzPylSCfriLvZeq0a1bWChrGx/BbUbPwOrsWKMn8idSllklzBy+dgQ==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/android-x64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.7.tgz",
"integrity": "sha512-x5VpMODneVDb70PYV2VQOmIUUiBtY3D3mPBG8NxVk5CogneYhkR7MmM3yR/uMdITLrC1ml/NV1rj4bMJuy9MCg==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/darwin-arm64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.7.tgz",
"integrity": "sha512-5lckdqeuBPlKUwvoCXIgI2D9/ABmPq3Rdp7IfL70393YgaASt7tbju3Ac+ePVi3KDH6N2RqePfHnXkaDtY9fkw==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/darwin-x64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.7.tgz",
"integrity": "sha512-rYnXrKcXuT7Z+WL5K980jVFdvVKhCHhUwid+dDYQpH+qu+TefcomiMAJpIiC2EM3Rjtq0sO3StMV/+3w3MyyqQ==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/freebsd-arm64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.7.tgz",
"integrity": "sha512-B48PqeCsEgOtzME2GbNM2roU29AMTuOIN91dsMO30t+Ydis3z/3Ngoj5hhnsOSSwNzS+6JppqWsuhTp6E82l2w==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/freebsd-x64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.7.tgz",
"integrity": "sha512-jOBDK5XEjA4m5IJK3bpAQF9/Lelu/Z9ZcdhTRLf4cajlB+8VEhFFRjWgfy3M1O4rO2GQ/b2dLwCUGpiF/eATNQ==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/linux-arm": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.7.tgz",
"integrity": "sha512-RkT/YXYBTSULo3+af8Ib0ykH8u2MBh57o7q/DAs3lTJlyVQkgQvlrPTnjIzzRPQyavxtPtfg0EopvDyIt0j1rA==",
"cpu": [
"arm"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/linux-arm64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.7.tgz",
"integrity": "sha512-RZPHBoxXuNnPQO9rvjh5jdkRmVizktkT7TCDkDmQ0W2SwHInKCAV95GRuvdSvA7w4VMwfCjUiPwDi0ZO6Nfe9A==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/linux-ia32": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.7.tgz",
"integrity": "sha512-GA48aKNkyQDbd3KtkplYWT102C5sn/EZTY4XROkxONgruHPU72l+gW+FfF8tf2cFjeHaRbWpOYa/uRBz/Xq1Pg==",
"cpu": [
"ia32"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/linux-loong64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.7.tgz",
"integrity": "sha512-a4POruNM2oWsD4WKvBSEKGIiWQF8fZOAsycHOt6JBpZ+JN2n2JH9WAv56SOyu9X5IqAjqSIPTaJkqN8F7XOQ5Q==",
"cpu": [
"loong64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/linux-mips64el": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.7.tgz",
"integrity": "sha512-KabT5I6StirGfIz0FMgl1I+R1H73Gp0ofL9A3nG3i/cYFJzKHhouBV5VWK1CSgKvVaG4q1RNpCTR2LuTVB3fIw==",
"cpu": [
"mips64el"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/linux-ppc64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.7.tgz",
"integrity": "sha512-gRsL4x6wsGHGRqhtI+ifpN/vpOFTQtnbsupUF5R5YTAg+y/lKelYR1hXbnBdzDjGbMYjVJLJTd2OFmMewAgwlQ==",
"cpu": [
"ppc64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/linux-riscv64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.7.tgz",
"integrity": "sha512-hL25LbxO1QOngGzu2U5xeXtxXcW+/GvMN3ejANqXkxZ/opySAZMrc+9LY/WyjAan41unrR3YrmtTsUpwT66InQ==",
"cpu": [
"riscv64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/linux-s390x": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.7.tgz",
"integrity": "sha512-2k8go8Ycu1Kb46vEelhu1vqEP+UeRVj2zY1pSuPdgvbd5ykAw82Lrro28vXUrRmzEsUV0NzCf54yARIK8r0fdw==",
"cpu": [
"s390x"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/linux-x64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.7.tgz",
"integrity": "sha512-hzznmADPt+OmsYzw1EE33ccA+HPdIqiCRq7cQeL1Jlq2gb1+OyWBkMCrYGBJ+sxVzve2ZJEVeePbLM2iEIZSxA==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/netbsd-x64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.7.tgz",
"integrity": "sha512-OfatkLojr6U+WN5EDYuoQhtM+1xco+/6FSzJJnuWiUw5eVcicbyK3dq5EeV/QHT1uy6GoDhGbFpprUiHUYggrw==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"netbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/openbsd-x64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.7.tgz",
"integrity": "sha512-+A1NJmfM8WNDv5CLVQYJ5PshuRm/4cI6WMZRg1by1GwPIQPCTs1GLEUHwiiQGT5zDdyLiRM/l1G0Pv54gvtKIg==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"openbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/sunos-x64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.7.tgz",
"integrity": "sha512-ikktIhFBzQNt/QDyOL580ti9+5mL/YZeUPKU2ivGtGjdTYoqz6jObj6nOMfhASpS4GU4Q/Clh1QtxWAvcYKamA==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"sunos"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/win32-arm64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.7.tgz",
"integrity": "sha512-7yRhbHvPqSpRUV7Q20VuDwbjW5kIMwTHpptuUzV+AA46kiPze5Z7qgt6CLCK3pWFrHeNfDd1VKgyP4O+ng17CA==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/win32-ia32": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.7.tgz",
"integrity": "sha512-SmwKXe6VHIyZYbBLJrhOoCJRB/Z1tckzmgTLfFYOfpMAx63BJEaL9ExI8x7v0oAO3Zh6D/Oi1gVxEYr5oUCFhw==",
"cpu": [
"ia32"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/@esbuild/win32-x64": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.7.tgz",
"integrity": "sha512-56hiAJPhwQ1R4i+21FVF7V8kSD5zZTdHcVuRFMW0hn753vVfQN8xlx4uOPT4xoGH0Z/oVATuR82AiqSTDIpaHg==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=18"
}
},
"node_modules/tsx/node_modules/esbuild": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.7.tgz",
"integrity": "sha512-IxpibTjyVnmrIQo5aqNpCgoACA/dTKLTlhMHihVHhdkxKyPO1uBBthumT0rdHmcsk9uMonIWS0m4FljWzILh3w==",
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"bin": {
"esbuild": "bin/esbuild"
},
"engines": {
"node": ">=18"
},
"optionalDependencies": {
"@esbuild/aix-ppc64": "0.27.7",
"@esbuild/android-arm": "0.27.7",
"@esbuild/android-arm64": "0.27.7",
"@esbuild/android-x64": "0.27.7",
"@esbuild/darwin-arm64": "0.27.7",
"@esbuild/darwin-x64": "0.27.7",
"@esbuild/freebsd-arm64": "0.27.7",
"@esbuild/freebsd-x64": "0.27.7",
"@esbuild/linux-arm": "0.27.7",
"@esbuild/linux-arm64": "0.27.7",
"@esbuild/linux-ia32": "0.27.7",
"@esbuild/linux-loong64": "0.27.7",
"@esbuild/linux-mips64el": "0.27.7",
"@esbuild/linux-ppc64": "0.27.7",
"@esbuild/linux-riscv64": "0.27.7",
"@esbuild/linux-s390x": "0.27.7",
"@esbuild/linux-x64": "0.27.7",
"@esbuild/netbsd-arm64": "0.27.7",
"@esbuild/netbsd-x64": "0.27.7",
"@esbuild/openbsd-arm64": "0.27.7",
"@esbuild/openbsd-x64": "0.27.7",
"@esbuild/openharmony-arm64": "0.27.7",
"@esbuild/sunos-x64": "0.27.7",
"@esbuild/win32-arm64": "0.27.7",
"@esbuild/win32-ia32": "0.27.7",
"@esbuild/win32-x64": "0.27.7"
}
},
"node_modules/typescript": {
"version": "5.9.3",
"resolved": "https://registry.npmmirror.com/typescript/-/typescript-5.9.3.tgz",

package.json
View File

@ -1 +1,50 @@
{"name":"@jackchen_me/open-multi-agent","version":"0.1.0","description":"Production-grade multi-agent orchestration framework. Model-agnostic, supports team collaboration, task scheduling, and inter-agent communication.","type":"module","main":"dist/index.js","types":"dist/index.d.ts","exports":{".":{"types":"./dist/index.d.ts","import":"./dist/index.js"}},"scripts":{"build":"tsc","dev":"tsc --watch","test":"vitest run","test:watch":"vitest","lint":"tsc --noEmit","prepublishOnly":"npm run build"},"keywords":["ai","agent","multi-agent","orchestration","llm","claude","openai","ollama","mcp","tool-use","agent-framework"],"author":"","license":"MIT","engines":{"node":">=18.0.0"},"dependencies":{"@anthropic-ai/sdk":"^0.52.0","openai":"^4.73.0","zod":"^3.23.0"},"devDependencies":{"typescript":"^5.6.0","vitest":"^2.1.0","@types/node":"^22.0.0"}}
{
"name":"@jackchen_me/open-multi-agent",
"version":"0.1.0",
"description":"Production-grade multi-agent orchestration framework. Model-agnostic, supports team collaboration, task scheduling, and inter-agent communication.",
"type":"module",
"main":"dist/index.js",
"types":"dist/index.d.ts",
"exports": {
".": {
"types":"./dist/index.d.ts",
"import":"./dist/index.js"
}
},
"scripts": {
"build":"tsc",
"dev":"tsc --watch",
"test":"vitest run",
"test:watch":"vitest",
"lint":"tsc --noEmit",
"prepublishOnly":"npm run build"
},
"keywords": [
"ai",
"agent",
"multi-agent",
"orchestration",
"llm",
"claude",
"openai",
"ollama",
"mcp",
"tool-use",
"agent-framework"
],
"author":"",
"license":"MIT",
"engines":{
"node":">=18.0.0"
},
"dependencies": {
"@anthropic-ai/sdk":"^0.52.0",
"openai":"^4.73.0",
"zod":"^3.23.0"
},
"devDependencies": {
"typescript":"^5.6.0",
"vitest":"^2.1.0",
"@types/node":"^22.0.0"
}
}

src/agent/agent.ts
View File

@ -32,10 +32,16 @@ import type {
TokenUsage,
ToolUseContext,
} from '../types.js'
import { emitTrace, generateRunId } from '../utils/trace.js'
import type { ToolDefinition as FrameworkToolDefinition, ToolRegistry } from '../tool/framework.js'
import type { ToolExecutor } from '../tool/executor.js'
import { createAdapter } from '../llm/adapter.js'
import { AgentRunner, type RunnerOptions, type RunOptions } from './runner.js'
import { AgentRunner, type RunnerOptions, type RunOptions, type RunResult } from './runner.js'
import {
buildStructuredOutputInstruction,
extractJSON,
validateOutput,
} from './structured-output.js'
// ---------------------------------------------------------------------------
// Internal helpers
@ -109,11 +115,20 @@ export class Agent {
}
const provider = this.config.provider ?? 'anthropic'
const adapter = await createAdapter(provider)
const adapter = await createAdapter(provider, this.config.apiKey, this.config.baseURL)
// Append structured-output instructions when an outputSchema is configured.
let effectiveSystemPrompt = this.config.systemPrompt
if (this.config.outputSchema) {
const instruction = buildStructuredOutputInstruction(this.config.outputSchema)
effectiveSystemPrompt = effectiveSystemPrompt
? effectiveSystemPrompt + '\n' + instruction
: instruction
}
const runnerOptions: RunnerOptions = {
model: this.config.model,
systemPrompt: this.config.systemPrompt,
systemPrompt: effectiveSystemPrompt,
maxTurns: this.config.maxTurns,
maxTokens: this.config.maxTokens,
temperature: this.config.temperature,
@ -144,12 +159,12 @@ export class Agent {
*
* Use this for one-shot queries where past context is irrelevant.
*/
async run(prompt: string): Promise<AgentRunResult> {
async run(prompt: string, runOptions?: Partial<RunOptions>): Promise<AgentRunResult> {
const messages: LLMMessage[] = [
{ role: 'user', content: [{ type: 'text', text: prompt }] },
]
return this.executeRun(messages)
return this.executeRun(messages, runOptions)
}
/**
@ -160,6 +175,7 @@ export class Agent {
*
* Use this for multi-turn interactions.
*/
// TODO(#18): accept optional RunOptions to forward trace context
async prompt(message: string): Promise<AgentRunResult> {
const userMessage: LLMMessage = {
role: 'user',
@ -183,6 +199,7 @@ export class Agent {
*
* Like {@link run}, this does not use or update the persistent history.
*/
// TODO(#18): accept optional RunOptions to forward trace context
async *stream(prompt: string): AsyncGenerator<StreamEvent> {
const messages: LLMMessage[] = [
{ role: 'user', content: [{ type: 'text', text: prompt }] },
@ -252,33 +269,165 @@ export class Agent {
* Shared execution path used by both `run` and `prompt`.
* Handles state transitions and error wrapping.
*/
private async executeRun(messages: LLMMessage[]): Promise<AgentRunResult> {
private async executeRun(
messages: LLMMessage[],
callerOptions?: Partial<RunOptions>,
): Promise<AgentRunResult> {
this.transitionTo('running')
const agentStartMs = Date.now()
try {
const runner = await this.getRunner()
const internalOnMessage = (msg: LLMMessage) => {
this.state.messages.push(msg)
callerOptions?.onMessage?.(msg)
}
// Auto-generate runId when onTrace is provided but runId is missing
const needsRunId = callerOptions?.onTrace && !callerOptions.runId
const runOptions: RunOptions = {
onMessage: msg => {
this.state.messages.push(msg)
},
...callerOptions,
onMessage: internalOnMessage,
...(needsRunId ? { runId: generateRunId() } : undefined),
}
const result = await runner.run(messages, runOptions)
this.state.tokenUsage = addUsage(this.state.tokenUsage, result.tokenUsage)
this.transitionTo('completed')
return this.toAgentRunResult(result, true)
// --- Structured output validation ---
if (this.config.outputSchema) {
const validated = await this.validateStructuredOutput(
messages,
result,
runner,
runOptions,
)
this.emitAgentTrace(callerOptions, agentStartMs, validated)
return validated
}
this.transitionTo('completed')
const agentResult = this.toAgentRunResult(result, true)
this.emitAgentTrace(callerOptions, agentStartMs, agentResult)
return agentResult
} catch (err) {
const error = err instanceof Error ? err : new Error(String(err))
this.transitionToError(error)
return {
const errorResult: AgentRunResult = {
success: false,
output: error.message,
messages: [],
tokenUsage: ZERO_USAGE,
toolCalls: [],
structured: undefined,
}
this.emitAgentTrace(callerOptions, agentStartMs, errorResult)
return errorResult
}
}
/** Emit an `agent` trace event if `onTrace` is provided. */
private emitAgentTrace(
options: Partial<RunOptions> | undefined,
startMs: number,
result: AgentRunResult,
): void {
if (!options?.onTrace) return
const endMs = Date.now()
emitTrace(options.onTrace, {
type: 'agent',
runId: options.runId ?? '',
taskId: options.taskId,
agent: options.traceAgent ?? this.name,
turns: result.messages.filter(m => m.role === 'assistant').length,
tokens: result.tokenUsage,
toolCalls: result.toolCalls.length,
startMs,
endMs,
durationMs: endMs - startMs,
})
}
/**
* Validate agent output against the configured `outputSchema`.
* On first validation failure, retry once with error feedback.
*/
private async validateStructuredOutput(
originalMessages: LLMMessage[],
result: RunResult,
runner: AgentRunner,
runOptions: RunOptions,
): Promise<AgentRunResult> {
const schema = this.config.outputSchema!
// First attempt
let firstAttemptError: unknown
try {
const parsed = extractJSON(result.output)
const validated = validateOutput(schema, parsed)
this.transitionTo('completed')
return this.toAgentRunResult(result, true, validated)
} catch (e) {
firstAttemptError = e
}
// Retry: send full context + error feedback
const errorMsg = firstAttemptError instanceof Error
? firstAttemptError.message
: String(firstAttemptError)
const errorFeedbackMessage: LLMMessage = {
role: 'user' as const,
content: [{
type: 'text' as const,
text: [
'Your previous response did not produce valid JSON matching the required schema.',
'',
`Error: ${errorMsg}`,
'',
'Please try again. Respond with ONLY valid JSON, no other text.',
].join('\n'),
}],
}
const retryMessages: LLMMessage[] = [
...originalMessages,
...result.messages,
errorFeedbackMessage,
]
const retryResult = await runner.run(retryMessages, runOptions)
this.state.tokenUsage = addUsage(this.state.tokenUsage, retryResult.tokenUsage)
const mergedTokenUsage = addUsage(result.tokenUsage, retryResult.tokenUsage)
// Include the error feedback turn to maintain alternating user/assistant roles,
// which is required by Anthropic's API for subsequent prompt() calls.
const mergedMessages = [...result.messages, errorFeedbackMessage, ...retryResult.messages]
const mergedToolCalls = [...result.toolCalls, ...retryResult.toolCalls]
try {
const parsed = extractJSON(retryResult.output)
const validated = validateOutput(schema, parsed)
this.transitionTo('completed')
return {
success: true,
output: retryResult.output,
messages: mergedMessages,
tokenUsage: mergedTokenUsage,
toolCalls: mergedToolCalls,
structured: validated,
}
} catch {
// Retry also failed
this.transitionTo('completed')
return {
success: false,
output: retryResult.output,
messages: mergedMessages,
tokenUsage: mergedTokenUsage,
toolCalls: mergedToolCalls,
structured: undefined,
}
}
}
@ -331,8 +480,9 @@ export class Agent {
// -------------------------------------------------------------------------
private toAgentRunResult(
result: import('./runner.js').RunResult,
result: RunResult,
success: boolean,
structured?: unknown,
): AgentRunResult {
return {
success,
@ -340,6 +490,7 @@ export class Agent {
messages: result.messages,
tokenUsage: result.tokenUsage,
toolCalls: result.toolCalls,
structured,
}
}

src/agent/pool.ts
View File

@ -21,6 +21,7 @@
*/
import type { AgentRunResult } from '../types.js'
import type { RunOptions } from './runner.js'
import type { Agent } from './agent.js'
import { Semaphore } from '../utils/semaphore.js'
@ -123,12 +124,16 @@ export class AgentPool {
*
* @throws {Error} If the agent name is not found.
*/
async run(agentName: string, prompt: string): Promise<AgentRunResult> {
async run(
agentName: string,
prompt: string,
runOptions?: Partial<RunOptions>,
): Promise<AgentRunResult> {
const agent = this.requireAgent(agentName)
await this.semaphore.acquire()
try {
return await agent.run(prompt)
return await agent.run(prompt, runOptions)
} finally {
this.semaphore.release()
}
@ -144,6 +149,7 @@ export class AgentPool {
*
* @param tasks - Array of `{ agent, prompt }` descriptors.
*/
// TODO(#18): accept RunOptions per task to forward trace context
async runParallel(
tasks: ReadonlyArray<{ readonly agent: string; readonly prompt: string }>,
): Promise<Map<string, AgentRunResult>> {
@ -182,6 +188,7 @@ export class AgentPool {
*
* @throws {Error} If the pool is empty.
*/
// TODO(#18): accept RunOptions to forward trace context
async runAny(prompt: string): Promise<AgentRunResult> {
const allAgents = this.list()
if (allAgents.length === 0) {

src/agent/runner.ts
View File

@ -25,7 +25,9 @@ import type {
ToolUseContext,
LLMAdapter,
LLMChatOptions,
TraceEvent,
} from '../types.js'
import { emitTrace } from '../utils/trace.js'
import type { ToolRegistry } from '../tool/framework.js'
import type { ToolExecutor } from '../tool/executor.js'
@ -76,6 +78,14 @@ export interface RunOptions {
readonly onToolResult?: (name: string, result: ToolResult) => void
/** Fired after each complete {@link LLMMessage} is appended. */
readonly onMessage?: (message: LLMMessage) => void
/** Trace callback for observability spans. Async callbacks are safe. */
readonly onTrace?: (event: TraceEvent) => void | Promise<void>
/** Run ID for trace correlation. */
readonly runId?: string
/** Task ID for trace correlation. */
readonly taskId?: string
/** Agent name for trace correlation (overrides RunnerOptions.agentName). */
readonly traceAgent?: string
}
/** The aggregated result returned when a full run completes. */
@ -254,7 +264,23 @@ export class AgentRunner {
// ------------------------------------------------------------------
// Step 1: Call the LLM and collect the full response for this turn.
// ------------------------------------------------------------------
const llmStartMs = Date.now()
const response = await this.adapter.chat(conversationMessages, baseChatOptions)
if (options.onTrace) {
const llmEndMs = Date.now()
emitTrace(options.onTrace, {
type: 'llm_call',
runId: options.runId ?? '',
taskId: options.taskId,
agent: options.traceAgent ?? this.options.agentName ?? 'unknown',
model: this.options.model,
turn: turns,
tokens: response.usage,
startMs: llmStartMs,
endMs: llmEndMs,
durationMs: llmEndMs - llmStartMs,
})
}
totalUsage = addTokenUsage(totalUsage, response.usage)
@ -319,10 +345,25 @@ export class AgentRunner {
result = { data: message, isError: true }
}
const duration = Date.now() - startTime
const endTime = Date.now()
const duration = endTime - startTime
options.onToolResult?.(block.name, result)
if (options.onTrace) {
emitTrace(options.onTrace, {
type: 'tool_call',
runId: options.runId ?? '',
taskId: options.taskId,
agent: options.traceAgent ?? this.options.agentName ?? 'unknown',
tool: block.name,
isError: result.isError ?? false,
startMs: startTime,
endMs: endTime,
durationMs: duration,
})
}
const record: ToolCallRecord = {
toolName: block.name,
input: block.input,

126
src/agent/structured-output.ts Normal file
View File

@ -0,0 +1,126 @@
/**
* @fileoverview Structured output utilities for agent responses.
*
* Provides JSON extraction, Zod validation, and system-prompt injection so
* that agents can return typed, schema-validated output.
*/
import { type ZodSchema } from 'zod'
import { zodToJsonSchema } from '../tool/framework.js'
// ---------------------------------------------------------------------------
// System-prompt instruction builder
// ---------------------------------------------------------------------------
/**
* Build a JSON-mode instruction block to append to the agent's system prompt.
*
* Converts the Zod schema to JSON Schema and formats it as a clear directive
* for the LLM to respond with valid JSON matching the schema.
*/
export function buildStructuredOutputInstruction(schema: ZodSchema): string {
const jsonSchema = zodToJsonSchema(schema)
return [
'',
'## Output Format (REQUIRED)',
'You MUST respond with ONLY valid JSON that conforms to the following JSON Schema.',
'Do NOT include any text, markdown fences, or explanation outside the JSON object.',
'Do NOT wrap the JSON in ```json code fences.',
'',
'```',
JSON.stringify(jsonSchema, null, 2),
'```',
].join('\n')
}
// ---------------------------------------------------------------------------
// JSON extraction
// ---------------------------------------------------------------------------
/**
* Attempt to extract and parse JSON from the agent's raw text output.
*
* Handles three cases in order:
* 1. The output is already valid JSON (ideal case)
* 2. The output contains a ` ```json ` fenced block
* 3. The output contains a bare JSON object/array (first `{`/`[` to last `}`/`]`)
*
* @throws {Error} when no valid JSON can be extracted
*/
export function extractJSON(raw: string): unknown {
const trimmed = raw.trim()
// Case 1: Direct parse
try {
return JSON.parse(trimmed)
} catch {
// Continue to fallback strategies
}
// Case 2a: Prefer ```json tagged fence
const jsonFenceMatch = trimmed.match(/```json\s*([\s\S]*?)```/)
if (jsonFenceMatch?.[1]) {
try {
return JSON.parse(jsonFenceMatch[1].trim())
} catch {
// Continue
}
}
// Case 2b: Fall back to bare ``` fence
const bareFenceMatch = trimmed.match(/```\s*([\s\S]*?)```/)
if (bareFenceMatch?.[1]) {
try {
return JSON.parse(bareFenceMatch[1].trim())
} catch {
// Continue
}
}
// Case 3: Find first { to last } (object)
const objStart = trimmed.indexOf('{')
const objEnd = trimmed.lastIndexOf('}')
if (objStart !== -1 && objEnd > objStart) {
try {
return JSON.parse(trimmed.slice(objStart, objEnd + 1))
} catch {
// Fall through
}
}
// Case 3b: Find first [ to last ] (array)
const arrStart = trimmed.indexOf('[')
const arrEnd = trimmed.lastIndexOf(']')
if (arrStart !== -1 && arrEnd > arrStart) {
try {
return JSON.parse(trimmed.slice(arrStart, arrEnd + 1))
} catch {
// Fall through
}
}
throw new Error(
`Failed to extract JSON from output. Raw output begins with: "${trimmed.slice(0, 100)}"`,
)
}
// ---------------------------------------------------------------------------
// Zod validation
// ---------------------------------------------------------------------------
/**
* Validate a parsed JSON value against a Zod schema.
*
* @returns The validated (and potentially transformed) value on success.
* @throws {Error} with a human-readable Zod error message on failure.
*/
export function validateOutput(schema: ZodSchema, data: unknown): unknown {
const result = schema.safeParse(data)
if (result.success) {
return result.data
}
const issues = result.error.issues
.map(issue => ` - ${issue.path.length > 0 ? issue.path.join('.') : '(root)'}: ${issue.message}`)
.join('\n')
throw new Error(`Output validation failed:\n${issues}`)
}
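
Taken together, these three helpers are the pipeline the `Agent` applies internally when `outputSchema` is set. A standalone usage sketch; all three are re-exported from the package root (see the `src/index.ts` diff below):

```ts
import { z } from 'zod'
import {
  buildStructuredOutputInstruction,
  extractJSON,
  validateOutput,
} from '@jackchen_me/open-multi-agent'

const Answer = z.object({ answer: z.string() })

// 1. Appended to the system prompt so the model knows the contract.
console.log(buildStructuredOutputInstruction(Answer))

// 2 and 3. Applied to the raw model output: extract, then validate.
const raw = 'Sure! {"answer": "42"} Hope that helps.'
const parsed = extractJSON(raw)              // falls through to case 3: first { to last }
const typed = validateOutput(Answer, parsed) // throws with readable issues on mismatch
console.log(typed)                           // { answer: '42' }
```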

src/index.ts
View File

@ -54,7 +54,7 @@
// Orchestrator (primary entry point)
// ---------------------------------------------------------------------------
export { OpenMultiAgent } from './orchestrator/orchestrator.js'
export { OpenMultiAgent, executeWithRetry, computeRetryDelay } from './orchestrator/orchestrator.js'
export { Scheduler } from './orchestrator/scheduler.js'
export type { SchedulingStrategy } from './orchestrator/scheduler.js'
@ -63,6 +63,7 @@ export type { SchedulingStrategy } from './orchestrator/scheduler.js'
// ---------------------------------------------------------------------------
export { Agent } from './agent/agent.js'
export { buildStructuredOutputInstruction, extractJSON, validateOutput } from './agent/structured-output.js'
export { AgentPool, Semaphore } from './agent/pool.js'
export type { PoolStatus } from './agent/pool.js'
@ -160,7 +161,18 @@ export type {
OrchestratorConfig,
OrchestratorEvent,
// Trace
TraceEventType,
TraceEventBase,
TraceEvent,
LLMCallTrace,
ToolCallTrace,
TaskTrace,
AgentTrace,
// Memory
MemoryEntry,
MemoryStore,
} from './types.js'
export { generateRunId } from './utils/trace.js'

src/llm/adapter.ts
View File

@ -39,6 +39,7 @@ import type { LLMAdapter } from '../types.js'
* directly and bypassing this factory.
*/
export type SupportedProvider = 'anthropic' | 'openai' | 'ollama'
export type SupportedProvider = 'anthropic' | 'copilot' | 'openai'
/**
* Instantiate the appropriate {@link LLMAdapter} for the given provider.
@ -46,26 +47,41 @@ export type SupportedProvider = 'anthropic' | 'openai' | 'ollama'
* API keys fall back to the standard environment variables
* (`ANTHROPIC_API_KEY` / `OPENAI_API_KEY`) when not supplied explicitly.
* Ollama uses `OLLAMA_BASE_URL` (defaults to http://localhost:11434).
* API keys fall back to the standard environment variables when not supplied
* explicitly:
 * - `anthropic` → `ANTHROPIC_API_KEY`
 * - `openai` → `OPENAI_API_KEY`
 * - `copilot` → `GITHUB_COPILOT_TOKEN` / `GITHUB_TOKEN`, or interactive
* OAuth2 device flow if neither is set
*
* Adapters are imported lazily so that projects using only one provider
 * are not forced to install the SDKs for the others.
*
* @param provider - Which LLM provider to target.
* @param apiKey - Optional API key override; falls back to env var.
* @param baseURL - Optional base URL for OpenAI-compatible APIs (Ollama, vLLM, etc.).
* @throws {Error} When the provider string is not recognised.
*/
export async function createAdapter(
provider: SupportedProvider,
apiKey?: string,
baseURL?: string,
): Promise<LLMAdapter> {
switch (provider) {
case 'anthropic': {
const { AnthropicAdapter } = await import('./anthropic.js')
return new AnthropicAdapter(apiKey)
return new AnthropicAdapter(apiKey, baseURL)
}
case 'copilot': {
if (baseURL) {
console.warn('[open-multi-agent] baseURL is not supported for the copilot provider and will be ignored.')
}
const { CopilotAdapter } = await import('./copilot.js')
return new CopilotAdapter(apiKey)
}
case 'openai': {
const { OpenAIAdapter } = await import('./openai.js')
return new OpenAIAdapter(apiKey)
return new OpenAIAdapter(apiKey, baseURL)
}
case 'ollama': {
const { OllamaAdapter } = await import('./ollama.js')
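
For reference, a usage sketch of the extended factory signature. The `'ollama'` placeholder key mirrors the convention in example 08; any OpenAI-compatible server works via `baseURL`:

```ts
import { createAdapter } from './adapter.js'

const anthropic = await createAdapter('anthropic') // key falls back to ANTHROPIC_API_KEY
const copilot = await createAdapter('copilot')     // GITHUB_COPILOT_TOKEN / GITHUB_TOKEN, else device flow
const local = await createAdapter('openai', 'ollama', 'http://localhost:11434/v1')
```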

src/llm/anthropic.ts
View File

@ -189,9 +189,10 @@ export class AnthropicAdapter implements LLMAdapter {
readonly #client: Anthropic
constructor(apiKey?: string) {
constructor(apiKey?: string, baseURL?: string) {
this.#client = new Anthropic({
apiKey: apiKey ?? process.env['ANTHROPIC_API_KEY'],
baseURL,
})
}

551
src/llm/copilot.ts Normal file
View File

@ -0,0 +1,551 @@
/**
* @fileoverview GitHub Copilot adapter implementing {@link LLMAdapter}.
*
* Uses the OpenAI-compatible Copilot Chat Completions endpoint at
 * `https://api.githubcopilot.com`. Authentication requires a GitHub token,
* which is exchanged for a short-lived Copilot session token via the
* internal token endpoint.
*
* API key resolution order:
* 1. `apiKey` constructor argument
* 2. `GITHUB_COPILOT_TOKEN` environment variable
* 3. `GITHUB_TOKEN` environment variable
* 4. Interactive OAuth2 device flow (prompts the user to sign in)
*
* @example
* ```ts
* import { CopilotAdapter } from './copilot.js'
*
* const adapter = new CopilotAdapter() // uses GITHUB_COPILOT_TOKEN, falling back to GITHUB_TOKEN
* const response = await adapter.chat(messages, {
* model: 'claude-sonnet-4',
* maxTokens: 4096,
* })
* ```
*/
import OpenAI from 'openai'
import type {
ChatCompletionChunk,
} from 'openai/resources/chat/completions/index.js'
import type {
ContentBlock,
LLMAdapter,
LLMChatOptions,
LLMMessage,
LLMResponse,
LLMStreamOptions,
LLMToolDef,
StreamEvent,
TextBlock,
ToolUseBlock,
} from '../types.js'
import {
toOpenAITool,
fromOpenAICompletion,
normalizeFinishReason,
buildOpenAIMessageList,
} from './openai-common.js'
// ---------------------------------------------------------------------------
// Copilot auth — OAuth2 device flow + token exchange
// ---------------------------------------------------------------------------
const COPILOT_TOKEN_URL = 'https://api.github.com/copilot_internal/v2/token'
const DEVICE_CODE_URL = 'https://github.com/login/device/code'
const POLL_URL = 'https://github.com/login/oauth/access_token'
const COPILOT_CLIENT_ID = 'Iv1.b507a08c87ecfe98'
const COPILOT_HEADERS: Record<string, string> = {
'Copilot-Integration-Id': 'vscode-chat',
'Editor-Version': 'vscode/1.100.0',
'Editor-Plugin-Version': 'copilot-chat/0.42.2',
}
interface CopilotTokenResponse {
token: string
expires_at: number
}
interface DeviceCodeResponse {
device_code: string
user_code: string
verification_uri: string
interval: number
expires_in: number
}
interface PollResponse {
access_token?: string
error?: string
error_description?: string
}
/**
* Callback invoked when the OAuth2 device flow needs the user to authorize.
* Receives the verification URI and user code. If not provided, defaults to
* printing them to stdout.
*/
export type DeviceCodeCallback = (verificationUri: string, userCode: string) => void
const defaultDeviceCodeCallback: DeviceCodeCallback = (uri, code) => {
  console.log(`\n┌─────────────────────────────────────────────┐`)
  console.log(`│${' GitHub Copilot — Sign in'.padEnd(45)}│`)
  console.log(`│${''.padEnd(45)}│`)
  console.log(`│${` Open: ${uri}`.padEnd(45)}│`)
  console.log(`│${` Code: ${code}`.padEnd(45)}│`)
  console.log(`└─────────────────────────────────────────────┘\n`)
}
/**
* Start the GitHub OAuth2 device code flow with the Copilot client ID.
*
* Calls `onDeviceCode` with the verification URI and user code, then polls
* until the user completes authorization. Returns a GitHub OAuth token
* scoped for Copilot access.
*/
async function deviceCodeLogin(onDeviceCode: DeviceCodeCallback): Promise<string> {
// Step 1: Request a device code
const codeRes = await fetch(DEVICE_CODE_URL, {
method: 'POST',
headers: {
Accept: 'application/json',
'Content-Type': 'application/x-www-form-urlencoded',
},
body: new URLSearchParams({ client_id: COPILOT_CLIENT_ID, scope: 'copilot' }),
})
if (!codeRes.ok) {
const body = await codeRes.text().catch(() => '')
throw new Error(`Device code request failed (${codeRes.status}): ${body}`)
}
const codeData = (await codeRes.json()) as DeviceCodeResponse
// Step 2: Prompt the user via callback
onDeviceCode(codeData.verification_uri, codeData.user_code)
// Step 3: Poll for the user to complete auth
const interval = (codeData.interval || 5) * 1000
const deadline = Date.now() + codeData.expires_in * 1000
while (Date.now() < deadline) {
await new Promise((resolve) => setTimeout(resolve, interval))
const pollRes = await fetch(POLL_URL, {
method: 'POST',
headers: {
Accept: 'application/json',
'Content-Type': 'application/x-www-form-urlencoded',
},
body: new URLSearchParams({
client_id: COPILOT_CLIENT_ID,
device_code: codeData.device_code,
grant_type: 'urn:ietf:params:oauth:grant-type:device_code',
}),
})
const pollData = (await pollRes.json()) as PollResponse
if (pollData.access_token) {
console.log('✓ Authenticated with GitHub Copilot\n')
return pollData.access_token
}
if (pollData.error === 'authorization_pending') continue
if (pollData.error === 'slow_down') {
await new Promise((resolve) => setTimeout(resolve, 5000))
continue
}
    throw new Error(
      `OAuth device flow failed: ${pollData.error}` +
        (pollData.error_description ? `: ${pollData.error_description}` : ''),
    )
}
throw new Error('Device code expired. Please try again.')
}
/**
* Exchange a GitHub OAuth token (from the Copilot device flow) for a
* short-lived Copilot session token.
*
* Note: the token exchange endpoint does NOT require the Copilot-specific
 * headers (Editor-Version etc.) — only the chat completions endpoint does.
*/
async function fetchCopilotToken(githubToken: string): Promise<CopilotTokenResponse> {
const res = await fetch(COPILOT_TOKEN_URL, {
method: 'GET',
headers: {
Authorization: `token ${githubToken}`,
Accept: 'application/json',
'User-Agent': 'GitHubCopilotChat/0.28.0',
},
})
if (!res.ok) {
const body = await res.text().catch(() => '')
throw new Error(
`Copilot token exchange failed (${res.status}): ${body || res.statusText}`,
)
}
return (await res.json()) as CopilotTokenResponse
}
// ---------------------------------------------------------------------------
// Adapter implementation
// ---------------------------------------------------------------------------
/** Options for the {@link CopilotAdapter} constructor. */
export interface CopilotAdapterOptions {
/** GitHub OAuth token already scoped for Copilot. Falls back to env vars. */
apiKey?: string
/**
* Callback invoked when the OAuth2 device flow needs user action.
* Defaults to printing the verification URI and user code to stdout.
*/
onDeviceCode?: DeviceCodeCallback
}
/**
* LLM adapter backed by the GitHub Copilot Chat Completions API.
*
* Authentication options (tried in order):
 * 1. `apiKey` constructor arg — a GitHub OAuth token already scoped for Copilot
* 2. `GITHUB_COPILOT_TOKEN` env var
* 3. `GITHUB_TOKEN` env var
* 4. Interactive OAuth2 device flow
*
* The GitHub token is exchanged for a short-lived Copilot session token, which
* is cached and auto-refreshed.
*
 * Thread-safe — a single instance may be shared across concurrent agent runs.
* Concurrent token refreshes are serialised via an internal mutex.
*/
export class CopilotAdapter implements LLMAdapter {
readonly name = 'copilot'
#githubToken: string | null
#cachedToken: string | null = null
#tokenExpiresAt = 0
#refreshPromise: Promise<string> | null = null
readonly #onDeviceCode: DeviceCodeCallback
constructor(apiKeyOrOptions?: string | CopilotAdapterOptions) {
const opts = typeof apiKeyOrOptions === 'string'
? { apiKey: apiKeyOrOptions }
: apiKeyOrOptions ?? {}
this.#githubToken = opts.apiKey
?? process.env['GITHUB_COPILOT_TOKEN']
?? process.env['GITHUB_TOKEN']
?? null
this.#onDeviceCode = opts.onDeviceCode ?? defaultDeviceCodeCallback
}
/**
* Return a valid Copilot session token, refreshing if necessary.
* If no GitHub token is available, triggers the interactive device flow.
* Concurrent calls share a single in-flight refresh to avoid races.
*/
async #getSessionToken(): Promise<string> {
const now = Math.floor(Date.now() / 1000)
if (this.#cachedToken && this.#tokenExpiresAt - 60 > now) {
return this.#cachedToken
}
// If another call is already refreshing, piggyback on that promise
if (this.#refreshPromise) {
return this.#refreshPromise
}
this.#refreshPromise = this.#doRefresh()
try {
return await this.#refreshPromise
} finally {
this.#refreshPromise = null
}
}
async #doRefresh(): Promise<string> {
if (!this.#githubToken) {
this.#githubToken = await deviceCodeLogin(this.#onDeviceCode)
}
const resp = await fetchCopilotToken(this.#githubToken)
this.#cachedToken = resp.token
this.#tokenExpiresAt = resp.expires_at
return resp.token
}
/** Build a short-lived OpenAI client pointed at the Copilot endpoint. */
async #createClient(): Promise<OpenAI> {
const sessionToken = await this.#getSessionToken()
return new OpenAI({
apiKey: sessionToken,
baseURL: 'https://api.githubcopilot.com',
defaultHeaders: COPILOT_HEADERS,
})
}
// -------------------------------------------------------------------------
// chat()
// -------------------------------------------------------------------------
async chat(messages: LLMMessage[], options: LLMChatOptions): Promise<LLMResponse> {
const client = await this.#createClient()
const openAIMessages = buildOpenAIMessageList(messages, options.systemPrompt)
const completion = await client.chat.completions.create(
{
model: options.model,
messages: openAIMessages,
max_tokens: options.maxTokens,
temperature: options.temperature,
tools: options.tools ? options.tools.map(toOpenAITool) : undefined,
stream: false,
},
{
signal: options.abortSignal,
},
)
return fromOpenAICompletion(completion)
}
// -------------------------------------------------------------------------
// stream()
// -------------------------------------------------------------------------
async *stream(
messages: LLMMessage[],
options: LLMStreamOptions,
): AsyncIterable<StreamEvent> {
const client = await this.#createClient()
const openAIMessages = buildOpenAIMessageList(messages, options.systemPrompt)
const streamResponse = await client.chat.completions.create(
{
model: options.model,
messages: openAIMessages,
max_tokens: options.maxTokens,
temperature: options.temperature,
tools: options.tools ? options.tools.map(toOpenAITool) : undefined,
stream: true,
stream_options: { include_usage: true },
},
{
signal: options.abortSignal,
},
)
let completionId = ''
let completionModel = ''
let finalFinishReason: string = 'stop'
let inputTokens = 0
let outputTokens = 0
const toolCallBuffers = new Map<
number,
{ id: string; name: string; argsJson: string }
>()
let fullText = ''
try {
for await (const chunk of streamResponse) {
completionId = chunk.id
completionModel = chunk.model
if (chunk.usage !== null && chunk.usage !== undefined) {
inputTokens = chunk.usage.prompt_tokens
outputTokens = chunk.usage.completion_tokens
}
const choice: ChatCompletionChunk.Choice | undefined = chunk.choices[0]
if (choice === undefined) continue
const delta = choice.delta
if (delta.content !== null && delta.content !== undefined) {
fullText += delta.content
const textEvent: StreamEvent = { type: 'text', data: delta.content }
yield textEvent
}
for (const toolCallDelta of delta.tool_calls ?? []) {
const idx = toolCallDelta.index
if (!toolCallBuffers.has(idx)) {
toolCallBuffers.set(idx, {
id: toolCallDelta.id ?? '',
name: toolCallDelta.function?.name ?? '',
argsJson: '',
})
}
const buf = toolCallBuffers.get(idx)
if (buf !== undefined) {
if (toolCallDelta.id) buf.id = toolCallDelta.id
if (toolCallDelta.function?.name) buf.name = toolCallDelta.function.name
if (toolCallDelta.function?.arguments) {
buf.argsJson += toolCallDelta.function.arguments
}
}
}
if (choice.finish_reason !== null && choice.finish_reason !== undefined) {
finalFinishReason = choice.finish_reason
}
}
const finalToolUseBlocks: ToolUseBlock[] = []
for (const buf of toolCallBuffers.values()) {
let parsedInput: Record<string, unknown> = {}
try {
const parsed: unknown = JSON.parse(buf.argsJson)
if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
parsedInput = parsed as Record<string, unknown>
}
} catch {
// Malformed JSON — surface as empty object.
}
const toolUseBlock: ToolUseBlock = {
type: 'tool_use',
id: buf.id,
name: buf.name,
input: parsedInput,
}
finalToolUseBlocks.push(toolUseBlock)
const toolUseEvent: StreamEvent = { type: 'tool_use', data: toolUseBlock }
yield toolUseEvent
}
const doneContent: ContentBlock[] = []
if (fullText.length > 0) {
const textBlock: TextBlock = { type: 'text', text: fullText }
doneContent.push(textBlock)
}
doneContent.push(...finalToolUseBlocks)
const finalResponse: LLMResponse = {
id: completionId,
content: doneContent,
model: completionModel,
stop_reason: normalizeFinishReason(finalFinishReason),
usage: { input_tokens: inputTokens, output_tokens: outputTokens },
}
const doneEvent: StreamEvent = { type: 'done', data: finalResponse }
yield doneEvent
} catch (err) {
const error = err instanceof Error ? err : new Error(String(err))
const errorEvent: StreamEvent = { type: 'error', data: error }
yield errorEvent
}
}
}
// ---------------------------------------------------------------------------
// Premium request multipliers
// ---------------------------------------------------------------------------
/**
* Model metadata used for display names, context windows, and premium request
* multiplier lookup.
*/
export interface CopilotModelInfo {
readonly id: string
readonly name: string
readonly contextWindow: number
}
/**
* Return the premium-request multiplier for a Copilot model.
*
 * Copilot doesn't charge per token; instead each request costs
 * `multiplier × 1 premium request` from the user's monthly allowance.
* A multiplier of 0 means the model is included at no premium cost.
*
* Based on https://docs.github.com/en/copilot/reference/ai-models/supported-models#model-multipliers
*/
export function getCopilotMultiplier(modelId: string): number {
const id = modelId.toLowerCase()
// 0x — included models
if (id.includes('gpt-4.1')) return 0
if (id.includes('gpt-4o')) return 0
if (id.includes('gpt-5-mini') || id.includes('gpt-5 mini')) return 0
if (id.includes('raptor')) return 0
if (id.includes('goldeneye')) return 0
// 0.25x
if (id.includes('grok')) return 0.25
// 0.33x
if (id.includes('claude-haiku')) return 0.33
if (id.includes('gemini-3-flash') || id.includes('gemini-3.0-flash')) return 0.33
if (id.includes('gpt-5.1-codex-mini')) return 0.33
if (id.includes('gpt-5.4-mini') || id.includes('gpt-5.4 mini')) return 0.33
// 1x — standard premium
if (id.includes('claude-sonnet')) return 1
if (id.includes('gemini-2.5-pro')) return 1
if (id.includes('gemini-3-pro') || id.includes('gemini-3.0-pro')) return 1
if (id.includes('gemini-3.1-pro')) return 1
if (id.includes('gpt-5.1')) return 1
if (id.includes('gpt-5.2')) return 1
if (id.includes('gpt-5.3')) return 1
if (id.includes('gpt-5.4')) return 1
// 30x — fast opus
if (id.includes('claude-opus') && id.includes('fast')) return 30
// 3x — opus
if (id.includes('claude-opus')) return 3
return 1
}
/**
* Human-readable string describing the premium-request cost for a model.
*
* Examples: `"included (0×)"`, `"1× premium request"`, `"0.33× premium request"`
*/
export function formatCopilotMultiplier(multiplier: number): string {
  if (multiplier === 0) return 'included (0×)'
  return `${multiplier}× premium request`
}
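// Usage sketch — values follow the multiplier table above:
//   getCopilotMultiplier('claude-sonnet-4.5')               → 1
//   formatCopilotMultiplier(getCopilotMultiplier('gpt-4o')) → 'included (0×)'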
/** Known model metadata for Copilot-available models. */
export const COPILOT_MODELS: readonly CopilotModelInfo[] = [
{ id: 'gpt-4.1', name: 'GPT-4.1', contextWindow: 128_000 },
{ id: 'gpt-4o', name: 'GPT-4o', contextWindow: 128_000 },
{ id: 'gpt-5-mini', name: 'GPT-5 mini', contextWindow: 200_000 },
{ id: 'gpt-5.1', name: 'GPT-5.1', contextWindow: 200_000 },
{ id: 'gpt-5.1-codex', name: 'GPT-5.1-Codex', contextWindow: 200_000 },
{ id: 'gpt-5.1-codex-mini', name: 'GPT-5.1-Codex-Mini', contextWindow: 200_000 },
{ id: 'gpt-5.1-codex-max', name: 'GPT-5.1-Codex-Max', contextWindow: 200_000 },
{ id: 'gpt-5.2', name: 'GPT-5.2', contextWindow: 200_000 },
{ id: 'gpt-5.2-codex', name: 'GPT-5.2-Codex', contextWindow: 200_000 },
{ id: 'gpt-5.3-codex', name: 'GPT-5.3-Codex', contextWindow: 200_000 },
{ id: 'gpt-5.4', name: 'GPT-5.4', contextWindow: 200_000 },
{ id: 'gpt-5.4-mini', name: 'GPT-5.4 mini', contextWindow: 200_000 },
{ id: 'claude-haiku-4.5', name: 'Claude Haiku 4.5', contextWindow: 200_000 },
{ id: 'claude-opus-4.5', name: 'Claude Opus 4.5', contextWindow: 200_000 },
{ id: 'claude-opus-4.6', name: 'Claude Opus 4.6', contextWindow: 200_000 },
{ id: 'claude-opus-4.6-fast', name: 'Claude Opus 4.6 (fast)', contextWindow: 200_000 },
{ id: 'claude-sonnet-4', name: 'Claude Sonnet 4', contextWindow: 200_000 },
{ id: 'claude-sonnet-4.5', name: 'Claude Sonnet 4.5', contextWindow: 200_000 },
{ id: 'claude-sonnet-4.6', name: 'Claude Sonnet 4.6', contextWindow: 200_000 },
{ id: 'gemini-2.5-pro', name: 'Gemini 2.5 Pro', contextWindow: 1_000_000 },
{ id: 'gemini-3-flash', name: 'Gemini 3 Flash', contextWindow: 1_000_000 },
{ id: 'gemini-3-pro', name: 'Gemini 3 Pro', contextWindow: 1_000_000 },
{ id: 'gemini-3.1-pro', name: 'Gemini 3.1 Pro', contextWindow: 1_000_000 },
{ id: 'grok-code-fast-1', name: 'Grok Code Fast 1', contextWindow: 128_000 },
{ id: 'raptor-mini', name: 'Raptor mini', contextWindow: 128_000 },
{ id: 'goldeneye', name: 'Goldeneye', contextWindow: 128_000 },
] as const
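// Lookup sketch: resolve display metadata for a model ID from the table above.
// const info = COPILOT_MODELS.find((m) => m.id === 'gemini-3-pro')
// info?.contextWindow → 1_000_000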

View File

@ -1,15 +1,16 @@
/**
* @fileoverview Shared OpenAI wire-format helpers for Ollama and OpenAI adapters.
* @fileoverview Shared OpenAI wire-format conversion helpers.
*
* These functions convert between the framework's internal types and the
* OpenAI/Ollama Chat Completions wire format. Both adapters should import
* from here rather than duplicating the conversion logic.
* Both the OpenAI and Copilot adapters use the OpenAI Chat Completions API
* format. This module contains the common conversion logic so it isn't
* duplicated across adapters.
*/
import OpenAI from 'openai'
import type {
ChatCompletion,
ChatCompletionAssistantMessageParam,
ChatCompletionChunk,
ChatCompletionAssistantMessageParam,
ChatCompletionMessageParam,
ChatCompletionMessageToolCall,
ChatCompletionTool,
@ -27,8 +28,12 @@ import type {
ToolUseBlock,
} from '../types.js'
// ---------------------------------------------------------------------------
// Framework → OpenAI
// ---------------------------------------------------------------------------
/**
* Convert a framework {@link LLMToolDef} to an OpenAI/Ollama {@link ChatCompletionTool}.
* Convert a framework {@link LLMToolDef} to an OpenAI {@link ChatCompletionTool}.
*/
export function toOpenAITool(tool: LLMToolDef): ChatCompletionTool {
return {
@ -43,15 +48,19 @@ export function toOpenAITool(tool: LLMToolDef): ChatCompletionTool {
/**
* Determine whether a framework message contains any `tool_result` content
* blocks, which must be serialised as separate OpenAI/Ollama `tool`-role messages.
* blocks, which must be serialised as separate OpenAI `tool`-role messages.
*/
export function hasToolResults(msg: LLMMessage): boolean {
function hasToolResults(msg: LLMMessage): boolean {
return msg.content.some((b) => b.type === 'tool_result')
}
/**
* Convert a single framework {@link LLMMessage} into one or more OpenAI/Ollama
* Convert framework {@link LLMMessage}s into OpenAI
* {@link ChatCompletionMessageParam} entries.
*
* `tool_result` blocks are expanded into top-level `tool`-role messages
* because OpenAI uses a dedicated role for tool results rather than embedding
* them inside user-content arrays.
*/
export function toOpenAIMessages(messages: LLMMessage[]): ChatCompletionMessageParam[] {
const result: ChatCompletionMessageParam[] = []
@ -60,6 +69,7 @@ export function toOpenAIMessages(messages: LLMMessage[]): ChatCompletionMessageP
if (msg.role === 'assistant') {
result.push(toOpenAIAssistantMessage(msg))
} else {
// user role
if (!hasToolResults(msg)) {
result.push(toOpenAIUserMessage(msg))
} else {
@ -85,13 +95,18 @@ export function toOpenAIMessages(messages: LLMMessage[]): ChatCompletionMessageP
return result
}
export function toOpenAIUserMessage(msg: LLMMessage): ChatCompletionUserMessageParam {
/**
* Convert a `user`-role framework message into an OpenAI user message.
* Image blocks are converted to the OpenAI image_url content part format.
*/
function toOpenAIUserMessage(msg: LLMMessage): ChatCompletionUserMessageParam {
if (msg.content.length === 1 && msg.content[0]?.type === 'text') {
return { role: 'user', content: msg.content[0].text }
}
const parts: Array<{ type: 'text', text: string } | { type: 'image_url', image_url: { url: string } }> = []
type ContentPart = OpenAI.Chat.ChatCompletionContentPartText | OpenAI.Chat.ChatCompletionContentPartImage
for (const block of msg.content) {
if (block.type === 'text') {
parts.push({ type: 'text', text: block.text })
@ -103,12 +118,17 @@ export function toOpenAIUserMessage(msg: LLMMessage): ChatCompletionUserMessageP
},
})
}
// tool_result blocks are handled by the caller (toOpenAIMessages); skip here.
}
return { role: 'user', content: parts }
}
export function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessageParam {
/**
* Convert an `assistant`-role framework message into an OpenAI assistant message.
* `tool_use` blocks become `tool_calls`; `text` blocks become message content.
*/
function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessageParam {
const toolCalls: ChatCompletionMessageToolCall[] = []
const textParts: string[] = []
@ -139,8 +159,15 @@ export function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssista
return assistantMsg
}
// ---------------------------------------------------------------------------
// OpenAI → Framework
// ---------------------------------------------------------------------------
/**
* Convert an OpenAI/Ollama {@link ChatCompletion} into a framework {@link LLMResponse}.
* Convert an OpenAI {@link ChatCompletion} into a framework {@link LLMResponse}.
*
* Takes only the first choice (index 0), consistent with how the framework
* is designed for single-output agents.
*/
export function fromOpenAICompletion(completion: ChatCompletion): LLMResponse {
const choice = completion.choices[0]
@ -191,8 +218,15 @@ export function fromOpenAICompletion(completion: ChatCompletion): LLMResponse {
}
/**
* Normalize an OpenAI/Ollama `finish_reason` string to the framework's canonical
* Normalize an OpenAI `finish_reason` string to the framework's canonical
* stop-reason vocabulary.
*
* Mapping:
 * - `'stop'` → `'end_turn'`
 * - `'tool_calls'` → `'tool_use'`
 * - `'length'` → `'max_tokens'`
 * - `'content_filter'` → `'content_filter'`
* - anything else passed through unchanged
*/
export function normalizeFinishReason(reason: string): string {
switch (reason) {

View File

@ -32,14 +32,7 @@
import OpenAI from 'openai'
import type {
ChatCompletion,
ChatCompletionAssistantMessageParam,
ChatCompletionChunk,
ChatCompletionMessageParam,
ChatCompletionMessageToolCall,
ChatCompletionTool,
ChatCompletionToolMessageParam,
ChatCompletionUserMessageParam,
} from 'openai/resources/chat/completions/index.js'
import type {
@ -55,231 +48,12 @@ import type {
ToolUseBlock,
} from '../types.js'
// ---------------------------------------------------------------------------
// Internal helpers — framework → OpenAI
// ---------------------------------------------------------------------------
/**
* Convert a framework {@link LLMToolDef} to an OpenAI {@link ChatCompletionTool}.
*
* OpenAI wraps the function definition inside a `function` key and a `type`
* discriminant. The `inputSchema` is already a JSON Schema object.
*/
function toOpenAITool(tool: LLMToolDef): ChatCompletionTool {
return {
type: 'function',
function: {
name: tool.name,
description: tool.description,
parameters: tool.inputSchema as Record<string, unknown>,
},
}
}
/**
* Determine whether a framework message contains any `tool_result` content
* blocks, which must be serialised as separate OpenAI `tool`-role messages.
*/
function hasToolResults(msg: LLMMessage): boolean {
return msg.content.some((b) => b.type === 'tool_result')
}
/**
* Convert a single framework {@link LLMMessage} into one or more OpenAI
* {@link ChatCompletionMessageParam} entries.
*
* The expansion is necessary because OpenAI represents tool results as
* top-level messages with role `tool`, whereas in our model they are content
* blocks inside a `user` message.
*
* Expansion rules:
 * - A `user` message containing only text/image blocks → single user message
 * - A `user` message containing `tool_result` blocks → one `tool` message per
 *   tool_result block; any remaining text/image blocks are folded into an
 *   additional user message prepended to the group
 * - An `assistant` message → single assistant message with optional tool_calls
*/
function toOpenAIMessages(messages: LLMMessage[]): ChatCompletionMessageParam[] {
const result: ChatCompletionMessageParam[] = []
for (const msg of messages) {
if (msg.role === 'assistant') {
result.push(toOpenAIAssistantMessage(msg))
} else {
// user role
if (!hasToolResults(msg)) {
result.push(toOpenAIUserMessage(msg))
} else {
// Split: text/image blocks become a user message (if any exist), then
// each tool_result block becomes an independent tool message.
const nonToolBlocks = msg.content.filter((b) => b.type !== 'tool_result')
if (nonToolBlocks.length > 0) {
result.push(toOpenAIUserMessage({ role: 'user', content: nonToolBlocks }))
}
for (const block of msg.content) {
if (block.type === 'tool_result') {
const toolMsg: ChatCompletionToolMessageParam = {
role: 'tool',
tool_call_id: block.tool_use_id,
content: block.content,
}
result.push(toolMsg)
}
}
}
}
}
return result
}
/**
* Convert a `user`-role framework message into an OpenAI user message.
* Image blocks are converted to the OpenAI image_url content part format.
*/
function toOpenAIUserMessage(msg: LLMMessage): ChatCompletionUserMessageParam {
// If the entire content is a single text block, use the compact string form
// to keep the request payload smaller.
if (msg.content.length === 1 && msg.content[0]?.type === 'text') {
return { role: 'user', content: msg.content[0].text }
}
type ContentPart = OpenAI.Chat.ChatCompletionContentPartText | OpenAI.Chat.ChatCompletionContentPartImage
const parts: ContentPart[] = []
for (const block of msg.content) {
if (block.type === 'text') {
parts.push({ type: 'text', text: block.text })
} else if (block.type === 'image') {
parts.push({
type: 'image_url',
image_url: {
url: `data:${block.source.media_type};base64,${block.source.data}`,
},
})
}
// tool_result blocks are handled by the caller (toOpenAIMessages); skip here.
}
return { role: 'user', content: parts }
}
/**
* Convert an `assistant`-role framework message into an OpenAI assistant message.
*
* Any `tool_use` blocks become `tool_calls`; `text` blocks become the message content.
*/
function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessageParam {
const toolCalls: ChatCompletionMessageToolCall[] = []
const textParts: string[] = []
for (const block of msg.content) {
if (block.type === 'tool_use') {
toolCalls.push({
id: block.id,
type: 'function',
function: {
name: block.name,
arguments: JSON.stringify(block.input),
},
})
} else if (block.type === 'text') {
textParts.push(block.text)
}
}
const assistantMsg: ChatCompletionAssistantMessageParam = {
role: 'assistant',
content: textParts.length > 0 ? textParts.join('') : null,
}
if (toolCalls.length > 0) {
assistantMsg.tool_calls = toolCalls
}
return assistantMsg
}
// ---------------------------------------------------------------------------
// Internal helpers — OpenAI → framework
// ---------------------------------------------------------------------------
/**
* Convert an OpenAI {@link ChatCompletion} into a framework {@link LLMResponse}.
*
* We take only the first choice (index 0), consistent with how the framework
* is designed for single-output agents.
*/
function fromOpenAICompletion(completion: ChatCompletion): LLMResponse {
const choice = completion.choices[0]
if (choice === undefined) {
throw new Error('OpenAI returned a completion with no choices')
}
const content: ContentBlock[] = []
const message = choice.message
if (message.content !== null && message.content !== undefined) {
const textBlock: TextBlock = { type: 'text', text: message.content }
content.push(textBlock)
}
for (const toolCall of message.tool_calls ?? []) {
let parsedInput: Record<string, unknown> = {}
try {
const parsed: unknown = JSON.parse(toolCall.function.arguments)
if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
parsedInput = parsed as Record<string, unknown>
}
} catch {
// Malformed arguments from the model — surface as empty object.
}
const toolUseBlock: ToolUseBlock = {
type: 'tool_use',
id: toolCall.id,
name: toolCall.function.name,
input: parsedInput,
}
content.push(toolUseBlock)
}
const stopReason = normalizeFinishReason(choice.finish_reason ?? 'stop')
return {
id: completion.id,
content,
model: completion.model,
stop_reason: stopReason,
usage: {
input_tokens: completion.usage?.prompt_tokens ?? 0,
output_tokens: completion.usage?.completion_tokens ?? 0,
},
}
}
/**
* Normalize an OpenAI `finish_reason` string to the framework's canonical
* stop-reason vocabulary so consumers never need to branch on provider-specific
* strings.
*
* Mapping:
 * - `'stop'` → `'end_turn'`
 * - `'tool_calls'` → `'tool_use'`
 * - `'length'` → `'max_tokens'`
 * - `'content_filter'` → `'content_filter'`
* - anything else passed through unchanged
*/
function normalizeFinishReason(reason: string): string {
switch (reason) {
case 'stop': return 'end_turn'
case 'tool_calls': return 'tool_use'
case 'length': return 'max_tokens'
case 'content_filter': return 'content_filter'
default: return reason
}
}
import {
toOpenAITool,
fromOpenAICompletion,
normalizeFinishReason,
buildOpenAIMessageList,
} from './openai-common.js'
// ---------------------------------------------------------------------------
// Adapter implementation
@ -295,9 +69,10 @@ export class OpenAIAdapter implements LLMAdapter {
readonly #client: OpenAI
constructor(apiKey?: string) {
constructor(apiKey?: string, baseURL?: string) {
this.#client = new OpenAI({
apiKey: apiKey ?? process.env['OPENAI_API_KEY'],
baseURL,
})
}
@ -484,31 +259,6 @@ export class OpenAIAdapter implements LLMAdapter {
}
}
// ---------------------------------------------------------------------------
// Private utility
// ---------------------------------------------------------------------------
/**
* Prepend a system message when `systemPrompt` is provided, then append the
* converted conversation messages.
*
* OpenAI represents system instructions as a message with `role: 'system'`
* at the top of the array, not as a separate API parameter.
*/
function buildOpenAIMessageList(
messages: LLMMessage[],
systemPrompt: string | undefined,
): ChatCompletionMessageParam[] {
const result: ChatCompletionMessageParam[] = []
if (systemPrompt !== undefined && systemPrompt.length > 0) {
result.push({ role: 'system', content: systemPrompt })
}
result.push(...toOpenAIMessages(messages))
return result
}
// Re-export types that consumers of this module commonly need alongside the adapter.
export type {
ContentBlock,

View File

@ -52,8 +52,10 @@ import type {
TeamRunResult,
TokenUsage,
} from '../types.js'
import type { RunOptions } from '../agent/runner.js'
import { Agent } from '../agent/agent.js'
import { AgentPool } from '../agent/pool.js'
import { emitTrace, generateRunId } from '../utils/trace.js'
import { ToolRegistry } from '../tool/framework.js'
import { ToolExecutor } from '../tool/executor.js'
import { registerBuiltInTools } from '../tool/built-in/index.js'
@ -92,6 +94,105 @@ function buildAgent(config: AgentConfig): Agent {
return new Agent(config, registry, executor)
}
/** Promise-based delay. */
function sleep(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms))
}
/** Maximum delay cap to prevent runaway exponential backoff (30 seconds). */
const MAX_RETRY_DELAY_MS = 30_000
/**
* Compute the retry delay for a given attempt, capped at {@link MAX_RETRY_DELAY_MS}.
*/
export function computeRetryDelay(
baseDelay: number,
backoff: number,
attempt: number,
): number {
return Math.min(baseDelay * backoff ** (attempt - 1), MAX_RETRY_DELAY_MS)
}
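// Illustrative: with the documented defaults (baseDelay 1000, backoff 2) the
// delay sequence is 1000, 2000, 4000, 8000, 16000, then capped:
//   computeRetryDelay(1000, 2, 1) → 1000
//   computeRetryDelay(1000, 2, 6) → 30000 (32000 capped to MAX_RETRY_DELAY_MS)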
/**
* Execute an agent task with optional retry and exponential backoff.
*
* Exported for testability called internally by {@link executeQueue}.
*
* @param run - The function that executes the task (typically `pool.run`).
* @param task - The task to execute (retry config read from its fields).
* @param onRetry - Called before each retry sleep with event data.
* @param delayFn - Injectable delay function (defaults to real `sleep`).
* @returns The final {@link AgentRunResult} from the last attempt.
*/
export async function executeWithRetry(
run: () => Promise<AgentRunResult>,
task: Task,
onRetry?: (data: { attempt: number; maxAttempts: number; error: string; nextDelayMs: number }) => void,
delayFn: (ms: number) => Promise<void> = sleep,
): Promise<AgentRunResult> {
const rawRetries = Number.isFinite(task.maxRetries) ? task.maxRetries! : 0
const maxAttempts = Math.max(0, rawRetries) + 1
const baseDelay = Math.max(0, Number.isFinite(task.retryDelayMs) ? task.retryDelayMs! : 1000)
const backoff = Math.max(1, Number.isFinite(task.retryBackoff) ? task.retryBackoff! : 2)
let lastError: string = ''
// Accumulate token usage across all attempts so billing/observability
// reflects the true cost of retries.
let totalUsage: TokenUsage = { input_tokens: 0, output_tokens: 0 }
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
try {
const result = await run()
totalUsage = {
input_tokens: totalUsage.input_tokens + result.tokenUsage.input_tokens,
output_tokens: totalUsage.output_tokens + result.tokenUsage.output_tokens,
}
if (result.success) {
return { ...result, tokenUsage: totalUsage }
}
lastError = result.output
// Failure — retry or give up
if (attempt < maxAttempts) {
const delay = computeRetryDelay(baseDelay, backoff, attempt)
onRetry?.({ attempt, maxAttempts, error: lastError, nextDelayMs: delay })
await delayFn(delay)
continue
}
return { ...result, tokenUsage: totalUsage }
} catch (err) {
lastError = err instanceof Error ? err.message : String(err)
if (attempt < maxAttempts) {
const delay = computeRetryDelay(baseDelay, backoff, attempt)
onRetry?.({ attempt, maxAttempts, error: lastError, nextDelayMs: delay })
await delayFn(delay)
continue
}
// All retries exhausted — return a failure result
return {
success: false,
output: lastError,
messages: [],
tokenUsage: totalUsage,
toolCalls: [],
}
}
}
// Should not be reached, but TypeScript needs a return
return {
success: false,
output: lastError,
messages: [],
tokenUsage: totalUsage,
toolCalls: [],
}
}
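// Caller sketch mirroring executeQueue below — retry config is read from the
// task's fields; the onRetry callback here is illustrative logging only.
// const result = await executeWithRetry(
//   () => pool.run(assignee, prompt),
//   task,
//   ({ attempt, nextDelayMs }) => console.log(`retry ${attempt} in ${nextDelayMs}ms`),
// )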
// ---------------------------------------------------------------------------
// Parsed task spec (result of coordinator decomposition)
// ---------------------------------------------------------------------------
@ -161,6 +262,8 @@ interface RunContext {
readonly scheduler: Scheduler
readonly agentResults: Map<string, AgentRunResult>
readonly config: OrchestratorConfig
/** Trace run ID, present when `onTrace` is configured. */
readonly runId?: string
}
/**
@ -239,49 +342,76 @@ async function executeQueue(
// Build the prompt: inject shared memory context + task description
const prompt = await buildTaskPrompt(task, team)
try {
const result = await pool.run(assignee, prompt)
ctx.agentResults.set(`${assignee}:${task.id}`, result)
// Build trace context for this task's agent run
const traceOptions: Partial<RunOptions> | undefined = config.onTrace
? { onTrace: config.onTrace, runId: ctx.runId ?? '', taskId: task.id, traceAgent: assignee }
: undefined
if (result.success) {
// Persist result into shared memory so other agents can read it
const sharedMem = team.getSharedMemoryInstance()
if (sharedMem) {
await sharedMem.write(assignee, `task:${task.id}:result`, result.output)
}
queue.complete(task.id, result.output)
const taskStartMs = config.onTrace ? Date.now() : 0
let retryCount = 0
const result = await executeWithRetry(
() => pool.run(assignee, prompt, traceOptions),
task,
(retryData) => {
retryCount++
config.onProgress?.({
type: 'task_complete',
type: 'task_retry',
task: task.id,
agent: assignee,
data: result,
data: retryData,
} satisfies OrchestratorEvent)
},
)
config.onProgress?.({
type: 'agent_complete',
agent: assignee,
task: task.id,
data: result,
} satisfies OrchestratorEvent)
} else {
queue.fail(task.id, result.output)
config.onProgress?.({
type: 'error',
task: task.id,
agent: assignee,
data: result,
} satisfies OrchestratorEvent)
// Emit task trace
if (config.onTrace) {
const taskEndMs = Date.now()
emitTrace(config.onTrace, {
type: 'task',
runId: ctx.runId ?? '',
taskId: task.id,
taskTitle: task.title,
agent: assignee,
success: result.success,
retries: retryCount,
startMs: taskStartMs,
endMs: taskEndMs,
durationMs: taskEndMs - taskStartMs,
})
}
ctx.agentResults.set(`${assignee}:${task.id}`, result)
if (result.success) {
// Persist result into shared memory so other agents can read it
const sharedMem = team.getSharedMemoryInstance()
if (sharedMem) {
await sharedMem.write(assignee, `task:${task.id}:result`, result.output)
}
} catch (err) {
const message = err instanceof Error ? err.message : String(err)
queue.fail(task.id, message)
queue.complete(task.id, result.output)
config.onProgress?.({
type: 'task_complete',
task: task.id,
agent: assignee,
data: result,
} satisfies OrchestratorEvent)
config.onProgress?.({
type: 'agent_complete',
agent: assignee,
task: task.id,
data: result,
} satisfies OrchestratorEvent)
} else {
queue.fail(task.id, result.output)
config.onProgress?.({
type: 'error',
task: task.id,
agent: assignee,
data: err,
data: result,
} satisfies OrchestratorEvent)
}
})
@ -341,8 +471,8 @@ async function buildTaskPrompt(task: Task, team: Team): Promise<string> {
*/
export class OpenMultiAgent {
private readonly config: Required<
Omit<OrchestratorConfig, 'onProgress'>
> & Pick<OrchestratorConfig, 'onProgress'>
Omit<OrchestratorConfig, 'onProgress' | 'onTrace' | 'defaultBaseURL' | 'defaultApiKey'>
> & Pick<OrchestratorConfig, 'onProgress' | 'onTrace' | 'defaultBaseURL' | 'defaultApiKey'>
private readonly teams: Map<string, Team> = new Map()
private completedTaskCount = 0
@ -360,7 +490,10 @@ export class OpenMultiAgent {
maxConcurrency: config.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY,
defaultModel: config.defaultModel ?? DEFAULT_MODEL,
defaultProvider: config.defaultProvider ?? 'anthropic',
defaultBaseURL: config.defaultBaseURL,
defaultApiKey: config.defaultApiKey,
onProgress: config.onProgress,
onTrace: config.onTrace,
}
}
@ -405,14 +538,24 @@ export class OpenMultiAgent {
* @param prompt - The user prompt to send.
*/
async runAgent(config: AgentConfig, prompt: string): Promise<AgentRunResult> {
const agent = buildAgent(config)
const effective: AgentConfig = {
...config,
provider: config.provider ?? this.config.defaultProvider,
baseURL: config.baseURL ?? this.config.defaultBaseURL,
apiKey: config.apiKey ?? this.config.defaultApiKey,
}
const agent = buildAgent(effective)
this.config.onProgress?.({
type: 'agent_start',
agent: config.name,
data: { prompt },
})
const result = await agent.run(prompt)
const traceOptions: Partial<RunOptions> | undefined = this.config.onTrace
? { onTrace: this.config.onTrace, runId: generateRunId(), traceAgent: config.name }
: undefined
const result = await agent.run(prompt, traceOptions)
this.config.onProgress?.({
type: 'agent_complete',
@ -462,12 +605,15 @@ export class OpenMultiAgent {
name: 'coordinator',
model: this.config.defaultModel,
provider: this.config.defaultProvider,
baseURL: this.config.defaultBaseURL,
apiKey: this.config.defaultApiKey,
systemPrompt: this.buildCoordinatorSystemPrompt(agentConfigs),
maxTurns: 3,
}
const decompositionPrompt = this.buildDecompositionPrompt(goal, agentConfigs)
const coordinatorAgent = buildAgent(coordinatorConfig)
const runId = this.config.onTrace ? generateRunId() : undefined
this.config.onProgress?.({
type: 'agent_start',
@ -475,7 +621,10 @@ export class OpenMultiAgent {
data: { phase: 'decomposition', goal },
})
const decompositionResult = await coordinatorAgent.run(decompositionPrompt)
const decompTraceOptions: Partial<RunOptions> | undefined = this.config.onTrace
? { onTrace: this.config.onTrace, runId: runId ?? '', traceAgent: 'coordinator' }
: undefined
const decompositionResult = await coordinatorAgent.run(decompositionPrompt, decompTraceOptions)
const agentResults = new Map<string, AgentRunResult>()
agentResults.set('coordinator:decompose', decompositionResult)
@ -519,6 +668,7 @@ export class OpenMultiAgent {
scheduler,
agentResults,
config: this.config,
runId,
}
await executeQueue(queue, ctx)
@ -527,7 +677,10 @@ export class OpenMultiAgent {
// Step 5: Coordinator synthesises final result
// ------------------------------------------------------------------
const synthesisPrompt = await this.buildSynthesisPrompt(goal, queue.list(), team)
const synthesisResult = await coordinatorAgent.run(synthesisPrompt)
const synthTraceOptions: Partial<RunOptions> | undefined = this.config.onTrace
? { onTrace: this.config.onTrace, runId: runId ?? '', traceAgent: 'coordinator' }
: undefined
const synthesisResult = await coordinatorAgent.run(synthesisPrompt, synthTraceOptions)
agentResults.set('coordinator', synthesisResult)
this.config.onProgress?.({
@ -564,6 +717,9 @@ export class OpenMultiAgent {
description: string
assignee?: string
dependsOn?: string[]
maxRetries?: number
retryDelayMs?: number
retryBackoff?: number
}>,
): Promise<TeamRunResult> {
const agentConfigs = team.getAgents()
@ -576,6 +732,9 @@ export class OpenMultiAgent {
description: t.description,
assignee: t.assignee,
dependsOn: t.dependsOn,
maxRetries: t.maxRetries,
retryDelayMs: t.retryDelayMs,
retryBackoff: t.retryBackoff,
})),
agentConfigs,
queue,
@ -591,6 +750,7 @@ export class OpenMultiAgent {
scheduler,
agentResults,
config: this.config,
runId: this.config.onTrace ? generateRunId() : undefined,
}
await executeQueue(queue, ctx)
@ -733,7 +893,11 @@ export class OpenMultiAgent {
* then resolving them to real IDs before adding tasks to the queue.
*/
private loadSpecsIntoQueue(
specs: ReadonlyArray<ParsedTaskSpec>,
specs: ReadonlyArray<ParsedTaskSpec & {
maxRetries?: number
retryDelayMs?: number
retryBackoff?: number
}>,
agentConfigs: AgentConfig[],
queue: TaskQueue,
): void {
@ -750,6 +914,9 @@ export class OpenMultiAgent {
assignee: spec.assignee && agentNames.has(spec.assignee)
? spec.assignee
: undefined,
maxRetries: spec.maxRetries,
retryDelayMs: spec.retryDelayMs,
retryBackoff: spec.retryBackoff,
})
titleToId.set(spec.title.toLowerCase().trim(), task.id)
createdTasks.push(task)
@ -792,6 +959,8 @@ export class OpenMultiAgent {
...config,
model: config.model,
provider: config.provider ?? this.config.defaultProvider,
baseURL: config.baseURL ?? this.config.defaultBaseURL,
apiKey: config.apiKey ?? this.config.defaultApiKey,
}
pool.add(buildAgent(effective))
}
@ -825,13 +994,15 @@ export class OpenMultiAgent {
if (!existing) {
collapsed.set(agentName, result)
} else {
// Merge multiple results for the same agent (multi-task case)
// Merge multiple results for the same agent (multi-task case).
// Keep the latest `structured` value (last completed task wins).
collapsed.set(agentName, {
success: existing.success && result.success,
output: [existing.output, result.output].filter(Boolean).join('\n\n---\n\n'),
messages: [...existing.messages, ...result.messages],
tokenUsage: addUsage(existing.tokenUsage, result.tokenUsage),
toolCalls: [...existing.toolCalls, ...result.toolCalls],
structured: result.structured !== undefined ? result.structured : existing.structured,
})
}

View File

@ -356,7 +356,7 @@ export class TaskQueue {
// Re-check against the current state of the whole task set.
// Pass the pre-built map to avoid rebuilding it for every candidate task.
if (isTaskReady(task, allTasks, taskById)) {
if (isTaskReady({ ...task, status: 'pending' }, allTasks, taskById)) {
const unblocked: Task = {
...task,
status: 'pending',

View File

@ -6,6 +6,7 @@
* Stateful orchestration belongs in {@link TaskQueue}.
*/
import { randomUUID } from 'node:crypto'
import type { Task, TaskStatus } from '../types.js'
// ---------------------------------------------------------------------------
@ -30,10 +31,13 @@ export function createTask(input: {
description: string
assignee?: string
dependsOn?: string[]
maxRetries?: number
retryDelayMs?: number
retryBackoff?: number
}): Task {
const now = new Date()
return {
id: crypto.randomUUID(),
id: randomUUID(),
title: input.title,
description: input.description,
status: 'pending' as TaskStatus,
@ -42,6 +46,9 @@ export function createTask(input: {
result: undefined,
createdAt: now,
updatedAt: now,
maxRetries: input.maxRetries,
retryDelayMs: input.retryDelayMs,
retryBackoff: input.retryBackoff,
}
}

View File

@ -6,6 +6,8 @@
* for replay and audit; read-state is tracked per recipient.
*/
import { randomUUID } from 'node:crypto'
// ---------------------------------------------------------------------------
// Message type
// ---------------------------------------------------------------------------
@ -93,7 +95,7 @@ export class MessageBus {
*/
send(from: string, to: string, content: string): Message {
const message: Message = {
id: crypto.randomUUID(),
id: randomUUID(),
from,
to,
content,

View File

@ -186,13 +186,27 @@ export interface ToolDefinition<TInput = Record<string, unknown>> {
export interface AgentConfig {
readonly name: string
readonly model: string
readonly provider?: 'anthropic' | 'ollama' | 'openai'
readonly provider?: 'anthropic' | 'copilot' | 'openai'
/**
* Custom base URL for OpenAI-compatible APIs (Ollama, vLLM, LM Studio, etc.).
* Note: local servers that don't require auth still need `apiKey` set to a
* non-empty placeholder (e.g. `'ollama'`) because the OpenAI SDK validates it.
*/
readonly baseURL?: string
/** API key override; falls back to the provider's standard env var. */
readonly apiKey?: string
readonly systemPrompt?: string
/** Names of tools (from the tool registry) available to this agent. */
readonly tools?: readonly string[]
readonly maxTurns?: number
readonly maxTokens?: number
readonly temperature?: number
/**
* Optional Zod schema for structured output. When set, the agent's final
* output is parsed as JSON and validated against this schema. A single
* retry with error feedback is attempted on validation failure.
*/
readonly outputSchema?: ZodSchema
}
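// Config sketch for an OpenAI-compatible local server. The URL and model name
// are illustrative (Ollama's default endpoint is assumed); note the placeholder
// apiKey the OpenAI SDK requires even when the server itself needs no auth.
// const local: AgentConfig = {
//   name: 'local-coder',
//   model: 'llama3',
//   provider: 'openai',
//   baseURL: 'http://localhost:11434/v1',
//   apiKey: 'ollama',
// }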
/** Lifecycle state tracked during an agent run. */
@ -219,6 +233,12 @@ export interface AgentRunResult {
readonly messages: LLMMessage[]
readonly tokenUsage: TokenUsage
readonly toolCalls: ToolCallRecord[]
/**
* Parsed and validated structured output when `outputSchema` is set on the
* agent config. `undefined` when no schema is configured or validation
* failed after retry.
*/
readonly structured?: unknown
}
// ---------------------------------------------------------------------------
@ -261,6 +281,12 @@ export interface Task {
result?: string
readonly createdAt: Date
updatedAt: Date
/** Maximum number of retry attempts on failure (default: 0 — no retry). */
readonly maxRetries?: number
/** Base delay in ms before the first retry (default: 1000). */
readonly retryDelayMs?: number
/** Exponential backoff multiplier (default: 2). */
readonly retryBackoff?: number
}
// ---------------------------------------------------------------------------
@ -274,6 +300,7 @@ export interface OrchestratorEvent {
| 'agent_complete'
| 'task_start'
| 'task_complete'
| 'task_retry'
| 'message'
| 'error'
readonly agent?: string
@ -285,10 +312,72 @@ export interface OrchestratorEvent {
export interface OrchestratorConfig {
readonly maxConcurrency?: number
readonly defaultModel?: string
readonly defaultProvider?: 'anthropic' | 'ollama' | 'openai'
onProgress?: (event: OrchestratorEvent) => void
readonly defaultProvider?: 'anthropic' | 'copilot' | 'openai'
readonly defaultBaseURL?: string
readonly defaultApiKey?: string
readonly onProgress?: (event: OrchestratorEvent) => void
readonly onTrace?: (event: TraceEvent) => void | Promise<void>
}
// ---------------------------------------------------------------------------
// Trace events — lightweight observability spans
// ---------------------------------------------------------------------------
/** Trace event type discriminants. */
export type TraceEventType = 'llm_call' | 'tool_call' | 'task' | 'agent'
/** Shared fields present on every trace event. */
export interface TraceEventBase {
/** Unique identifier for the entire run (runTeam / runTasks / runAgent call). */
readonly runId: string
readonly type: TraceEventType
/** Unix epoch ms when the span started. */
readonly startMs: number
/** Unix epoch ms when the span ended. */
readonly endMs: number
/** Wall-clock duration in milliseconds (`endMs - startMs`). */
readonly durationMs: number
/** Agent name associated with this span. */
readonly agent: string
/** Task ID associated with this span. */
readonly taskId?: string
}
/** Emitted for each LLM API call (one per agent turn). */
export interface LLMCallTrace extends TraceEventBase {
readonly type: 'llm_call'
readonly model: string
readonly turn: number
readonly tokens: TokenUsage
}
/** Emitted for each tool execution. */
export interface ToolCallTrace extends TraceEventBase {
readonly type: 'tool_call'
readonly tool: string
readonly isError: boolean
}
/** Emitted when a task completes (wraps the full retry sequence). */
export interface TaskTrace extends TraceEventBase {
readonly type: 'task'
readonly taskId: string
readonly taskTitle: string
readonly success: boolean
readonly retries: number
}
/** Emitted when an agent run completes (wraps the full conversation loop). */
export interface AgentTrace extends TraceEventBase {
readonly type: 'agent'
readonly turns: number
readonly tokens: TokenUsage
readonly toolCalls: number
}
/** Discriminated union of all trace event types. */
export type TraceEvent = LLMCallTrace | ToolCallTrace | TaskTrace | AgentTrace
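// Consumer sketch: an `onTrace` handler narrows on `type` to handle each span.
// Console logging here is illustrative; any sink works.
// const onTrace = (e: TraceEvent) => {
//   if (e.type === 'llm_call') console.log(`${e.agent} turn ${e.turn}: ${e.durationMs}ms`)
//   if (e.type === 'task') console.log(`task ${e.taskId} ${e.success ? 'ok' : 'failed'} after ${e.retries} retries`)
// }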
// ---------------------------------------------------------------------------
// Memory
// ---------------------------------------------------------------------------

34
src/utils/trace.ts Normal file
View File

@ -0,0 +1,34 @@
/**
* @fileoverview Trace emission utilities for the observability layer.
*/
import { randomUUID } from 'node:crypto'
import type { TraceEvent } from '../types.js'
/**
* Safely emit a trace event. Swallows callback errors so a broken
* subscriber never crashes agent execution.
*/
export function emitTrace(
fn: ((event: TraceEvent) => void | Promise<void>) | undefined,
event: TraceEvent,
): void {
if (!fn) return
try {
// Guard async callbacks: if fn returns a Promise, swallow its rejection
// so an async onTrace never produces an unhandled promise rejection.
const result = fn(event) as unknown
if (result && typeof (result as Promise<unknown>).catch === 'function') {
;(result as Promise<unknown>).catch(noop)
}
} catch {
// Intentionally swallowed — observability must never break execution.
}
}
function noop() {}
/** Generate a unique run ID for trace correlation. */
export function generateRunId(): string {
return randomUUID()
}

57
tests/semaphore.test.ts Normal file
View File

@ -0,0 +1,57 @@
import { describe, it, expect } from 'vitest'
import { Semaphore } from '../src/utils/semaphore.js'
describe('Semaphore', () => {
it('throws on max < 1', () => {
expect(() => new Semaphore(0)).toThrow()
})
it('allows up to max concurrent holders', async () => {
const sem = new Semaphore(2)
let running = 0
let peak = 0
const work = async () => {
await sem.acquire()
running++
peak = Math.max(peak, running)
await new Promise((r) => setTimeout(r, 30))
running--
sem.release()
}
await Promise.all([work(), work(), work(), work()])
expect(peak).toBeLessThanOrEqual(2)
})
it('run() auto-releases on success', async () => {
const sem = new Semaphore(1)
const result = await sem.run(async () => 42)
expect(result).toBe(42)
expect(sem.active).toBe(0)
})
it('run() auto-releases on error', async () => {
const sem = new Semaphore(1)
await expect(sem.run(async () => { throw new Error('oops') })).rejects.toThrow('oops')
expect(sem.active).toBe(0)
})
it('tracks active and pending counts', async () => {
const sem = new Semaphore(1)
await sem.acquire()
expect(sem.active).toBe(1)
// This will queue
const p = sem.acquire()
expect(sem.pending).toBe(1)
sem.release()
await p
expect(sem.active).toBe(1)
expect(sem.pending).toBe(0)
sem.release()
expect(sem.active).toBe(0)
})
})

122
tests/shared-memory.test.ts Normal file
View File

@ -0,0 +1,122 @@
import { describe, it, expect } from 'vitest'
import { SharedMemory } from '../src/memory/shared.js'
describe('SharedMemory', () => {
// -------------------------------------------------------------------------
// Write & read
// -------------------------------------------------------------------------
it('writes and reads a value under a namespaced key', async () => {
const mem = new SharedMemory()
await mem.write('researcher', 'findings', 'TS 5.5 ships const type params')
const entry = await mem.read('researcher/findings')
expect(entry).not.toBeNull()
expect(entry!.value).toBe('TS 5.5 ships const type params')
})
it('returns null for a non-existent key', async () => {
const mem = new SharedMemory()
expect(await mem.read('nope/nothing')).toBeNull()
})
// -------------------------------------------------------------------------
// Namespace isolation
// -------------------------------------------------------------------------
it('isolates writes between agents', async () => {
const mem = new SharedMemory()
await mem.write('alice', 'plan', 'plan A')
await mem.write('bob', 'plan', 'plan B')
const alice = await mem.read('alice/plan')
const bob = await mem.read('bob/plan')
expect(alice!.value).toBe('plan A')
expect(bob!.value).toBe('plan B')
})
it('listByAgent returns only that agent\'s entries', async () => {
const mem = new SharedMemory()
await mem.write('alice', 'a1', 'v1')
await mem.write('alice', 'a2', 'v2')
await mem.write('bob', 'b1', 'v3')
const aliceEntries = await mem.listByAgent('alice')
expect(aliceEntries).toHaveLength(2)
expect(aliceEntries.every((e) => e.key.startsWith('alice/'))).toBe(true)
})
// -------------------------------------------------------------------------
// Overwrite
// -------------------------------------------------------------------------
it('overwrites a value and preserves createdAt', async () => {
const mem = new SharedMemory()
await mem.write('agent', 'key', 'first')
const first = await mem.read('agent/key')
await mem.write('agent', 'key', 'second')
const second = await mem.read('agent/key')
expect(second!.value).toBe('second')
expect(second!.createdAt.getTime()).toBe(first!.createdAt.getTime())
})
// -------------------------------------------------------------------------
// Metadata
// -------------------------------------------------------------------------
it('stores metadata alongside the value', async () => {
const mem = new SharedMemory()
await mem.write('agent', 'key', 'val', { priority: 'high' })
const entry = await mem.read('agent/key')
expect(entry!.metadata).toMatchObject({ priority: 'high', agent: 'agent' })
})
// -------------------------------------------------------------------------
// Summary
// -------------------------------------------------------------------------
it('returns empty string for an empty store', async () => {
const mem = new SharedMemory()
expect(await mem.getSummary()).toBe('')
})
it('produces a markdown summary grouped by agent', async () => {
const mem = new SharedMemory()
await mem.write('researcher', 'findings', 'result A')
await mem.write('coder', 'plan', 'implement X')
const summary = await mem.getSummary()
expect(summary).toContain('## Shared Team Memory')
expect(summary).toContain('### researcher')
expect(summary).toContain('### coder')
expect(summary).toContain('findings: result A')
expect(summary).toContain('plan: implement X')
})
it('truncates long values in the summary', async () => {
const mem = new SharedMemory()
const longValue = 'x'.repeat(300)
await mem.write('agent', 'big', longValue)
const summary = await mem.getSummary()
// Summary truncates at 200 chars → 197 + '…'
expect(summary.length).toBeLessThan(longValue.length)
expect(summary).toContain('…')
})
// -------------------------------------------------------------------------
// listAll
// -------------------------------------------------------------------------
it('listAll returns entries from all agents', async () => {
const mem = new SharedMemory()
await mem.write('a', 'k1', 'v1')
await mem.write('b', 'k2', 'v2')
const all = await mem.listAll()
expect(all).toHaveLength(2)
})
})

View File

@ -0,0 +1,331 @@
import { describe, it, expect } from 'vitest'
import { z } from 'zod'
import {
buildStructuredOutputInstruction,
extractJSON,
validateOutput,
} from '../src/agent/structured-output.js'
import { Agent } from '../src/agent/agent.js'
import { AgentRunner } from '../src/agent/runner.js'
import { ToolRegistry } from '../src/tool/framework.js'
import { ToolExecutor } from '../src/tool/executor.js'
import type { AgentConfig, LLMAdapter, LLMResponse } from '../src/types.js'
// ---------------------------------------------------------------------------
// Mock LLM adapter factory
// ---------------------------------------------------------------------------
function mockAdapter(responses: string[]): LLMAdapter {
let callIndex = 0
return {
name: 'mock',
async chat() {
const text = responses[callIndex++] ?? ''
return {
id: `mock-${callIndex}`,
content: [{ type: 'text' as const, text }],
model: 'mock-model',
stop_reason: 'end_turn',
usage: { input_tokens: 10, output_tokens: 20 },
} satisfies LLMResponse
},
async *stream() {
/* unused in these tests */
},
}
}
// ---------------------------------------------------------------------------
// extractJSON
// ---------------------------------------------------------------------------
describe('extractJSON', () => {
it('parses clean JSON', () => {
expect(extractJSON('{"a":1}')).toEqual({ a: 1 })
})
it('parses JSON wrapped in ```json fence', () => {
const raw = 'Here is the result:\n```json\n{"a":1}\n```\nDone.'
expect(extractJSON(raw)).toEqual({ a: 1 })
})
it('parses JSON wrapped in bare ``` fence', () => {
const raw = '```\n{"a":1}\n```'
expect(extractJSON(raw)).toEqual({ a: 1 })
})
it('extracts embedded JSON object from surrounding text', () => {
const raw = 'The answer is {"summary":"hello","score":5} as shown above.'
expect(extractJSON(raw)).toEqual({ summary: 'hello', score: 5 })
})
it('extracts JSON array', () => {
expect(extractJSON('[1,2,3]')).toEqual([1, 2, 3])
})
it('extracts embedded JSON array from surrounding text', () => {
const raw = 'Here: [{"a":1},{"a":2}] end'
expect(extractJSON(raw)).toEqual([{ a: 1 }, { a: 2 }])
})
it('throws on non-JSON text', () => {
expect(() => extractJSON('just plain text')).toThrow('Failed to extract JSON')
})
it('throws on empty string', () => {
expect(() => extractJSON('')).toThrow('Failed to extract JSON')
})
})
// ---------------------------------------------------------------------------
// validateOutput
// ---------------------------------------------------------------------------
describe('validateOutput', () => {
const schema = z.object({
summary: z.string(),
score: z.number().min(0).max(10),
})
it('returns validated data on success', () => {
const data = { summary: 'hello', score: 5 }
expect(validateOutput(schema, data)).toEqual(data)
})
it('throws on missing field', () => {
expect(() => validateOutput(schema, { summary: 'hello' })).toThrow(
'Output validation failed',
)
})
it('throws on wrong type', () => {
expect(() =>
validateOutput(schema, { summary: 'hello', score: 'not a number' }),
).toThrow('Output validation failed')
})
it('throws on value out of range', () => {
expect(() =>
validateOutput(schema, { summary: 'hello', score: 99 }),
).toThrow('Output validation failed')
})
it('applies Zod transforms', () => {
const transformSchema = z.object({
name: z.string().transform(s => s.toUpperCase()),
})
const result = validateOutput(transformSchema, { name: 'alice' })
expect(result).toEqual({ name: 'ALICE' })
})
it('strips unknown keys with strict schema', () => {
const strictSchema = z.object({ a: z.number() }).strict()
expect(() =>
validateOutput(strictSchema, { a: 1, b: 2 }),
).toThrow('Output validation failed')
})
it('shows (root) for root-level errors', () => {
const stringSchema = z.string()
expect(() => validateOutput(stringSchema, 42)).toThrow('(root)')
})
})
// ---------------------------------------------------------------------------
// buildStructuredOutputInstruction
// ---------------------------------------------------------------------------
describe('buildStructuredOutputInstruction', () => {
it('includes the JSON Schema representation', () => {
const schema = z.object({
summary: z.string(),
score: z.number(),
})
const instruction = buildStructuredOutputInstruction(schema)
expect(instruction).toContain('Output Format (REQUIRED)')
expect(instruction).toContain('"type": "object"')
expect(instruction).toContain('"summary"')
expect(instruction).toContain('"score"')
expect(instruction).toContain('ONLY valid JSON')
})
it('includes description from Zod schema', () => {
const schema = z.object({
name: z.string().describe('The person name'),
})
const instruction = buildStructuredOutputInstruction(schema)
expect(instruction).toContain('The person name')
})
})
// ---------------------------------------------------------------------------
// Agent integration (mocked LLM)
// ---------------------------------------------------------------------------
/**
* Build an Agent with a mocked LLM adapter by injecting an AgentRunner
* directly into the Agent's private `runner` field, bypassing `createAdapter`.
*/
function buildMockAgent(config: AgentConfig, responses: string[]): Agent {
const adapter = mockAdapter(responses)
const registry = new ToolRegistry()
const executor = new ToolExecutor(registry)
const agent = new Agent(config, registry, executor)
// Inject a pre-built runner so `getRunner()` returns it without calling createAdapter.
const runner = new AgentRunner(adapter, registry, executor, {
model: config.model,
systemPrompt: config.systemPrompt,
maxTurns: config.maxTurns,
maxTokens: config.maxTokens,
temperature: config.temperature,
agentName: config.name,
})
;(agent as any).runner = runner
return agent
}
describe('Agent structured output (end-to-end)', () => {
const schema = z.object({
summary: z.string(),
sentiment: z.enum(['positive', 'negative', 'neutral']),
confidence: z.number().min(0).max(1),
})
const baseConfig: AgentConfig = {
name: 'test-agent',
model: 'mock-model',
systemPrompt: 'You are a test agent.',
outputSchema: schema,
}
it('happy path: valid JSON on first attempt', async () => {
const validJSON = JSON.stringify({
summary: 'Great product',
sentiment: 'positive',
confidence: 0.95,
})
const agent = buildMockAgent(baseConfig, [validJSON])
const result = await agent.run('Analyze this review')
expect(result.success).toBe(true)
expect(result.structured).toEqual({
summary: 'Great product',
sentiment: 'positive',
confidence: 0.95,
})
})
it('retry: invalid first attempt, valid second attempt', async () => {
const invalidJSON = JSON.stringify({
summary: 'Great product',
sentiment: 'INVALID_VALUE',
confidence: 0.95,
})
const validJSON = JSON.stringify({
summary: 'Great product',
sentiment: 'positive',
confidence: 0.95,
})
const agent = buildMockAgent(baseConfig, [invalidJSON, validJSON])
const result = await agent.run('Analyze this review')
expect(result.success).toBe(true)
expect(result.structured).toEqual({
summary: 'Great product',
sentiment: 'positive',
confidence: 0.95,
})
// Token usage should reflect both attempts
expect(result.tokenUsage.input_tokens).toBe(20) // 10 + 10
expect(result.tokenUsage.output_tokens).toBe(40) // 20 + 20
})
it('both attempts fail: success=false, structured=undefined', async () => {
const bad1 = '{"summary": "ok", "sentiment": "WRONG"}'
const bad2 = '{"summary": "ok", "sentiment": "ALSO_WRONG"}'
const agent = buildMockAgent(baseConfig, [bad1, bad2])
const result = await agent.run('Analyze this review')
expect(result.success).toBe(false)
expect(result.structured).toBeUndefined()
})
it('no outputSchema: original behavior, structured is undefined', async () => {
const configNoSchema: AgentConfig = {
name: 'plain-agent',
model: 'mock-model',
systemPrompt: 'You are a test agent.',
}
const agent = buildMockAgent(configNoSchema, ['Just plain text output'])
const result = await agent.run('Hello')
expect(result.success).toBe(true)
expect(result.output).toBe('Just plain text output')
expect(result.structured).toBeUndefined()
})
it('handles JSON wrapped in markdown fence', async () => {
const fenced = '```json\n{"summary":"ok","sentiment":"neutral","confidence":0.5}\n```'
const agent = buildMockAgent(baseConfig, [fenced])
const result = await agent.run('Analyze')
expect(result.success).toBe(true)
expect(result.structured).toEqual({
summary: 'ok',
sentiment: 'neutral',
confidence: 0.5,
})
})
it('non-JSON output triggers retry, valid JSON on retry succeeds', async () => {
const nonJSON = 'I am not sure how to analyze this.'
const validJSON = JSON.stringify({
summary: 'Uncertain',
sentiment: 'neutral',
confidence: 0.1,
})
const agent = buildMockAgent(baseConfig, [nonJSON, validJSON])
const result = await agent.run('Analyze this review')
expect(result.success).toBe(true)
expect(result.structured).toEqual({
summary: 'Uncertain',
sentiment: 'neutral',
confidence: 0.1,
})
})
it('non-JSON output on both attempts: success=false', async () => {
const agent = buildMockAgent(baseConfig, [
'Sorry, I cannot do that.',
'Still cannot do it.',
])
const result = await agent.run('Analyze this review')
expect(result.success).toBe(false)
expect(result.structured).toBeUndefined()
})
it('token usage on first-attempt success reflects single call only', async () => {
const validJSON = JSON.stringify({
summary: 'Good',
sentiment: 'positive',
confidence: 0.9,
})
const agent = buildMockAgent(baseConfig, [validJSON])
const result = await agent.run('Analyze')
expect(result.tokenUsage.input_tokens).toBe(10)
expect(result.tokenUsage.output_tokens).toBe(20)
})
})
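// ---------------------------------------------------------------------------
// For orientation: a minimal sketch of the parse step the suite above pins
// down (strip an optional ```json fence, JSON.parse, then validate with the
// zod schema; the Agent retries once on failure). The helper name and body
// are an inference from the observed behavior, not the Agent's internals.
// ---------------------------------------------------------------------------
function parseStructuredSketch<T>(raw: string, schema: z.ZodType<T>): T | undefined {
  const unfenced = raw
    .replace(/^```(?:json)?\s*/i, '') // leading fence, if any
    .replace(/\s*```\s*$/, '') // trailing fence, if any
  try {
    const parsed = schema.safeParse(JSON.parse(unfenced))
    return parsed.success ? parsed.data : undefined
  } catch {
    return undefined // non-JSON output; the caller retries once, then gives up
  }
}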

244
tests/task-queue.test.ts Normal file
View File

@ -0,0 +1,244 @@
import { describe, it, expect, vi } from 'vitest'
import { TaskQueue } from '../src/task/queue.js'
import { createTask } from '../src/task/task.js'
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/** Create a simple task with a predictable id. */
function task(id: string, opts: { dependsOn?: string[]; assignee?: string } = {}) {
const t = createTask({ title: id, description: `task ${id}`, assignee: opts.assignee })
// Override the random UUID so tests can reference tasks by name.
return { ...t, id, dependsOn: opts.dependsOn } as ReturnType<typeof createTask>
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
describe('TaskQueue', () => {
// -------------------------------------------------------------------------
// Basic add & query
// -------------------------------------------------------------------------
it('adds a task and lists it', () => {
const q = new TaskQueue()
q.add(task('a'))
expect(q.list()).toHaveLength(1)
expect(q.list()[0].id).toBe('a')
})
it('fires task:ready for a task with no dependencies', () => {
const q = new TaskQueue()
const handler = vi.fn()
q.on('task:ready', handler)
q.add(task('a'))
expect(handler).toHaveBeenCalledTimes(1)
expect(handler.mock.calls[0][0].id).toBe('a')
})
it('blocks a task whose dependency is not yet completed', () => {
const q = new TaskQueue()
q.add(task('a'))
q.add(task('b', { dependsOn: ['a'] }))
const b = q.list().find((t) => t.id === 'b')!
expect(b.status).toBe('blocked')
})
// -------------------------------------------------------------------------
// Dependency resolution
// -------------------------------------------------------------------------
it('unblocks a dependent task when its dependency completes', () => {
const q = new TaskQueue()
const readyHandler = vi.fn()
q.on('task:ready', readyHandler)
q.add(task('a'))
q.add(task('b', { dependsOn: ['a'] }))
// 'a' fires task:ready, 'b' is blocked
expect(readyHandler).toHaveBeenCalledTimes(1)
q.complete('a', 'done')
// 'b' should now be unblocked → fires task:ready
expect(readyHandler).toHaveBeenCalledTimes(2)
expect(readyHandler.mock.calls[1][0].id).toBe('b')
expect(q.list().find((t) => t.id === 'b')!.status).toBe('pending')
})
it('keeps a task blocked until ALL dependencies complete', () => {
const q = new TaskQueue()
q.add(task('a'))
q.add(task('b'))
q.add(task('c', { dependsOn: ['a', 'b'] }))
q.complete('a')
const cAfterA = q.list().find((t) => t.id === 'c')!
expect(cAfterA.status).toBe('blocked')
q.complete('b')
const cAfterB = q.list().find((t) => t.id === 'c')!
expect(cAfterB.status).toBe('pending')
})
// -------------------------------------------------------------------------
// Cascade failure
// -------------------------------------------------------------------------
it('cascades failure to direct dependents', () => {
const q = new TaskQueue()
const failHandler = vi.fn()
q.on('task:failed', failHandler)
q.add(task('a'))
q.add(task('b', { dependsOn: ['a'] }))
q.fail('a', 'boom')
expect(failHandler).toHaveBeenCalledTimes(2) // a + b
expect(q.list().find((t) => t.id === 'b')!.status).toBe('failed')
expect(q.list().find((t) => t.id === 'b')!.result).toContain('dependency')
})
it('cascades failure transitively (a → b → c)', () => {
const q = new TaskQueue()
q.add(task('a'))
q.add(task('b', { dependsOn: ['a'] }))
q.add(task('c', { dependsOn: ['b'] }))
q.fail('a', 'boom')
expect(q.list().every((t) => t.status === 'failed')).toBe(true)
})
it('does not cascade failure to independent tasks', () => {
const q = new TaskQueue()
q.add(task('a'))
q.add(task('b'))
q.add(task('c', { dependsOn: ['a'] }))
q.fail('a', 'boom')
expect(q.list().find((t) => t.id === 'b')!.status).toBe('pending')
expect(q.list().find((t) => t.id === 'c')!.status).toBe('failed')
})
// -------------------------------------------------------------------------
// Completion
// -------------------------------------------------------------------------
it('fires all:complete when every task reaches a terminal state', () => {
const q = new TaskQueue()
const allComplete = vi.fn()
q.on('all:complete', allComplete)
q.add(task('a'))
q.add(task('b'))
q.complete('a')
expect(allComplete).not.toHaveBeenCalled()
q.complete('b')
expect(allComplete).toHaveBeenCalledTimes(1)
})
it('fires all:complete when mix of completed and failed', () => {
const q = new TaskQueue()
const allComplete = vi.fn()
q.on('all:complete', allComplete)
q.add(task('a'))
q.add(task('b', { dependsOn: ['a'] }))
q.fail('a', 'err') // cascades to b
expect(allComplete).toHaveBeenCalledTimes(1)
})
it('isComplete returns true for an empty queue', () => {
const q = new TaskQueue()
expect(q.isComplete()).toBe(true)
})
// -------------------------------------------------------------------------
// Query: next / nextAvailable
// -------------------------------------------------------------------------
it('next() returns a pending task for the given assignee', () => {
const q = new TaskQueue()
q.add(task('a', { assignee: 'alice' }))
q.add(task('b', { assignee: 'bob' }))
expect(q.next('bob')?.id).toBe('b')
})
it('next() returns undefined when no pending task matches', () => {
const q = new TaskQueue()
q.add(task('a', { assignee: 'alice' }))
expect(q.next('bob')).toBeUndefined()
})
it('nextAvailable() prefers unassigned tasks', () => {
const q = new TaskQueue()
q.add(task('assigned', { assignee: 'alice' }))
q.add(task('unassigned'))
expect(q.nextAvailable()?.id).toBe('unassigned')
})
// -------------------------------------------------------------------------
// Progress
// -------------------------------------------------------------------------
it('getProgress() returns correct counts', () => {
const q = new TaskQueue()
q.add(task('a'))
q.add(task('b'))
q.add(task('c', { dependsOn: ['a'] }))
q.complete('a')
const p = q.getProgress()
expect(p.total).toBe(3)
expect(p.completed).toBe(1)
expect(p.pending).toBe(2) // b + c (unblocked)
expect(p.blocked).toBe(0)
})
// -------------------------------------------------------------------------
// Event unsubscribe
// -------------------------------------------------------------------------
it('unsubscribe stops receiving events', () => {
const q = new TaskQueue()
const handler = vi.fn()
const off = q.on('task:ready', handler)
q.add(task('a'))
expect(handler).toHaveBeenCalledTimes(1)
off()
q.add(task('b'))
expect(handler).toHaveBeenCalledTimes(1) // no new call
})
// -------------------------------------------------------------------------
// Error cases
// -------------------------------------------------------------------------
it('throws when completing a non-existent task', () => {
const q = new TaskQueue()
expect(() => q.complete('ghost')).toThrow('not found')
})
it('throws when failing a non-existent task', () => {
const q = new TaskQueue()
expect(() => q.fail('ghost', 'err')).toThrow('not found')
})
})
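// ---------------------------------------------------------------------------
// The cascade tests above suggest a small recursive walk: mark the task
// failed, then fail every dependent that is not already failed. A sketch of
// the idea only; the queue's real event-driven code is not shown here.
// ---------------------------------------------------------------------------
type QTask = ReturnType<typeof createTask>
function cascadeFailSketch(tasks: QTask[], id: string): void {
  const t = tasks.find((x) => x.id === id)
  if (!t) throw new Error(`task ${id} not found`)
  t.status = 'failed'
  for (const dep of tasks) {
    if (dep.status !== 'failed' && (dep.dependsOn ?? []).includes(id)) {
      dep.result = `dependency ${id} failed` // surfaced via task:failed in the real queue
      cascadeFailSketch(tasks, dep.id)
    }
  }
}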

368
tests/task-retry.test.ts Normal file
View File

@ -0,0 +1,368 @@
import { describe, it, expect, vi } from 'vitest'
import { createTask } from '../src/task/task.js'
import { executeWithRetry, computeRetryDelay } from '../src/orchestrator/orchestrator.js'
import type { AgentRunResult } from '../src/types.js'
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
const SUCCESS_RESULT: AgentRunResult = {
success: true,
output: 'done',
messages: [],
tokenUsage: { input_tokens: 10, output_tokens: 20 },
toolCalls: [],
}
const FAILURE_RESULT: AgentRunResult = {
success: false,
output: 'agent failed',
messages: [],
tokenUsage: { input_tokens: 10, output_tokens: 20 },
toolCalls: [],
}
/** No-op delay for tests. */
const noDelay = () => Promise.resolve()
// ---------------------------------------------------------------------------
// computeRetryDelay
// ---------------------------------------------------------------------------
describe('computeRetryDelay', () => {
it('computes exponential backoff', () => {
expect(computeRetryDelay(1000, 2, 1)).toBe(1000) // 1000 * 2^0
expect(computeRetryDelay(1000, 2, 2)).toBe(2000) // 1000 * 2^1
expect(computeRetryDelay(1000, 2, 3)).toBe(4000) // 1000 * 2^2
})
it('caps at 30 seconds', () => {
// 1000 * 2^20 = 1,048,576,000 — way over cap
expect(computeRetryDelay(1000, 2, 21)).toBe(30_000)
})
it('handles backoff of 1 (constant delay)', () => {
expect(computeRetryDelay(500, 1, 1)).toBe(500)
expect(computeRetryDelay(500, 1, 5)).toBe(500)
})
})
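// The three cases above are consistent with one clamped formula. A sketch
// (an inference; the exported implementation may differ in detail):
const sketchDelay = (baseMs: number, backoff: number, attempt: number): number =>
  // Exponential growth from the base, backoff clamped to at least 1,
  // result capped at 30 seconds.
  Math.min(baseMs * Math.max(backoff, 1) ** (attempt - 1), 30_000)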
// ---------------------------------------------------------------------------
// createTask: retry fields
// ---------------------------------------------------------------------------
describe('createTask with retry fields', () => {
it('passes through retry config', () => {
const t = createTask({
title: 'Retry task',
description: 'test',
maxRetries: 3,
retryDelayMs: 500,
retryBackoff: 1.5,
})
expect(t.maxRetries).toBe(3)
expect(t.retryDelayMs).toBe(500)
expect(t.retryBackoff).toBe(1.5)
})
it('defaults retry fields to undefined', () => {
const t = createTask({ title: 'No retry', description: 'test' })
expect(t.maxRetries).toBeUndefined()
expect(t.retryDelayMs).toBeUndefined()
expect(t.retryBackoff).toBeUndefined()
})
})
// ---------------------------------------------------------------------------
// executeWithRetry — tests the real exported function
// ---------------------------------------------------------------------------
describe('executeWithRetry', () => {
it('succeeds on first attempt with no retry config', async () => {
const run = vi.fn().mockResolvedValue(SUCCESS_RESULT)
const task = createTask({ title: 'Simple', description: 'test' })
const result = await executeWithRetry(run, task, undefined, noDelay)
expect(result.success).toBe(true)
expect(result.output).toBe('done')
expect(run).toHaveBeenCalledTimes(1)
})
it('succeeds on first attempt even when maxRetries > 0', async () => {
const run = vi.fn().mockResolvedValue(SUCCESS_RESULT)
const task = createTask({
title: 'Has retries',
description: 'test',
maxRetries: 3,
})
const result = await executeWithRetry(run, task, undefined, noDelay)
expect(result.success).toBe(true)
expect(run).toHaveBeenCalledTimes(1)
})
it('retries on exception and succeeds on second attempt', async () => {
const run = vi.fn()
.mockRejectedValueOnce(new Error('transient error'))
.mockResolvedValueOnce(SUCCESS_RESULT)
const task = createTask({
title: 'Retry task',
description: 'test',
maxRetries: 2,
retryDelayMs: 100,
retryBackoff: 2,
})
const retryEvents: unknown[] = []
const result = await executeWithRetry(
run,
task,
(data) => retryEvents.push(data),
noDelay,
)
expect(result.success).toBe(true)
expect(run).toHaveBeenCalledTimes(2)
expect(retryEvents).toHaveLength(1)
expect(retryEvents[0]).toEqual({
attempt: 1,
maxAttempts: 3,
error: 'transient error',
nextDelayMs: 100, // 100 * 2^0
})
})
it('retries on success:false and succeeds on second attempt', async () => {
const run = vi.fn()
.mockResolvedValueOnce(FAILURE_RESULT)
.mockResolvedValueOnce(SUCCESS_RESULT)
const task = createTask({
title: 'Retry task',
description: 'test',
maxRetries: 1,
retryDelayMs: 50,
})
const result = await executeWithRetry(run, task, undefined, noDelay)
expect(result.success).toBe(true)
expect(run).toHaveBeenCalledTimes(2)
})
it('exhausts all retries on persistent exception', async () => {
const run = vi.fn().mockRejectedValue(new Error('persistent error'))
const task = createTask({
title: 'Always fails',
description: 'test',
maxRetries: 2,
retryDelayMs: 10,
retryBackoff: 1,
})
const retryEvents: unknown[] = []
const result = await executeWithRetry(
run,
task,
(data) => retryEvents.push(data),
noDelay,
)
expect(result.success).toBe(false)
expect(result.output).toBe('persistent error')
expect(run).toHaveBeenCalledTimes(3) // 1 initial + 2 retries
expect(retryEvents).toHaveLength(2)
})
it('exhausts all retries on persistent success:false', async () => {
const run = vi.fn().mockResolvedValue(FAILURE_RESULT)
const task = createTask({
title: 'Always fails',
description: 'test',
maxRetries: 1,
})
const result = await executeWithRetry(run, task, undefined, noDelay)
expect(result.success).toBe(false)
expect(result.output).toBe('agent failed')
expect(run).toHaveBeenCalledTimes(2)
})
it('emits correct exponential backoff delays', async () => {
const run = vi.fn().mockRejectedValue(new Error('error'))
const task = createTask({
title: 'Backoff test',
description: 'test',
maxRetries: 3,
retryDelayMs: 100,
retryBackoff: 2,
})
const retryEvents: Array<{ nextDelayMs: number }> = []
await executeWithRetry(
run,
task,
(data) => retryEvents.push(data),
noDelay,
)
expect(retryEvents).toHaveLength(3)
expect(retryEvents[0]!.nextDelayMs).toBe(100) // 100 * 2^0
expect(retryEvents[1]!.nextDelayMs).toBe(200) // 100 * 2^1
expect(retryEvents[2]!.nextDelayMs).toBe(400) // 100 * 2^2
})
it('no retry events when maxRetries is 0 (default)', async () => {
const run = vi.fn().mockRejectedValue(new Error('fail'))
const task = createTask({ title: 'No retry', description: 'test' })
const retryEvents: unknown[] = []
const result = await executeWithRetry(
run,
task,
(data) => retryEvents.push(data),
noDelay,
)
expect(result.success).toBe(false)
expect(run).toHaveBeenCalledTimes(1)
expect(retryEvents).toHaveLength(0)
})
it('calls the delay function with computed delay', async () => {
const run = vi.fn()
.mockRejectedValueOnce(new Error('error'))
.mockResolvedValueOnce(SUCCESS_RESULT)
const task = createTask({
title: 'Delay test',
description: 'test',
maxRetries: 1,
retryDelayMs: 250,
retryBackoff: 3,
})
const mockDelay = vi.fn().mockResolvedValue(undefined)
await executeWithRetry(run, task, undefined, mockDelay)
expect(mockDelay).toHaveBeenCalledTimes(1)
expect(mockDelay).toHaveBeenCalledWith(250) // 250 * 3^0
})
it('caps delay at 30 seconds', async () => {
const run = vi.fn()
.mockRejectedValueOnce(new Error('error'))
.mockResolvedValueOnce(SUCCESS_RESULT)
const task = createTask({
title: 'Cap test',
description: 'test',
maxRetries: 1,
retryDelayMs: 50_000,
retryBackoff: 2,
})
const mockDelay = vi.fn().mockResolvedValue(undefined)
await executeWithRetry(run, task, undefined, mockDelay)
expect(mockDelay).toHaveBeenCalledWith(30_000) // capped
})
it('accumulates token usage across retry attempts', async () => {
const failResult: AgentRunResult = {
...FAILURE_RESULT,
tokenUsage: { input_tokens: 100, output_tokens: 50 },
}
const successResult: AgentRunResult = {
...SUCCESS_RESULT,
tokenUsage: { input_tokens: 200, output_tokens: 80 },
}
const run = vi.fn()
.mockResolvedValueOnce(failResult)
.mockResolvedValueOnce(failResult)
.mockResolvedValueOnce(successResult)
const task = createTask({
title: 'Token test',
description: 'test',
maxRetries: 2,
retryDelayMs: 10,
})
const result = await executeWithRetry(run, task, undefined, noDelay)
expect(result.success).toBe(true)
// 100+100+200 input, 50+50+80 output
expect(result.tokenUsage.input_tokens).toBe(400)
expect(result.tokenUsage.output_tokens).toBe(180)
})
it('accumulates token usage even when all retries fail', async () => {
const failResult: AgentRunResult = {
...FAILURE_RESULT,
tokenUsage: { input_tokens: 50, output_tokens: 30 },
}
const run = vi.fn().mockResolvedValue(failResult)
const task = createTask({
title: 'Token fail test',
description: 'test',
maxRetries: 1,
})
const result = await executeWithRetry(run, task, undefined, noDelay)
expect(result.success).toBe(false)
// 50+50 input, 30+30 output (2 attempts)
expect(result.tokenUsage.input_tokens).toBe(100)
expect(result.tokenUsage.output_tokens).toBe(60)
})
it('clamps negative maxRetries to 0 (single attempt)', async () => {
const run = vi.fn().mockRejectedValue(new Error('fail'))
const task = createTask({
title: 'Negative retry',
description: 'test',
maxRetries: -5,
})
// Force the raw negative value onto the task, in case createTask ever clamps it.
;(task as any).maxRetries = -5
const result = await executeWithRetry(run, task, undefined, noDelay)
expect(result.success).toBe(false)
expect(run).toHaveBeenCalledTimes(1) // exactly 1 attempt, no retries
})
it('clamps backoff below 1 to 1 (constant delay)', async () => {
const run = vi.fn()
.mockRejectedValueOnce(new Error('error'))
.mockResolvedValueOnce(SUCCESS_RESULT)
const task = createTask({
title: 'Bad backoff',
description: 'test',
maxRetries: 1,
retryDelayMs: 100,
retryBackoff: -2,
})
// As above, force the raw negative backoff past any createTask normalization.
;(task as any).retryBackoff = -2
const mockDelay = vi.fn().mockResolvedValue(undefined)
await executeWithRetry(run, task, undefined, mockDelay)
// backoff clamped to 1, so delay = 100 * 1^0 = 100
expect(mockDelay).toHaveBeenCalledWith(100)
})
})
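// ---------------------------------------------------------------------------
// For orientation, a condensed sketch of the retry contract the suite above
// pins down. It reuses this file's helpers; the default delay/backoff values
// and the body itself are inferences, not the shipped implementation.
// ---------------------------------------------------------------------------
async function retrySketch(
  run: () => Promise<AgentRunResult>,
  task: ReturnType<typeof createTask>,
  onRetry?: (e: { attempt: number; maxAttempts: number; error: string; nextDelayMs: number }) => void,
  delay: (ms: number) => Promise<void> = noDelay,
): Promise<AgentRunResult> {
  const maxAttempts = 1 + Math.max(task.maxRetries ?? 0, 0)
  const usage = { input_tokens: 0, output_tokens: 0 }
  let last: AgentRunResult = FAILURE_RESULT
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      last = await run()
      // Token usage accumulates across attempts.
      usage.input_tokens += last.tokenUsage.input_tokens
      usage.output_tokens += last.tokenUsage.output_tokens
      if (last.success) break
    } catch (err) {
      // A thrown error counts as a failed attempt with no token cost.
      last = { ...FAILURE_RESULT, output: (err as Error).message }
    }
    if (attempt < maxAttempts) {
      // The 1000ms / 2x fallbacks for unset delay and backoff are assumptions.
      const nextDelayMs = computeRetryDelay(task.retryDelayMs ?? 1000, task.retryBackoff ?? 2, attempt)
      onRetry?.({ attempt, maxAttempts, error: last.output, nextDelayMs })
      await delay(nextDelayMs)
    }
  }
  return { ...last, tokenUsage: usage }
}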

155
tests/task-utils.test.ts Normal file
View File

@ -0,0 +1,155 @@
import { describe, it, expect } from 'vitest'
import {
createTask,
isTaskReady,
getTaskDependencyOrder,
validateTaskDependencies,
} from '../src/task/task.js'
import type { Task } from '../src/types.js'
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
function task(id: string, opts: { dependsOn?: string[]; status?: Task['status'] } = {}): Task {
const t = createTask({ title: id, description: `task ${id}` })
return { ...t, id, dependsOn: opts.dependsOn, status: opts.status ?? 'pending' }
}
// ---------------------------------------------------------------------------
// createTask
// ---------------------------------------------------------------------------
describe('createTask', () => {
it('creates a task with pending status and timestamps', () => {
const t = createTask({ title: 'Test', description: 'A test task' })
expect(t.id).toBeDefined()
expect(t.status).toBe('pending')
expect(t.createdAt).toBeInstanceOf(Date)
expect(t.updatedAt).toBeInstanceOf(Date)
})
it('copies dependsOn array (no shared reference)', () => {
const deps = ['a']
const t = createTask({ title: 'T', description: 'D', dependsOn: deps })
deps.push('b')
expect(t.dependsOn).toEqual(['a'])
})
})
// ---------------------------------------------------------------------------
// isTaskReady
// ---------------------------------------------------------------------------
describe('isTaskReady', () => {
it('returns true for a pending task with no dependencies', () => {
const t = task('a')
expect(isTaskReady(t, [t])).toBe(true)
})
it('returns false for a non-pending task', () => {
const t = task('a', { status: 'blocked' })
expect(isTaskReady(t, [t])).toBe(false)
})
it('returns true when all dependencies are completed', () => {
const dep = task('dep', { status: 'completed' })
const t = task('a', { dependsOn: ['dep'] })
expect(isTaskReady(t, [dep, t])).toBe(true)
})
it('returns false when a dependency is not yet completed', () => {
const dep = task('dep', { status: 'in_progress' })
const t = task('a', { dependsOn: ['dep'] })
expect(isTaskReady(t, [dep, t])).toBe(false)
})
it('returns false when a dependency is missing from the task set', () => {
const t = task('a', { dependsOn: ['ghost'] })
expect(isTaskReady(t, [t])).toBe(false)
})
})
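// The cases above imply a simple predicate: pending, and every declared
// dependency present in the set with status 'completed'. A sketch (the real
// export may differ in detail):
function isTaskReadySketch(t: Task, all: Task[]): boolean {
  if (t.status !== 'pending') return false
  // A dependency missing from the set counts as unsatisfied (the 'ghost' case).
  return (t.dependsOn ?? []).every(
    (id) => all.find((x) => x.id === id)?.status === 'completed',
  )
}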
// ---------------------------------------------------------------------------
// getTaskDependencyOrder
// ---------------------------------------------------------------------------
describe('getTaskDependencyOrder', () => {
it('returns empty array for empty input', () => {
expect(getTaskDependencyOrder([])).toEqual([])
})
it('returns tasks with no deps first', () => {
const a = task('a')
const b = task('b', { dependsOn: ['a'] })
const ordered = getTaskDependencyOrder([b, a])
expect(ordered[0].id).toBe('a')
expect(ordered[1].id).toBe('b')
})
it('handles a diamond dependency (a → b,c → d)', () => {
const a = task('a')
const b = task('b', { dependsOn: ['a'] })
const c = task('c', { dependsOn: ['a'] })
const d = task('d', { dependsOn: ['b', 'c'] })
const ordered = getTaskDependencyOrder([d, c, b, a])
const ids = ordered.map((t) => t.id)
// a must come before b and c; b and c must come before d
expect(ids.indexOf('a')).toBeLessThan(ids.indexOf('b'))
expect(ids.indexOf('a')).toBeLessThan(ids.indexOf('c'))
expect(ids.indexOf('b')).toBeLessThan(ids.indexOf('d'))
expect(ids.indexOf('c')).toBeLessThan(ids.indexOf('d'))
})
it('returns partial result when a cycle exists', () => {
const a = task('a', { dependsOn: ['b'] })
const b = task('b', { dependsOn: ['a'] })
const ordered = getTaskDependencyOrder([a, b])
// Neither can be ordered — result should be empty (or partial)
expect(ordered.length).toBeLessThan(2)
})
})
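// The ordering behavior above matches Kahn-style topological sorting:
// repeatedly emit tasks whose dependencies have all been emitted. Nodes caught
// in a cycle never qualify, which is why cyclic input yields a partial (or
// empty) result. A sketch assuming unique ids, not the exported source:
function topoOrderSketch(tasks: Task[]): Task[] {
  const emitted = new Set<string>()
  const ordered: Task[] = []
  let progressed = true
  while (progressed && ordered.length < tasks.length) {
    progressed = false
    for (const t of tasks) {
      if (emitted.has(t.id)) continue
      if ((t.dependsOn ?? []).every((id) => emitted.has(id))) {
        emitted.add(t.id)
        ordered.push(t)
        progressed = true
      }
    }
  }
  return ordered // any cyclic remainder is simply dropped
}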
// ---------------------------------------------------------------------------
// validateTaskDependencies
// ---------------------------------------------------------------------------
describe('validateTaskDependencies', () => {
it('returns valid for tasks with no deps', () => {
const result = validateTaskDependencies([task('a'), task('b')])
expect(result.valid).toBe(true)
expect(result.errors).toHaveLength(0)
})
it('detects self-dependency', () => {
const t = task('a', { dependsOn: ['a'] })
const result = validateTaskDependencies([t])
expect(result.valid).toBe(false)
expect(result.errors[0]).toContain('depends on itself')
})
it('detects unknown dependency', () => {
const t = task('a', { dependsOn: ['ghost'] })
const result = validateTaskDependencies([t])
expect(result.valid).toBe(false)
expect(result.errors[0]).toContain('unknown dependency')
})
it('detects a cycle (a → b → a)', () => {
const a = task('a', { dependsOn: ['b'] })
const b = task('b', { dependsOn: ['a'] })
const result = validateTaskDependencies([a, b])
expect(result.valid).toBe(false)
expect(result.errors.some((e) => e.toLowerCase().includes('cyclic'))).toBe(true)
})
it('detects a longer cycle (a → b → c → a)', () => {
const a = task('a', { dependsOn: ['c'] })
const b = task('b', { dependsOn: ['a'] })
const c = task('c', { dependsOn: ['b'] })
const result = validateTaskDependencies([a, b, c])
expect(result.valid).toBe(false)
})
})

193
tests/tool-executor.test.ts Normal file
View File

@ -0,0 +1,193 @@
import { describe, it, expect, vi } from 'vitest'
import { z } from 'zod'
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
import { ToolExecutor } from '../src/tool/executor.js'
import type { ToolUseContext } from '../src/types.js'
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
const dummyContext: ToolUseContext = {
agent: { name: 'test-agent', role: 'tester', model: 'test-model' },
}
function echoTool() {
return defineTool({
name: 'echo',
description: 'Echoes the message.',
inputSchema: z.object({ message: z.string() }),
execute: async ({ message }) => ({ data: message, isError: false }),
})
}
function failTool() {
return defineTool({
name: 'fail',
description: 'Always throws.',
inputSchema: z.object({}),
execute: async () => {
throw new Error('intentional failure')
},
})
}
function makeExecutor(...tools: ReturnType<typeof defineTool>[]) {
const registry = new ToolRegistry()
for (const t of tools) registry.register(t)
return { executor: new ToolExecutor(registry), registry }
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
describe('ToolExecutor', () => {
// -------------------------------------------------------------------------
// Single execution
// -------------------------------------------------------------------------
it('executes a tool and returns its result', async () => {
const { executor } = makeExecutor(echoTool())
const result = await executor.execute('echo', { message: 'hello' }, dummyContext)
expect(result.data).toBe('hello')
expect(result.isError).toBeFalsy()
})
it('returns an error result for an unknown tool', async () => {
const { executor } = makeExecutor()
const result = await executor.execute('ghost', {}, dummyContext)
expect(result.isError).toBe(true)
expect(result.data).toContain('not registered')
})
it('returns an error result when Zod validation fails', async () => {
const { executor } = makeExecutor(echoTool())
// 'message' is required but missing
const result = await executor.execute('echo', {}, dummyContext)
expect(result.isError).toBe(true)
expect(result.data).toContain('Invalid input')
})
it('catches tool execution errors and returns them as error results', async () => {
const { executor } = makeExecutor(failTool())
const result = await executor.execute('fail', {}, dummyContext)
expect(result.isError).toBe(true)
expect(result.data).toContain('intentional failure')
})
it('returns an error result when aborted before execution', async () => {
const { executor } = makeExecutor(echoTool())
const controller = new AbortController()
controller.abort()
const result = await executor.execute(
'echo',
{ message: 'hi' },
{ ...dummyContext, abortSignal: controller.signal },
)
expect(result.isError).toBe(true)
expect(result.data).toContain('aborted')
})
// -------------------------------------------------------------------------
// Batch execution
// -------------------------------------------------------------------------
it('executeBatch runs multiple tools and returns a map of results', async () => {
const { executor } = makeExecutor(echoTool())
const results = await executor.executeBatch(
[
{ id: 'c1', name: 'echo', input: { message: 'a' } },
{ id: 'c2', name: 'echo', input: { message: 'b' } },
],
dummyContext,
)
expect(results.size).toBe(2)
expect(results.get('c1')!.data).toBe('a')
expect(results.get('c2')!.data).toBe('b')
})
it('executeBatch isolates errors — one failure does not affect others', async () => {
const { executor } = makeExecutor(echoTool(), failTool())
const results = await executor.executeBatch(
[
{ id: 'ok', name: 'echo', input: { message: 'fine' } },
{ id: 'bad', name: 'fail', input: {} },
],
dummyContext,
)
expect(results.get('ok')!.isError).toBeFalsy()
expect(results.get('bad')!.isError).toBe(true)
})
// -------------------------------------------------------------------------
// Concurrency control
// -------------------------------------------------------------------------
it('respects maxConcurrency limit', async () => {
let peak = 0
let running = 0
const trackTool = defineTool({
name: 'track',
description: 'Tracks concurrency.',
inputSchema: z.object({}),
execute: async () => {
running++
peak = Math.max(peak, running)
await new Promise((r) => setTimeout(r, 50))
running--
return { data: 'ok', isError: false }
},
})
const registry = new ToolRegistry()
registry.register(trackTool)
const executor = new ToolExecutor(registry, { maxConcurrency: 2 })
await executor.executeBatch(
Array.from({ length: 5 }, (_, i) => ({ id: `t${i}`, name: 'track', input: {} })),
dummyContext,
)
expect(peak).toBeLessThanOrEqual(2)
})
})
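// ---------------------------------------------------------------------------
// The cap above can be enforced with a tiny promise semaphore; a sketch of
// the idea (the executor's actual mechanism is not shown here and may differ):
// ---------------------------------------------------------------------------
function concurrencyLimiter(max: number) {
  let active = 0
  const waiters: Array<() => void> = []
  return async <T>(fn: () => Promise<T>): Promise<T> => {
    // Park until a slot frees up; each finishing call wakes exactly one waiter.
    if (active >= max) await new Promise<void>((resolve) => waiters.push(resolve))
    active++
    try {
      return await fn()
    } finally {
      active--
      waiters.shift()?.()
    }
  }
}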
// ---------------------------------------------------------------------------
// ToolRegistry
// ---------------------------------------------------------------------------
describe('ToolRegistry', () => {
it('registers and retrieves a tool', () => {
const registry = new ToolRegistry()
registry.register(echoTool())
expect(registry.get('echo')).toBeDefined()
expect(registry.has('echo')).toBe(true)
})
it('throws on duplicate registration', () => {
const registry = new ToolRegistry()
registry.register(echoTool())
expect(() => registry.register(echoTool())).toThrow('already registered')
})
it('unregister removes the tool', () => {
const registry = new ToolRegistry()
registry.register(echoTool())
registry.unregister('echo')
expect(registry.has('echo')).toBe(false)
})
it('toToolDefs produces JSON schema representations', () => {
const registry = new ToolRegistry()
registry.register(echoTool())
const defs = registry.toToolDefs()
expect(defs).toHaveLength(1)
expect(defs[0].name).toBe('echo')
expect(defs[0].inputSchema).toHaveProperty('properties')
})
})

453
tests/trace.test.ts Normal file
View File

@ -0,0 +1,453 @@
import { describe, it, expect, vi } from 'vitest'
import { z } from 'zod'
import { Agent } from '../src/agent/agent.js'
import { AgentRunner, type RunOptions } from '../src/agent/runner.js'
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
import { ToolExecutor } from '../src/tool/executor.js'
import { executeWithRetry } from '../src/orchestrator/orchestrator.js'
import { emitTrace, generateRunId } from '../src/utils/trace.js'
import { createTask } from '../src/task/task.js'
import type {
AgentConfig,
AgentRunResult,
LLMAdapter,
LLMResponse,
TraceEvent,
} from '../src/types.js'
// ---------------------------------------------------------------------------
// Mock adapters
// ---------------------------------------------------------------------------
function mockAdapter(responses: LLMResponse[]): LLMAdapter {
let callIndex = 0
return {
name: 'mock',
async chat() {
return responses[callIndex++]!
},
async *stream() {
/* unused */
},
}
}
function textResponse(text: string): LLMResponse {
return {
id: `resp-${Math.random().toString(36).slice(2)}`,
content: [{ type: 'text' as const, text }],
model: 'mock-model',
stop_reason: 'end_turn',
usage: { input_tokens: 10, output_tokens: 20 },
}
}
function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse {
return {
id: `resp-${Math.random().toString(36).slice(2)}`,
content: [
{
type: 'tool_use' as const,
id: `tu-${Math.random().toString(36).slice(2)}`,
name: toolName,
input,
},
],
model: 'mock-model',
stop_reason: 'tool_use',
usage: { input_tokens: 15, output_tokens: 25 },
}
}
function buildMockAgent(
config: AgentConfig,
responses: LLMResponse[],
registry?: ToolRegistry,
executor?: ToolExecutor,
): Agent {
const reg = registry ?? new ToolRegistry()
const exec = executor ?? new ToolExecutor(reg)
const adapter = mockAdapter(responses)
const agent = new Agent(config, reg, exec)
const runner = new AgentRunner(adapter, reg, exec, {
model: config.model,
systemPrompt: config.systemPrompt,
maxTurns: config.maxTurns,
maxTokens: config.maxTokens,
temperature: config.temperature,
agentName: config.name,
})
;(agent as any).runner = runner
return agent
}
// ---------------------------------------------------------------------------
// emitTrace helper
// ---------------------------------------------------------------------------
describe('emitTrace', () => {
it('does nothing when fn is undefined', () => {
// Should not throw
emitTrace(undefined, {
type: 'agent',
runId: 'r1',
agent: 'a',
turns: 1,
tokens: { input_tokens: 0, output_tokens: 0 },
toolCalls: 0,
startMs: 0,
endMs: 0,
durationMs: 0,
})
})
it('calls fn with the event', () => {
const fn = vi.fn()
const event: TraceEvent = {
type: 'agent',
runId: 'r1',
agent: 'a',
turns: 1,
tokens: { input_tokens: 0, output_tokens: 0 },
toolCalls: 0,
startMs: 0,
endMs: 0,
durationMs: 0,
}
emitTrace(fn, event)
expect(fn).toHaveBeenCalledWith(event)
})
it('swallows errors thrown by callback', () => {
const fn = () => { throw new Error('boom') }
expect(() =>
emitTrace(fn, {
type: 'agent',
runId: 'r1',
agent: 'a',
turns: 1,
tokens: { input_tokens: 0, output_tokens: 0 },
toolCalls: 0,
startMs: 0,
endMs: 0,
durationMs: 0,
}),
).not.toThrow()
})
it('swallows rejected promises from async callbacks', async () => {
// An async onTrace that rejects should not produce unhandled rejection
const fn = async () => { throw new Error('async boom') }
emitTrace(fn as unknown as (event: TraceEvent) => void, {
type: 'agent',
runId: 'r1',
agent: 'a',
turns: 1,
tokens: { input_tokens: 0, output_tokens: 0 },
toolCalls: 0,
startMs: 0,
endMs: 0,
durationMs: 0,
})
// If the rejection is not caught, vitest will fail with unhandled rejection.
// Give the event loop a tick to surface any unhandled rejection.
await new Promise(resolve => setTimeout(resolve, 10))
})
})
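// Both guarantees above (synchronous throws swallowed, async rejections
// absorbed) fit in a few lines. A sketch of the assumed shape, not the
// exported source:
function emitTraceSketch(fn: ((event: TraceEvent) => void) | undefined, event: TraceEvent): void {
  if (!fn) return
  try {
    // Promise.resolve wraps any returned promise; .catch absorbs a rejection.
    void Promise.resolve(fn(event)).catch(() => {})
  } catch {
    // A synchronous throw from the callback is deliberately ignored.
  }
}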
describe('generateRunId', () => {
it('returns a UUID string', () => {
const id = generateRunId()
expect(id).toMatch(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/)
})
it('returns unique IDs', () => {
const ids = new Set(Array.from({ length: 100 }, generateRunId))
expect(ids.size).toBe(100)
})
})
// ---------------------------------------------------------------------------
// AgentRunner trace events
// ---------------------------------------------------------------------------
describe('AgentRunner trace events', () => {
it('emits llm_call trace for each LLM turn', async () => {
const traces: TraceEvent[] = []
const registry = new ToolRegistry()
const executor = new ToolExecutor(registry)
const adapter = mockAdapter([textResponse('Hello!')])
const runner = new AgentRunner(adapter, registry, executor, {
model: 'test-model',
agentName: 'test-agent',
})
const runOptions: RunOptions = {
onTrace: (e) => traces.push(e),
runId: 'run-1',
traceAgent: 'test-agent',
}
await runner.run(
[{ role: 'user', content: [{ type: 'text', text: 'hi' }] }],
runOptions,
)
const llmTraces = traces.filter(t => t.type === 'llm_call')
expect(llmTraces).toHaveLength(1)
const llm = llmTraces[0]!
expect(llm.type).toBe('llm_call')
expect(llm.runId).toBe('run-1')
expect(llm.agent).toBe('test-agent')
expect(llm.model).toBe('test-model')
expect(llm.turn).toBe(1)
expect(llm.tokens).toEqual({ input_tokens: 10, output_tokens: 20 })
expect(llm.durationMs).toBeGreaterThanOrEqual(0)
expect(llm.startMs).toBeLessThanOrEqual(llm.endMs)
})
it('emits tool_call trace with correct fields', async () => {
const traces: TraceEvent[] = []
const registry = new ToolRegistry()
registry.register(
defineTool({
name: 'echo',
description: 'echoes',
inputSchema: z.object({ msg: z.string() }),
execute: async ({ msg }) => ({ data: msg }),
}),
)
const executor = new ToolExecutor(registry)
const adapter = mockAdapter([
toolUseResponse('echo', { msg: 'hello' }),
textResponse('Done'),
])
const runner = new AgentRunner(adapter, registry, executor, {
model: 'test-model',
agentName: 'tooler',
})
await runner.run(
[{ role: 'user', content: [{ type: 'text', text: 'test' }] }],
{ onTrace: (e) => traces.push(e), runId: 'run-2', traceAgent: 'tooler' },
)
const toolTraces = traces.filter(t => t.type === 'tool_call')
expect(toolTraces).toHaveLength(1)
const tool = toolTraces[0]!
expect(tool.type).toBe('tool_call')
expect(tool.runId).toBe('run-2')
expect(tool.agent).toBe('tooler')
expect(tool.tool).toBe('echo')
expect(tool.isError).toBe(false)
expect(tool.durationMs).toBeGreaterThanOrEqual(0)
})
it('tool_call trace has isError: true on tool failure', async () => {
const traces: TraceEvent[] = []
const registry = new ToolRegistry()
registry.register(
defineTool({
name: 'boom',
description: 'fails',
inputSchema: z.object({}),
execute: async () => { throw new Error('fail') },
}),
)
const executor = new ToolExecutor(registry)
const adapter = mockAdapter([
toolUseResponse('boom', {}),
textResponse('Handled'),
])
const runner = new AgentRunner(adapter, registry, executor, {
model: 'test-model',
agentName: 'err-agent',
})
await runner.run(
[{ role: 'user', content: [{ type: 'text', text: 'test' }] }],
{ onTrace: (e) => traces.push(e), runId: 'run-3', traceAgent: 'err-agent' },
)
const toolTraces = traces.filter(t => t.type === 'tool_call')
expect(toolTraces).toHaveLength(1)
expect(toolTraces[0]!.isError).toBe(true)
})
it('runs without error when onTrace is absent', async () => {
// Verifies the untraced path: no callback is provided and the run still succeeds.
const registry = new ToolRegistry()
const executor = new ToolExecutor(registry)
const adapter = mockAdapter([textResponse('hi')])
const runner = new AgentRunner(adapter, registry, executor, {
model: 'test-model',
})
const result = await runner.run(
[{ role: 'user', content: [{ type: 'text', text: 'test' }] }],
{},
)
expect(result.output).toBe('hi')
})
})
// ---------------------------------------------------------------------------
// Agent-level trace events
// ---------------------------------------------------------------------------
describe('Agent trace events', () => {
it('emits agent trace with turns, tokens, and toolCalls', async () => {
const traces: TraceEvent[] = []
const config: AgentConfig = {
name: 'my-agent',
model: 'mock-model',
systemPrompt: 'You are a test.',
}
const agent = buildMockAgent(config, [textResponse('Hello world')])
const runOptions: Partial<RunOptions> = {
onTrace: (e) => traces.push(e),
runId: 'run-agent-1',
traceAgent: 'my-agent',
}
const result = await agent.run('Say hello', runOptions)
expect(result.success).toBe(true)
const agentTraces = traces.filter(t => t.type === 'agent')
expect(agentTraces).toHaveLength(1)
const at = agentTraces[0]!
expect(at.type).toBe('agent')
expect(at.runId).toBe('run-agent-1')
expect(at.agent).toBe('my-agent')
expect(at.turns).toBe(1) // one assistant message
expect(at.tokens).toEqual({ input_tokens: 10, output_tokens: 20 })
expect(at.toolCalls).toBe(0)
expect(at.durationMs).toBeGreaterThanOrEqual(0)
})
it('all traces share the same runId', async () => {
const traces: TraceEvent[] = []
const registry = new ToolRegistry()
registry.register(
defineTool({
name: 'greet',
description: 'greets',
inputSchema: z.object({ name: z.string() }),
execute: async ({ name }) => ({ data: `Hi ${name}` }),
}),
)
const executor = new ToolExecutor(registry)
const config: AgentConfig = {
name: 'multi-trace-agent',
model: 'mock-model',
tools: ['greet'],
}
const agent = buildMockAgent(
config,
[
toolUseResponse('greet', { name: 'world' }),
textResponse('Done'),
],
registry,
executor,
)
const runId = 'shared-run-id'
await agent.run('test', {
onTrace: (e) => traces.push(e),
runId,
traceAgent: 'multi-trace-agent',
})
// Should have: 2 llm_call, 1 tool_call, 1 agent
expect(traces.length).toBeGreaterThanOrEqual(4)
for (const trace of traces) {
expect(trace.runId).toBe(runId)
}
})
it('onTrace error does not break agent execution', async () => {
const config: AgentConfig = {
name: 'resilient-agent',
model: 'mock-model',
}
const agent = buildMockAgent(config, [textResponse('OK')])
const result = await agent.run('test', {
onTrace: () => { throw new Error('callback exploded') },
runId: 'run-err',
traceAgent: 'resilient-agent',
})
// The run should still succeed despite the broken callback
expect(result.success).toBe(true)
expect(result.output).toBe('OK')
})
it('per-turn token usage in llm_call traces', async () => {
const traces: TraceEvent[] = []
const registry = new ToolRegistry()
registry.register(
defineTool({
name: 'noop',
description: 'noop',
inputSchema: z.object({}),
execute: async () => ({ data: 'ok' }),
}),
)
const executor = new ToolExecutor(registry)
// Two LLM calls: first triggers a tool, second is the final response
const resp1: LLMResponse = {
id: 'r1',
content: [{ type: 'tool_use', id: 'tu1', name: 'noop', input: {} }],
model: 'mock-model',
stop_reason: 'tool_use',
usage: { input_tokens: 100, output_tokens: 50 },
}
const resp2: LLMResponse = {
id: 'r2',
content: [{ type: 'text', text: 'Final answer' }],
model: 'mock-model',
stop_reason: 'end_turn',
usage: { input_tokens: 200, output_tokens: 100 },
}
const adapter = mockAdapter([resp1, resp2])
const runner = new AgentRunner(adapter, registry, executor, {
model: 'mock-model',
agentName: 'token-agent',
})
await runner.run(
[{ role: 'user', content: [{ type: 'text', text: 'go' }] }],
{ onTrace: (e) => traces.push(e), runId: 'run-tok', traceAgent: 'token-agent' },
)
const llmTraces = traces.filter(t => t.type === 'llm_call')
expect(llmTraces).toHaveLength(2)
// Each trace carries its own turn's token usage, not the aggregate
expect(llmTraces[0]!.tokens).toEqual({ input_tokens: 100, output_tokens: 50 })
expect(llmTraces[1]!.tokens).toEqual({ input_tokens: 200, output_tokens: 100 })
// Turn numbers should be sequential
expect(llmTraces[0]!.turn).toBe(1)
expect(llmTraces[1]!.turn).toBe(2)
})
})