Compare commits
72 Commits
| Author | SHA1 | Date |
|---|---|---|
|
|
b4b38ffdd8 | |
|
|
4714dd1d4c | |
|
|
6de7bbd41f | |
|
|
696269c924 | |
|
|
a6b5181c74 | |
|
|
c3ead26677 | |
|
|
d78af2787f | |
|
|
cd57c2ee31 | |
|
|
b6ee63bda0 | |
|
|
1e154b22fd | |
|
|
0485bfb82f | |
|
|
fa4533e8d0 | |
|
|
5305cb2321 | |
|
|
34b911825a | |
|
|
1cc43eba6e | |
|
|
8ecfc1504c | |
|
|
0170e43c4e | |
|
|
93795db09f | |
|
|
9b487ca368 | |
|
|
38a88df144 | |
|
|
017e0f42f6 | |
|
|
c0ddcfc7aa | |
|
|
cdec60e7ad | |
|
|
dfe46721a5 | |
|
|
0f16e81ae6 | |
|
|
5804a54898 | |
|
|
252419e1f8 | |
|
|
6ea66afab5 | |
|
|
97c5e457dd | |
|
|
9b04fbf2e5 | |
|
|
9a446b8796 | |
|
|
dc88232885 | |
|
|
ced1d90a93 | |
|
|
0fb8a38284 | |
|
|
629d9c8253 | |
|
|
167085c3a7 | |
|
|
12dd802ad8 | |
|
|
1fbed196ca | |
|
|
a220b6ecc5 | |
|
|
89311dc4d4 | |
|
|
06cc415ddf | |
|
|
aa5fab59fa | |
|
|
7aa1bb7b5d | |
|
|
eb484d9bbf | |
|
|
f1c7477a26 | |
|
|
664bed987f | |
|
|
2022882bfb | |
|
|
0b57ffe3e9 | |
|
|
03dc897929 | |
|
|
cb11020c65 | |
|
|
91494bcca9 | |
|
|
faf24aaffa | |
|
|
d8c3808851 | |
|
|
54bfe2ed2d | |
|
|
40f13a09a6 | |
|
|
30369b0597 | |
|
|
dc8cbe0262 | |
|
|
97c39b316c | |
|
|
48fbec6659 | |
|
|
9463dbb28e | |
|
|
cfbbd24601 | |
|
|
0fd18d8a19 | |
|
|
34ca8602d0 | |
|
|
607ba57a69 | |
|
|
5a67d559a3 | |
|
|
a29d87f384 | |
|
|
73b2454c2f | |
|
|
60fb2b142e | |
|
|
1e3bd1013e | |
|
|
336d94e50d | |
|
|
d59898ce3d | |
|
|
c23a20bb6c |
|
|
@ -6,6 +6,17 @@ labels: enhancement
|
|||
assignees: ''
|
||||
---
|
||||
|
||||
## Source
|
||||
|
||||
**Where did this idea come from?** (Pick one — helps maintainers triage and prioritize.)
|
||||
|
||||
- [ ] **Real use case** — I'm using open-multi-agent and hit this limit. Describe the use case in "Problem" below.
|
||||
- [ ] **Competitive reference** — Another framework has this (LangChain, AutoGen, CrewAI, Mastra, XCLI, etc.). Please name or link it.
|
||||
- [ ] **Systematic gap** — A missing piece in the framework matrix (provider not supported, tool not covered, etc.).
|
||||
- [ ] **Discussion / inspiration** — Came up in a tweet, Reddit post, Discord, or AI conversation. Please link or paste the source if possible.
|
||||
|
||||
> **Maintainer note**: after triage, label with one of `community-feedback`, `source:competitive`, `source:analysis`, `source:owner` (multiple OK if the source is mixed — e.g. competitive analysis + user feedback).
|
||||
|
||||
## Problem
|
||||
|
||||
A clear description of the problem or limitation you're experiencing.
|
||||
|
|
|
|||
|
|
@ -3,5 +3,3 @@ dist/
|
|||
coverage/
|
||||
*.tgz
|
||||
.DS_Store
|
||||
promo-*.md
|
||||
non-tech_*/
|
||||
|
|
|
|||
|
|
@ -10,9 +10,10 @@ npm run dev # Watch mode compilation
|
|||
npm run lint # Type-check only (tsc --noEmit)
|
||||
npm test # Run all tests (vitest run)
|
||||
npm run test:watch # Vitest watch mode
|
||||
node dist/cli/oma.js help # After build: shell/CI CLI (`oma` when installed via npm bin)
|
||||
```
|
||||
|
||||
Tests live in `tests/` (vitest). Examples in `examples/` are standalone scripts requiring API keys (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`).
|
||||
Tests live in `tests/` (vitest). Examples in `examples/` are standalone scripts requiring API keys (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`). CLI usage and JSON schemas: `docs/cli.md`.
|
||||
|
||||
## Architecture
|
||||
|
||||
|
|
@ -55,7 +56,7 @@ This is the framework's key feature. When `runTeam()` is called:
|
|||
|
||||
### Concurrency Control
|
||||
|
||||
Two independent semaphores: `AgentPool` (max concurrent agent runs, default 5) and `ToolExecutor` (max concurrent tool calls, default 4).
|
||||
Three semaphore layers: `AgentPool` pool-level (max concurrent agent runs, default 5), `AgentPool` per-agent mutex (serializes concurrent runs on the same `Agent` instance), and `ToolExecutor` (max concurrent tool calls, default 4).
|
||||
|
||||
### Structured Output
|
||||
|
||||
|
|
|
|||
30
DECISIONS.md
30
DECISIONS.md
|
|
@ -1,11 +1,11 @@
|
|||
# Architecture Decisions
|
||||
|
||||
This document records deliberate "won't do" decisions for the project. These are features we evaluated and chose NOT to implement — not because they're bad ideas, but because they conflict with our positioning as the **simplest multi-agent framework**.
|
||||
|
||||
If you're considering a PR in any of these areas, please open a discussion first.
|
||||
This document records our architectural decisions — both what we choose NOT to build, and what we're actively working toward. Our goal is to be the **simplest multi-agent framework**, but simplicity doesn't mean closed. We believe the long-term value of a framework isn't its feature checklist — it's the size of the network it connects to.
|
||||
|
||||
## Won't Do
|
||||
|
||||
These are paradigms we evaluated and deliberately chose not to implement, because they conflict with our core model.
|
||||
|
||||
### 1. Agent Handoffs
|
||||
|
||||
**What**: Agent A transfers an in-progress conversation to Agent B (like OpenAI Agents SDK `handoff()`).
|
||||
|
|
@ -20,24 +20,30 @@ If you're considering a PR in any of these areas, please open a discussion first
|
|||
|
||||
**Related**: Closing #20 with this rationale.
|
||||
|
||||
### 3. A2A Protocol (Agent-to-Agent)
|
||||
## Open to Adoption
|
||||
|
||||
**What**: Google's open protocol for agents on different servers to discover and communicate with each other.
|
||||
These are protocols we see strategic value in and are actively tracking. We're waiting for the right moment — not the right feature spec, but the right network density.
|
||||
|
||||
**Why not**: Too early — the spec is still evolving and adoption is minimal. Our users run agents in a single process, not across distributed services. If A2A matures and there's real demand, we can revisit. Today it would add complexity for zero practical benefit.
|
||||
> **Our thesis**: Framework competition on features (DAG scheduling, shared memory, zero-dependency) is a race that can always be caught. Network competition — where the value of the framework grows with every agent published to it — creates a fundamentally different moat. MCP and A2A are the protocols that turn a framework from a build tool into a registry.
|
||||
|
||||
### 4. MCP Integration (Model Context Protocol)
|
||||
### 3. MCP Integration (Model Context Protocol)
|
||||
|
||||
**What**: Anthropic's protocol for connecting LLMs to external tools and data sources.
|
||||
|
||||
**Why not**: MCP is valuable but targets a different layer. Our `defineTool()` API already lets users wrap any external service as a tool in ~10 lines of code. Adding MCP would mean maintaining protocol compatibility, transport layers, and tool discovery — complexity that serves tool platform builders, not our target users who just want to run agent teams.
|
||||
**Status**: **Next up.** MCP has crossed the adoption threshold — Cursor, Windsurf, Claude Code all ship with built-in support, and many services now provide MCP servers directly. Asking users to re-wrap each one via `defineTool()` creates unnecessary friction.
|
||||
|
||||
### 5. Dashboard / Visualization
|
||||
**Approach**: Optional peer dependency (`@modelcontextprotocol/sdk`). Zero impact on the core — if you don't use MCP, you don't pay for it. This preserves our minimal-dependency principle while connecting to the broader tool ecosystem.
|
||||
|
||||
**What**: Built-in web UI to visualize task DAGs, agent activity, and token usage.
|
||||
**Tracking**: #86
|
||||
|
||||
**Why not**: We expose data, we don't build UI. The `onProgress` callback and upcoming `onTrace` (#18) give users all the raw data. They can pipe it into Grafana, build a custom dashboard, or use console logs. Shipping a web UI means owning a frontend stack, which is outside our scope.
|
||||
### 4. A2A Protocol (Agent-to-Agent)
|
||||
|
||||
**What**: Google's open protocol for agents on different servers to discover and communicate with each other.
|
||||
|
||||
**Status**: **Watching.** The spec is still evolving and production adoption is minimal. But we recognize A2A's potential to enable the network effect we care about — if 1,000 developers publish agent services using open-multi-agent, the 1,001st developer isn't just choosing an API, they're choosing which ecosystem has the most agents they can call.
|
||||
|
||||
**When we'll move**: When A2A adoption reaches a tipping point where the protocol connects real, production agent services — not just demos. We'll prioritize a lightweight integration that lets agents be both consumers and providers of A2A services.
|
||||
|
||||
---
|
||||
|
||||
*Last updated: 2026-04-03*
|
||||
*Last updated: 2026-04-09*
|
||||
|
|
|
|||
243
README.md
243
README.md
|
|
@ -1,29 +1,62 @@
|
|||
# Open Multi-Agent
|
||||
|
||||
TypeScript framework for multi-agent orchestration. One `runTeam()` call from goal to result — the framework decomposes it into tasks, resolves dependencies, and runs agents in parallel.
|
||||
The lightweight multi-agent orchestration engine for TypeScript. Three runtime dependencies, zero config, goal to result in one `runTeam()` call.
|
||||
|
||||
3 runtime dependencies · 33 source files · Deploys anywhere Node.js runs · Mentioned in [Latent Space](https://www.latent.space/p/ainews-a-quiet-april-fools) AI News
|
||||
CrewAI is Python. LangGraph makes you draw the graph by hand. `open-multi-agent` is the `npm install` you drop into an existing Node.js backend when you need a team of agents to work on a goal together. Nothing more, nothing less.
|
||||
|
||||
3 runtime dependencies · 41 source files · Deploys anywhere Node.js runs
|
||||
|
||||
[](https://github.com/JackChen-me/open-multi-agent/stargazers)
|
||||
[](./LICENSE)
|
||||
[](https://www.typescriptlang.org/)
|
||||
[](https://github.com/JackChen-me/open-multi-agent/actions)
|
||||
[](https://github.com/JackChen-me/open-multi-agent/actions)
|
||||
|
||||
**English** | [中文](./README_zh.md)
|
||||
|
||||
## Why Open Multi-Agent?
|
||||
## What you actually get
|
||||
|
||||
- **Goal In, Result Out** — `runTeam(team, "Build a REST API")`. A coordinator agent auto-decomposes the goal into a task DAG with dependencies and assignees, runs independent tasks in parallel, and synthesizes the final output. No manual task definitions or graph wiring required.
|
||||
- **TypeScript-Native** — Built for the Node.js ecosystem. `npm install`, import, run. No Python runtime, no subprocess bridge, no sidecar services. Embed in Express, Next.js, serverless functions, or CI/CD pipelines.
|
||||
- **Auditable and Lightweight** — 3 runtime dependencies (`@anthropic-ai/sdk`, `openai`, `zod`). 33 source files. The entire codebase is readable in an afternoon.
|
||||
- **Model Agnostic** — Claude, GPT, Gemma 4, and local models (Ollama, vLLM, LM Studio, llama.cpp server) in the same team. Swap models per agent via `baseURL`.
|
||||
- **Multi-Agent Collaboration** — Agents with different roles, tools, and models collaborate through a message bus and shared memory.
|
||||
- **Structured Output** — Add `outputSchema` (Zod) to any agent. Output is parsed as JSON, validated, and auto-retried once on failure. Access typed results via `result.structured`.
|
||||
- **Task Retry** — Set `maxRetries` on tasks for automatic retry with exponential backoff. Failed attempts accumulate token usage for accurate billing.
|
||||
- **Human-in-the-Loop** — Optional `onApproval` callback on `runTasks()`. After each batch of tasks completes, your callback decides whether to proceed or abort remaining work.
|
||||
- **Lifecycle Hooks** — `beforeRun` / `afterRun` on `AgentConfig`. Intercept the prompt before execution or post-process results after. Throw from either hook to abort.
|
||||
- **Loop Detection** — `loopDetection` on `AgentConfig` catches stuck agents repeating the same tool calls or text output. Configurable action: warn (default), terminate, or custom callback.
|
||||
- **Observability** — Optional `onTrace` callback emits structured spans for every LLM call, tool execution, task, and agent run — with timing, token usage, and a shared `runId` for correlation. Zero overhead when not subscribed, zero extra dependencies.
|
||||
- **Goal to result in one call.** `runTeam(team, "Build a REST API")` kicks off a coordinator agent that decomposes the goal into a task DAG, resolves dependencies, runs independent tasks in parallel, and synthesizes the final output. No graph to draw, no tasks to wire up.
|
||||
- **TypeScript-native, three runtime dependencies.** `@anthropic-ai/sdk`, `openai`, `zod`. That is the whole runtime. Embed in Express, Next.js, serverless functions, or CI/CD pipelines. No Python runtime, no subprocess bridge, no cloud sidecar.
|
||||
- **Multi-model teams.** Claude, GPT, Gemini, Grok, MiniMax, DeepSeek, Copilot, or any OpenAI-compatible local model (Ollama, vLLM, LM Studio, llama.cpp) in the same team. Run the architect on Opus 4.6, the developer on GPT-5.4, the reviewer on local Gemma 4, all in one `runTeam()` call. Gemini ships as an optional peer dependency: `npm install @google/genai` to enable.
|
||||
|
||||
Other features (MCP integration, context strategies, structured output, task retry, human-in-the-loop, lifecycle hooks, loop detection, observability) live below the fold and in [`examples/`](./examples/).
|
||||
|
||||
## Philosophy: what we build, what we don't
|
||||
|
||||
Our goal is to be the simplest multi-agent framework for TypeScript. Simplicity does not mean closed. We believe the long-term value of a framework is the size of the network it connects to, not its feature checklist.
|
||||
|
||||
**We build:**
|
||||
- A coordinator that decomposes a goal into a task DAG.
|
||||
- A task queue that runs independent tasks in parallel and cascades failures to dependents.
|
||||
- A shared memory and message bus so agents can see each other's output.
|
||||
- Multi-model teams where each agent can use a different LLM provider.
|
||||
|
||||
**We don't build:**
|
||||
- **Agent handoffs.** If agent A needs to transfer mid-conversation to agent B, use [OpenAI Agents SDK](https://github.com/openai/openai-agents-python). In our model, each agent owns one task end-to-end, with no mid-conversation transfers.
|
||||
- **State persistence / checkpointing.** Not planned for now. Adding a storage backend would break the three-dependency promise, and our workflows run in seconds to minutes, not hours. If real usage shifts toward long-running workflows, we will revisit.
|
||||
|
||||
**Tracking:**
|
||||
- **A2A protocol.** Watching, will move when production adoption is real.
|
||||
|
||||
See [`DECISIONS.md`](./DECISIONS.md) for the full rationale.
|
||||
|
||||
## How is this different from X?
|
||||
|
||||
**vs. [LangGraph JS](https://github.com/langchain-ai/langgraphjs).** LangGraph is declarative graph orchestration: you define nodes, edges, and conditional routing, then `compile()` and `invoke()`. `open-multi-agent` is goal-driven: you declare a team and a goal, a coordinator decomposes it into a task DAG at runtime. LangGraph gives you total control of topology (great for fixed production workflows). This gives you less typing and faster iteration (great for exploratory multi-agent work). LangGraph also has mature checkpointing; we do not.
|
||||
|
||||
**vs. [CrewAI](https://github.com/crewAIInc/crewAI).** CrewAI is the mature Python choice. If your stack is Python, use CrewAI. `open-multi-agent` is TypeScript-native: three runtime dependencies, embeds directly in Node.js without a subprocess bridge. Roughly comparable capability on the orchestration side. Choose on language fit.
|
||||
|
||||
**vs. [Vercel AI SDK](https://github.com/vercel/ai).** AI SDK is the LLM call layer: a unified TypeScript client for 60+ providers with streaming, tool calls, and structured outputs. It does not orchestrate multi-agent teams. `open-multi-agent` sits on top when you need that. They compose: use AI SDK for single-agent work, reach for this when you need a team.
|
||||
|
||||
## Used by
|
||||
|
||||
`open-multi-agent` is a new project (launched 2026-04-01, MIT, 5,500+ stars). The ecosystem is still forming, so the list below is short and honest:
|
||||
|
||||
- **[temodar-agent](https://github.com/xeloxa/temodar-agent)** (~50 stars). WordPress security analysis platform by [Ali Sünbül](https://github.com/xeloxa). Uses our built-in tools (`bash`, `file_*`, `grep`) directly in its Docker runtime. Confirmed production use.
|
||||
- **[rentech-quant-platform](https://github.com/rookiecoderasz/rentech-quant-platform).** Multi-agent quant trading research platform. Five pipelines plus MCP integrations, built on top of `open-multi-agent`. Early signal, very new.
|
||||
- **Cybersecurity SOC (home lab).** A private setup running Qwen 2.5 + DeepSeek Coder entirely offline via Ollama, building an autonomous SOC pipeline on Wazuh + Proxmox. Early user, not yet public.
|
||||
|
||||
Using `open-multi-agent` in production or a side project? [Open a discussion](https://github.com/JackChen-me/open-multi-agent/discussions) and we will list it here.
|
||||
|
||||
## Quick Start
|
||||
|
||||
|
|
@ -38,8 +71,14 @@ Set the API key for your provider. Local models via Ollama require no API key
|
|||
- `ANTHROPIC_API_KEY`
|
||||
- `OPENAI_API_KEY`
|
||||
- `GEMINI_API_KEY`
|
||||
- `XAI_API_KEY` (for Grok)
|
||||
- `MINIMAX_API_KEY` (for MiniMax)
|
||||
- `MINIMAX_BASE_URL` (for MiniMax — optional, selects endpoint)
|
||||
- `DEEPSEEK_API_KEY` (for DeepSeek)
|
||||
- `GITHUB_TOKEN` (for Copilot)
|
||||
|
||||
**CLI (`oma`).** For shell and CI, the package exposes a JSON-first binary. See [docs/cli.md](./docs/cli.md) for `oma run`, `oma task`, `oma provider`, exit codes, and file formats.
|
||||
|
||||
Three agents, one goal — the framework handles the rest:
|
||||
|
||||
```typescript
|
||||
|
|
@ -53,19 +92,8 @@ const architect: AgentConfig = {
|
|||
tools: ['file_write'],
|
||||
}
|
||||
|
||||
const developer: AgentConfig = {
|
||||
name: 'developer',
|
||||
model: 'claude-sonnet-4-6',
|
||||
systemPrompt: 'You implement what the architect designs.',
|
||||
tools: ['bash', 'file_read', 'file_write', 'file_edit'],
|
||||
}
|
||||
|
||||
const reviewer: AgentConfig = {
|
||||
name: 'reviewer',
|
||||
model: 'claude-sonnet-4-6',
|
||||
systemPrompt: 'You review code for correctness and clarity.',
|
||||
tools: ['file_read', 'grep'],
|
||||
}
|
||||
const developer: AgentConfig = { /* same shape, tools: ['bash', 'file_read', 'file_write', 'file_edit'] */ }
|
||||
const reviewer: AgentConfig = { /* same shape, tools: ['file_read', 'grep'] */ }
|
||||
|
||||
const orchestrator = new OpenMultiAgent({
|
||||
defaultModel: 'claude-sonnet-4-6',
|
||||
|
|
@ -94,8 +122,8 @@ task_complete architect
|
|||
task_start developer
|
||||
task_start developer // independent tasks run in parallel
|
||||
task_complete developer
|
||||
task_start reviewer // unblocked after implementation
|
||||
task_complete developer
|
||||
task_start reviewer // unblocked after implementation
|
||||
task_complete reviewer
|
||||
agent_complete coordinator // synthesizes final result
|
||||
Success: true
|
||||
|
|
@ -110,29 +138,21 @@ Tokens: 12847 output tokens
|
|||
| Auto-orchestrated team | `runTeam()` | Give a goal, framework plans and executes |
|
||||
| Explicit pipeline | `runTasks()` | You define the task graph and assignments |
|
||||
|
||||
For MapReduce-style fan-out without task dependencies, use `AgentPool.runParallel()` directly. See [example 07](examples/07-fan-out-aggregate.ts).
|
||||
|
||||
## Examples
|
||||
|
||||
All examples are runnable scripts in [`examples/`](./examples/). Run any of them with `npx tsx`:
|
||||
18 runnable scripts and 1 full-stack demo in [`examples/`](./examples/). Start with these:
|
||||
|
||||
```bash
|
||||
npx tsx examples/01-single-agent.ts
|
||||
```
|
||||
- [02 — Team Collaboration](examples/02-team-collaboration.ts): `runTeam()` coordinator pattern.
|
||||
- [06 — Local Model](examples/06-local-model.ts): Ollama and Claude in one pipeline via `baseURL`.
|
||||
- [09 — Structured Output](examples/09-structured-output.ts): any agent returns Zod-validated JSON.
|
||||
- [11 — Trace Observability](examples/11-trace-observability.ts): `onTrace` spans for LLM calls, tools, and tasks.
|
||||
- [17 — MiniMax](examples/17-minimax.ts): three-agent team using MiniMax M2.7.
|
||||
- [18 — DeepSeek](examples/18-deepseek.ts): three-agent team using DeepSeek Chat.
|
||||
- [with-vercel-ai-sdk](examples/with-vercel-ai-sdk/): Next.js app — OMA `runTeam()` + AI SDK `useChat` streaming.
|
||||
|
||||
| Example | What it shows |
|
||||
|---------|---------------|
|
||||
| [01 — Single Agent](examples/01-single-agent.ts) | `runAgent()` one-shot, `stream()` streaming, `prompt()` multi-turn |
|
||||
| [02 — Team Collaboration](examples/02-team-collaboration.ts) | `runTeam()` auto-orchestration with coordinator pattern |
|
||||
| [03 — Task Pipeline](examples/03-task-pipeline.ts) | `runTasks()` explicit dependency graph (design → implement → test + review) |
|
||||
| [04 — Multi-Model Team](examples/04-multi-model-team.ts) | `defineTool()` custom tools, mixed Anthropic + OpenAI providers, `AgentPool` |
|
||||
| [05 — Copilot](examples/05-copilot-test.ts) | GitHub Copilot as an LLM provider |
|
||||
| [06 — Local Model](examples/06-local-model.ts) | Ollama + Claude in one pipeline via `baseURL` (works with vLLM, LM Studio, etc.) |
|
||||
| [07 — Fan-Out / Aggregate](examples/07-fan-out-aggregate.ts) | `runParallel()` MapReduce — 3 analysts in parallel, then synthesize |
|
||||
| [08 — Gemma 4 Local](examples/08-gemma4-local.ts) | `runTasks()` + `runTeam()` with local Gemma 4 via Ollama — zero API cost |
|
||||
| [09 — Structured Output](examples/09-structured-output.ts) | `outputSchema` (Zod) on AgentConfig — validated JSON via `result.structured` |
|
||||
| [10 — Task Retry](examples/10-task-retry.ts) | `maxRetries` / `retryDelayMs` / `retryBackoff` with `task_retry` progress events |
|
||||
| [11 — Trace Observability](examples/11-trace-observability.ts) | `onTrace` callback — structured spans for LLM calls, tools, tasks, and agents |
|
||||
| [12 — Grok](examples/12-grok.ts) | Same as example 02 (`runTeam()` collaboration) with Grok (`XAI_API_KEY`) |
|
||||
| [13 — Gemini](examples/13-gemini.ts) | Gemini adapter smoke test with `gemini-2.5-flash` (`GEMINI_API_KEY`) |
|
||||
Run scripts with `npx tsx examples/02-team-collaboration.ts`.
|
||||
|
||||
## Architecture
|
||||
|
||||
|
|
@ -168,12 +188,14 @@ npx tsx examples/01-single-agent.ts
|
|||
│ │ - CopilotAdapter │
|
||||
│ │ - GeminiAdapter │
|
||||
│ │ - GrokAdapter │
|
||||
│ │ - MiniMaxAdapter │
|
||||
│ │ - DeepSeekAdapter │
|
||||
│ └──────────────────────┘
|
||||
┌────────▼──────────┐
|
||||
│ AgentRunner │ ┌──────────────────────┐
|
||||
│ - conversation │───►│ ToolRegistry │
|
||||
│ loop │ │ - defineTool() │
|
||||
│ - tool dispatch │ │ - 5 built-in tools │
|
||||
│ - tool dispatch │ │ - 6 built-in tools │
|
||||
└───────────────────┘ └──────────────────────┘
|
||||
```
|
||||
|
||||
|
|
@ -186,6 +208,79 @@ npx tsx examples/01-single-agent.ts
|
|||
| `file_write` | Write or create a file. Auto-creates parent directories. |
|
||||
| `file_edit` | Edit a file by replacing an exact string match. |
|
||||
| `grep` | Search file contents with regex. Uses ripgrep when available, falls back to Node.js. |
|
||||
| `glob` | Find files by glob pattern. Returns matching paths sorted by modification time. |
|
||||
|
||||
## Tool Configuration
|
||||
|
||||
Agents can be configured with fine-grained tool access control using presets, allowlists, and denylists.
|
||||
|
||||
### Tool Presets
|
||||
|
||||
Predefined tool sets for common use cases:
|
||||
|
||||
```typescript
|
||||
const readonlyAgent: AgentConfig = {
|
||||
name: 'reader',
|
||||
model: 'claude-sonnet-4-6',
|
||||
toolPreset: 'readonly', // file_read, grep, glob
|
||||
}
|
||||
|
||||
const readwriteAgent: AgentConfig = {
|
||||
name: 'editor',
|
||||
model: 'claude-sonnet-4-6',
|
||||
toolPreset: 'readwrite', // file_read, file_write, file_edit, grep, glob
|
||||
}
|
||||
|
||||
const fullAgent: AgentConfig = {
|
||||
name: 'executor',
|
||||
model: 'claude-sonnet-4-6',
|
||||
toolPreset: 'full', // file_read, file_write, file_edit, grep, glob, bash
|
||||
}
|
||||
```
|
||||
|
||||
### Advanced Filtering
|
||||
|
||||
Combine presets with allowlists and denylists for precise control:
|
||||
|
||||
```typescript
|
||||
const customAgent: AgentConfig = {
|
||||
name: 'custom',
|
||||
model: 'claude-sonnet-4-6',
|
||||
toolPreset: 'readwrite', // Start with: file_read, file_write, file_edit, grep, glob
|
||||
tools: ['file_read', 'grep'], // Allowlist: intersect with preset = file_read, grep
|
||||
disallowedTools: ['grep'], // Denylist: subtract = file_read only
|
||||
}
|
||||
```
|
||||
|
||||
**Resolution order:** preset → allowlist → denylist → framework safety rails.
|
||||
|
||||
### Custom Tools
|
||||
|
||||
Tools added via `agent.addTool()` are always available regardless of filtering.
|
||||
|
||||
### MCP Tools (Model Context Protocol)
|
||||
|
||||
`open-multi-agent` can connect to any MCP server and expose its tools directly to agents.
|
||||
|
||||
```typescript
|
||||
import { connectMCPTools } from '@jackchen_me/open-multi-agent/mcp'
|
||||
|
||||
const { tools, disconnect } = await connectMCPTools({
|
||||
command: 'npx',
|
||||
args: ['-y', '@modelcontextprotocol/server-github'],
|
||||
env: { GITHUB_TOKEN: process.env.GITHUB_TOKEN },
|
||||
namePrefix: 'github',
|
||||
})
|
||||
|
||||
// Register each MCP tool in your ToolRegistry, then include their names in AgentConfig.tools
|
||||
// Don't forget cleanup when done
|
||||
await disconnect()
|
||||
```
|
||||
|
||||
Notes:
|
||||
- `@modelcontextprotocol/sdk` is an optional peer dependency, only needed when using MCP.
|
||||
- Current transport support is stdio.
|
||||
- MCP input validation is delegated to the MCP server (`inputSchema` is `z.any()`).
|
||||
|
||||
## Supported Providers
|
||||
|
||||
|
|
@ -194,14 +289,20 @@ npx tsx examples/01-single-agent.ts
|
|||
| Anthropic (Claude) | `provider: 'anthropic'` | `ANTHROPIC_API_KEY` | Verified |
|
||||
| OpenAI (GPT) | `provider: 'openai'` | `OPENAI_API_KEY` | Verified |
|
||||
| Grok (xAI) | `provider: 'grok'` | `XAI_API_KEY` | Verified |
|
||||
| MiniMax (global) | `provider: 'minimax'` | `MINIMAX_API_KEY` | Verified |
|
||||
| MiniMax (China) | `provider: 'minimax'` + `MINIMAX_BASE_URL` | `MINIMAX_API_KEY` | Verified |
|
||||
| DeepSeek | `provider: 'deepseek'` | `DEEPSEEK_API_KEY` | Verified |
|
||||
| GitHub Copilot | `provider: 'copilot'` | `GITHUB_TOKEN` | Verified |
|
||||
| Gemini | `provider: 'gemini'` | `GEMINI_API_KEY` | Verified |
|
||||
| Ollama / vLLM / LM Studio | `provider: 'openai'` + `baseURL` | — | Verified |
|
||||
| Groq | `provider: 'openai'` + `baseURL` | `GROQ_API_KEY` | Verified |
|
||||
| llama.cpp server | `provider: 'openai'` + `baseURL` | — | Verified |
|
||||
|
||||
Gemini requires `npm install @google/genai` (optional peer dependency).
|
||||
|
||||
Verified local models with tool-calling: **Gemma 4** (see [example 08](examples/08-gemma4-local.ts)).
|
||||
|
||||
Any OpenAI-compatible API should work via `provider: 'openai'` + `baseURL` (DeepSeek, Groq, Mistral, Qwen, MiniMax, etc.). **Grok now has first-class support** via `provider: 'grok'`.
|
||||
Any OpenAI-compatible API should work via `provider: 'openai'` + `baseURL` (Mistral, Qwen, Moonshot, Doubao, etc.). Groq is now verified in [example 19](examples/19-groq.ts). **Grok, MiniMax, and DeepSeek now have first-class support** via `provider: 'grok'`, `provider: 'minimax'`, and `provider: 'deepseek'`.
|
||||
|
||||
### Local Model Tool-Calling
|
||||
|
||||
|
|
@ -241,33 +342,55 @@ const grokAgent: AgentConfig = {
|
|||
}
|
||||
```
|
||||
|
||||
(Set your `XAI_API_KEY` environment variable — no `baseURL` needed anymore.)
|
||||
(Set your `XAI_API_KEY` environment variable — no `baseURL` needed.)
|
||||
|
||||
```typescript
|
||||
const minimaxAgent: AgentConfig = {
|
||||
name: 'minimax-agent',
|
||||
provider: 'minimax',
|
||||
model: 'MiniMax-M2.7',
|
||||
systemPrompt: 'You are a helpful assistant.',
|
||||
}
|
||||
```
|
||||
|
||||
Set `MINIMAX_API_KEY`. The adapter selects the endpoint via `MINIMAX_BASE_URL`:
|
||||
|
||||
- `https://api.minimax.io/v1` Global, default
|
||||
- `https://api.minimaxi.com/v1` China mainland endpoint
|
||||
|
||||
You can also pass `baseURL` directly in `AgentConfig` to override the env var.
|
||||
|
||||
```typescript
|
||||
const deepseekAgent: AgentConfig = {
|
||||
name: 'deepseek-agent',
|
||||
provider: 'deepseek',
|
||||
model: 'deepseek-chat',
|
||||
systemPrompt: 'You are a helpful assistant.',
|
||||
}
|
||||
```
|
||||
|
||||
Set `DEEPSEEK_API_KEY`. Available models: `deepseek-chat` (DeepSeek-V3, recommended for coding) and `deepseek-reasoner` (thinking mode).
|
||||
|
||||
## Contributing
|
||||
|
||||
Issues, feature requests, and PRs are welcome. Some areas where contributions would be especially valuable:
|
||||
|
||||
- **Provider integrations** — Verify and document OpenAI-compatible providers (DeepSeek, Groq, Qwen, MiniMax, etc.) via `baseURL`. See [#25](https://github.com/JackChen-me/open-multi-agent/issues/25). For providers that are NOT OpenAI-compatible (e.g. Gemini), a new `LLMAdapter` implementation is welcome — the interface requires just two methods: `chat()` and `stream()`.
|
||||
- **Examples** — Real-world workflows and use cases.
|
||||
- **Documentation** — Guides, tutorials, and API docs.
|
||||
|
||||
## Author
|
||||
|
||||
> JackChen — Ex PM (¥100M+ revenue), now indie builder. Follow on [X](https://x.com/JackChen_x) for AI Agent insights.
|
||||
|
||||
## Contributors
|
||||
|
||||
<a href="https://github.com/JackChen-me/open-multi-agent/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=JackChen-me/open-multi-agent&v=20260405" />
|
||||
<img src="https://contrib.rocks/image?repo=JackChen-me/open-multi-agent&max=20&v=20260411" />
|
||||
</a>
|
||||
|
||||
## Star History
|
||||
|
||||
<a href="https://star-history.com/#JackChen-me/open-multi-agent&Date">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&theme=dark&v=20260405" />
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260405" />
|
||||
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260405" />
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&theme=dark" />
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date" />
|
||||
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date" />
|
||||
</picture>
|
||||
</a>
|
||||
|
||||
|
|
|
|||
217
README_zh.md
217
README_zh.md
|
|
@ -1,29 +1,62 @@
|
|||
# Open Multi-Agent
|
||||
|
||||
TypeScript 多智能体编排框架。一次 `runTeam()` 调用从目标到结果——框架自动拆解任务、解析依赖、并行执行。
|
||||
面向 TypeScript 的轻量多智能体编排引擎。3 个运行时依赖,零配置,一次 `runTeam()` 调用从目标到结果。
|
||||
|
||||
3 个运行时依赖 · 33 个源文件 · Node.js 能跑的地方都能部署 · 被 [Latent Space](https://www.latent.space/p/ainews-a-quiet-april-fools) AI News 提及(AI 工程领域头部 Newsletter,17 万+订阅者)
|
||||
CrewAI 是 Python。LangGraph 需要你自己画图。`open-multi-agent` 是你现有 Node.js 后端里 `npm install` 一下就能用的那一层。当你需要让一支 agent 团队围绕一个目标协作时,只提供这个,不多不少。
|
||||
|
||||
3 个运行时依赖 · 41 个源文件 · Node.js 能跑的地方都能部署
|
||||
|
||||
[](https://github.com/JackChen-me/open-multi-agent/stargazers)
|
||||
[](./LICENSE)
|
||||
[](https://www.typescriptlang.org/)
|
||||
[](https://github.com/JackChen-me/open-multi-agent/actions)
|
||||
[](https://github.com/JackChen-me/open-multi-agent/actions)
|
||||
|
||||
[English](./README.md) | **中文**
|
||||
|
||||
## 为什么选择 Open Multi-Agent?
|
||||
## 你真正得到的三件事
|
||||
|
||||
- **目标进,结果出** — `runTeam(team, "构建一个 REST API")`。协调者智能体自动将目标拆解为带依赖关系的任务图,分配给对应智能体,独立任务并行执行,最终合成输出。无需手动定义任务或编排流程图。
|
||||
- **TypeScript 原生** — 为 Node.js 生态而生。`npm install` 即用,无需 Python 运行时、无子进程桥接、无额外基础设施。可嵌入 Express、Next.js、Serverless 函数或 CI/CD 流水线。
|
||||
- **可审计、极轻量** — 3 个运行时依赖(`@anthropic-ai/sdk`、`openai`、`zod`),33 个源文件。一个下午就能读完全部源码。
|
||||
- **模型无关** — Claude、GPT、Gemma 4 和本地模型(Ollama、vLLM、LM Studio、llama.cpp server)可以在同一个团队中使用。通过 `baseURL` 即可接入任何 OpenAI 兼容服务。
|
||||
- **多智能体协作** — 定义不同角色、工具和模型的智能体,通过消息总线和共享内存协作。
|
||||
- **结构化输出** — 为任意智能体添加 `outputSchema`(Zod),输出自动解析为 JSON 并校验,校验失败自动重试一次。通过 `result.structured` 获取类型化结果。
|
||||
- **任务重试** — 为任务设置 `maxRetries`,失败时自动指数退避重试。所有尝试的 token 用量累计,确保计费准确。
|
||||
- **人机协同** — `runTasks()` 支持可选的 `onApproval` 回调。每批任务完成后,由你的回调决定是否继续执行后续任务。
|
||||
- **生命周期钩子** — `AgentConfig` 上的 `beforeRun` / `afterRun`。在执行前拦截 prompt,或在执行后处理结果。从钩子中 throw 可中止运行。
|
||||
- **循环检测** — `AgentConfig` 上的 `loopDetection` 可检测智能体重复相同工具调用或文本输出的卡死循环。可配置行为:警告(默认)、终止、或自定义回调。
|
||||
- **可观测性** — 可选的 `onTrace` 回调为每次 LLM 调用、工具执行、任务和智能体运行发出结构化 span 事件——包含耗时、token 用量和共享的 `runId` 用于关联追踪。未订阅时零开销,零额外依赖。
|
||||
- **一次调用从目标到结果。** `runTeam(team, "构建一个 REST API")` 启动一个协调者 agent,把目标拆成任务 DAG,解析依赖,独立任务并行执行,最终合成输出。不需要画图,不需要手动连任务。
|
||||
- **TypeScript 原生,3 个运行时依赖。** `@anthropic-ai/sdk`、`openai`、`zod`。这就是全部运行时。可嵌入 Express、Next.js、Serverless 函数或 CI/CD 流水线。没有 Python 运行时,没有子进程桥接,没有云端 sidecar。
|
||||
- **多模型团队。** Claude、GPT、Gemini、Grok、MiniMax、DeepSeek、Copilot,或任何 OpenAI 兼容的本地模型(Ollama、vLLM、LM Studio、llama.cpp)可以在同一个团队中使用。让架构师用 Opus 4.6,开发者用 GPT-5.4,评审用本地的 Gemma 4,一次 `runTeam()` 调用全部搞定。Gemini 作为 optional peer dependency 提供:使用前需 `npm install @google/genai`。
|
||||
|
||||
其他能力(MCP 集成、上下文策略、结构化输出、任务重试、人机协同、生命周期钩子、循环检测、可观测性)在下方章节和 [`examples/`](./examples/) 里。
|
||||
|
||||
## 哲学:我们做什么,不做什么
|
||||
|
||||
我们的目标是做 TypeScript 生态里最简单的多智能体框架。简单不等于封闭。框架的长期价值不在于功能清单的长度,而在于它连接的网络有多大。
|
||||
|
||||
**我们做:**
|
||||
- 一个协调者,把目标拆成任务 DAG。
|
||||
- 一个任务队列,独立任务并行执行,失败级联到下游。
|
||||
- 共享内存和消息总线,让 agent 之间能看到彼此的输出。
|
||||
- 多模型团队,每个 agent 可以用不同的 LLM provider。
|
||||
|
||||
**我们不做:**
|
||||
- **Agent Handoffs。** 如果 agent A 需要把对话中途交接给 agent B,去用 [OpenAI Agents SDK](https://github.com/openai/openai-agents-python)。在我们的模型里,每个 agent 完整负责自己的任务,不会中途交接。
|
||||
- **状态持久化 / 检查点。** 短期内不做。加存储后端会打破 3 个依赖的承诺,而且我们的工作流执行时间是秒到分钟级,不是小时级。如果真实使用场景转向长时间工作流,我们会重新评估。
|
||||
|
||||
**正在跟踪:**
|
||||
- **A2A 协议。** 观望中,等生产级采纳到位再行动。
|
||||
|
||||
完整理由见 [`DECISIONS.md`](./DECISIONS.md)。
|
||||
|
||||
## 和 X 有什么不同?
|
||||
|
||||
**vs. [LangGraph JS](https://github.com/langchain-ai/langgraphjs)。** LangGraph 是声明式图编排:你定义节点、边、条件路由,然后 `compile()` + `invoke()`。`open-multi-agent` 是目标驱动:你声明团队和目标,协调者在运行时把目标拆成任务 DAG。LangGraph 给你完全的拓扑控制(适合固定的生产工作流)。这个框架代码更少、迭代更快(适合探索型多智能体协作)。LangGraph 还有成熟的检查点能力,我们没有。
|
||||
|
||||
**vs. [CrewAI](https://github.com/crewAIInc/crewAI)。** CrewAI 是成熟的 Python 选择。如果你的技术栈是 Python,用 CrewAI。`open-multi-agent` 是 TypeScript 原生:3 个运行时依赖,直接嵌入 Node.js,不需要子进程桥接。编排能力大致相当,按语言契合度选。
|
||||
|
||||
**vs. [Vercel AI SDK](https://github.com/vercel/ai)。** AI SDK 是 LLM 调用层:统一的 TypeScript 客户端,支持 60+ provider,带流式、tool calls、结构化输出。它不做多智能体编排。`open-multi-agent` 需要多 agent 时叠在它之上。两者互补:单 agent 用 AI SDK,需要团队用这个。
|
||||
|
||||
## 谁在用
|
||||
|
||||
`open-multi-agent` 是一个新项目(2026-04-01 发布,MIT 许可,5,500+ stars)。生态还在成形,下面这份列表很短,但都真实:
|
||||
|
||||
- **[temodar-agent](https://github.com/xeloxa/temodar-agent)**(约 50 stars)。WordPress 安全分析平台,作者 [Ali Sünbül](https://github.com/xeloxa)。在 Docker runtime 里直接使用我们的内置工具(`bash`、`file_*`、`grep`)。已确认生产环境使用。
|
||||
- **[rentech-quant-platform](https://github.com/rookiecoderasz/rentech-quant-platform)。** 多智能体量化交易研究平台,5 条管线 + MCP 集成,基于 `open-multi-agent` 构建。早期信号,项目非常新。
|
||||
- **家用服务器 Cybersecurity SOC。** 本地完全离线运行 Qwen 2.5 + DeepSeek Coder(通过 Ollama),在 Wazuh + Proxmox 上构建自主 SOC 流水线。早期用户,未公开。
|
||||
|
||||
你在生产环境或 side project 里用 `open-multi-agent` 吗?[开一个 Discussion](https://github.com/JackChen-me/open-multi-agent/discussions),我们会把你列上来。
|
||||
|
||||
## 快速开始
|
||||
|
||||
|
|
@ -39,6 +72,9 @@ npm install @jackchen_me/open-multi-agent
|
|||
- `OPENAI_API_KEY`
|
||||
- `GEMINI_API_KEY`
|
||||
- `XAI_API_KEY`(Grok)
|
||||
- `MINIMAX_API_KEY`(MiniMax)
|
||||
- `MINIMAX_BASE_URL`(MiniMax — 可选,用于选择接入端点)
|
||||
- `DEEPSEEK_API_KEY`(DeepSeek)
|
||||
- `GITHUB_TOKEN`(Copilot)
|
||||
|
||||
三个智能体,一个目标——框架处理剩下的一切:
|
||||
|
|
@ -54,19 +90,8 @@ const architect: AgentConfig = {
|
|||
tools: ['file_write'],
|
||||
}
|
||||
|
||||
const developer: AgentConfig = {
|
||||
name: 'developer',
|
||||
model: 'claude-sonnet-4-6',
|
||||
systemPrompt: 'You implement what the architect designs.',
|
||||
tools: ['bash', 'file_read', 'file_write', 'file_edit'],
|
||||
}
|
||||
|
||||
const reviewer: AgentConfig = {
|
||||
name: 'reviewer',
|
||||
model: 'claude-sonnet-4-6',
|
||||
systemPrompt: 'You review code for correctness and clarity.',
|
||||
tools: ['file_read', 'grep'],
|
||||
}
|
||||
const developer: AgentConfig = { /* 同样结构,tools: ['bash', 'file_read', 'file_write', 'file_edit'] */ }
|
||||
const reviewer: AgentConfig = { /* 同样结构,tools: ['file_read', 'grep'] */ }
|
||||
|
||||
const orchestrator = new OpenMultiAgent({
|
||||
defaultModel: 'claude-sonnet-4-6',
|
||||
|
|
@ -82,8 +107,8 @@ const team = orchestrator.createTeam('api-team', {
|
|||
// 描述一个目标——框架将其拆解为任务并编排执行
|
||||
const result = await orchestrator.runTeam(team, 'Create a REST API for a todo list in /tmp/todo-api/')
|
||||
|
||||
console.log(`成功: ${result.success}`)
|
||||
console.log(`Token 用量: ${result.totalTokenUsage.output_tokens} output tokens`)
|
||||
console.log(`Success: ${result.success}`)
|
||||
console.log(`Tokens: ${result.totalTokenUsage.output_tokens} output tokens`)
|
||||
```
|
||||
|
||||
执行过程:
|
||||
|
|
@ -95,8 +120,8 @@ task_complete architect
|
|||
task_start developer
|
||||
task_start developer // 无依赖的任务并行执行
|
||||
task_complete developer
|
||||
task_start reviewer // 实现完成后自动解锁
|
||||
task_complete developer
|
||||
task_start reviewer // 实现完成后自动解锁
|
||||
task_complete reviewer
|
||||
agent_complete coordinator // 综合所有结果
|
||||
Success: true
|
||||
|
|
@ -111,29 +136,21 @@ Tokens: 12847 output tokens
|
|||
| 自动编排团队 | `runTeam()` | 给一个目标,框架自动规划和执行 |
|
||||
| 显式任务管线 | `runTasks()` | 你自己定义任务图和分配 |
|
||||
|
||||
如果需要 MapReduce 风格的扇出而不涉及任务依赖,直接使用 `AgentPool.runParallel()`。参见[示例 07](examples/07-fan-out-aggregate.ts)。
|
||||
|
||||
## 示例
|
||||
|
||||
所有示例都是可运行脚本,位于 [`examples/`](./examples/) 目录。使用 `npx tsx` 运行:
|
||||
[`examples/`](./examples/) 里有 18 个可运行脚本和 1 个完整项目。推荐从这几个开始:
|
||||
|
||||
```bash
|
||||
npx tsx examples/01-single-agent.ts
|
||||
```
|
||||
- [02 — 团队协作](examples/02-team-collaboration.ts):`runTeam()` 协调者模式。
|
||||
- [06 — 本地模型](examples/06-local-model.ts):通过 `baseURL` 把 Ollama 和 Claude 放在同一条管线。
|
||||
- [09 — 结构化输出](examples/09-structured-output.ts):任意 agent 产出 Zod 校验过的 JSON。
|
||||
- [11 — 可观测性](examples/11-trace-observability.ts):`onTrace` 回调,为 LLM 调用、工具、任务发出结构化 span。
|
||||
- [17 — MiniMax](examples/17-minimax.ts):使用 MiniMax M2.7 的三智能体团队。
|
||||
- [18 — DeepSeek](examples/18-deepseek.ts):使用 DeepSeek Chat 的三智能体团队。
|
||||
- [with-vercel-ai-sdk](examples/with-vercel-ai-sdk/):Next.js 应用 — OMA `runTeam()` + AI SDK `useChat` 流式输出。
|
||||
|
||||
| 示例 | 展示内容 |
|
||||
|------|----------|
|
||||
| [01 — 单智能体](examples/01-single-agent.ts) | `runAgent()` 单次调用、`stream()` 流式输出、`prompt()` 多轮对话 |
|
||||
| [02 — 团队协作](examples/02-team-collaboration.ts) | `runTeam()` 自动编排 + 协调者模式 |
|
||||
| [03 — 任务流水线](examples/03-task-pipeline.ts) | `runTasks()` 显式依赖图(设计 → 实现 → 测试 + 评审) |
|
||||
| [04 — 多模型团队](examples/04-multi-model-team.ts) | `defineTool()` 自定义工具、Anthropic + OpenAI 混合、`AgentPool` |
|
||||
| [05 — Copilot](examples/05-copilot-test.ts) | GitHub Copilot 作为 LLM 提供者 |
|
||||
| [06 — 本地模型](examples/06-local-model.ts) | Ollama + Claude 混合流水线,通过 `baseURL` 接入(兼容 vLLM、LM Studio 等) |
|
||||
| [07 — 扇出聚合](examples/07-fan-out-aggregate.ts) | `runParallel()` MapReduce — 3 个分析师并行,然后综合 |
|
||||
| [08 — Gemma 4 本地](examples/08-gemma4-local.ts) | `runTasks()` + `runTeam()` 本地 Gemma 4 via Ollama — 零 API 费用 |
|
||||
| [09 — 结构化输出](examples/09-structured-output.ts) | `outputSchema`(Zod)— 校验 JSON 输出,通过 `result.structured` 获取 |
|
||||
| [10 — 任务重试](examples/10-task-retry.ts) | `maxRetries` / `retryDelayMs` / `retryBackoff` + `task_retry` 进度事件 |
|
||||
| [11 — 可观测性](examples/11-trace-observability.ts) | `onTrace` 回调 — LLM 调用、工具、任务、智能体的结构化 span 事件 |
|
||||
| [12 — Grok](examples/12-grok.ts) | 同示例 02(`runTeam()` 团队协作),使用 Grok(`XAI_API_KEY`) |
|
||||
| [13 — Gemini](examples/13-gemini.ts) | Gemini 适配器测试,使用 `gemini-2.5-flash`(`GEMINI_API_KEY`) |
|
||||
用 `npx tsx examples/02-team-collaboration.ts` 运行脚本示例。
|
||||
|
||||
## 架构
|
||||
|
||||
|
|
@ -169,12 +186,14 @@ npx tsx examples/01-single-agent.ts
|
|||
│ │ - CopilotAdapter │
|
||||
│ │ - GeminiAdapter │
|
||||
│ │ - GrokAdapter │
|
||||
│ │ - MiniMaxAdapter │
|
||||
│ │ - DeepSeekAdapter │
|
||||
│ └──────────────────────┘
|
||||
┌────────▼──────────┐
|
||||
│ AgentRunner │ ┌──────────────────────┐
|
||||
│ - conversation │───►│ ToolRegistry │
|
||||
│ loop │ │ - defineTool() │
|
||||
│ - tool dispatch │ │ - 5 built-in tools │
|
||||
│ - tool dispatch │ │ - 6 built-in tools │
|
||||
└───────────────────┘ └──────────────────────┘
|
||||
```
|
||||
|
||||
|
|
@ -187,6 +206,55 @@ npx tsx examples/01-single-agent.ts
|
|||
| `file_write` | 写入或创建文件。自动创建父目录。 |
|
||||
| `file_edit` | 通过精确字符串匹配编辑文件。 |
|
||||
| `grep` | 使用正则表达式搜索文件内容。优先使用 ripgrep,回退到 Node.js 实现。 |
|
||||
| `glob` | 按 glob 模式查找文件。返回按修改时间排序的匹配路径。 |
|
||||
|
||||
## 工具配置
|
||||
|
||||
可以通过预设、白名单和黑名单对 agent 的工具访问进行精细控制。
|
||||
|
||||
### 工具预设
|
||||
|
||||
为常见场景预定义的工具组合:
|
||||
|
||||
```typescript
|
||||
const readonlyAgent: AgentConfig = {
|
||||
name: 'reader',
|
||||
model: 'claude-sonnet-4-6',
|
||||
toolPreset: 'readonly', // file_read, grep, glob
|
||||
}
|
||||
|
||||
const readwriteAgent: AgentConfig = {
|
||||
name: 'editor',
|
||||
model: 'claude-sonnet-4-6',
|
||||
toolPreset: 'readwrite', // file_read, file_write, file_edit, grep, glob
|
||||
}
|
||||
|
||||
const fullAgent: AgentConfig = {
|
||||
name: 'executor',
|
||||
model: 'claude-sonnet-4-6',
|
||||
toolPreset: 'full', // file_read, file_write, file_edit, grep, glob, bash
|
||||
}
|
||||
```
|
||||
|
||||
### 高级过滤
|
||||
|
||||
将预设与白名单、黑名单组合,实现精确控制:
|
||||
|
||||
```typescript
|
||||
const customAgent: AgentConfig = {
|
||||
name: 'custom',
|
||||
model: 'claude-sonnet-4-6',
|
||||
toolPreset: 'readwrite', // 起点:file_read, file_write, file_edit, grep, glob
|
||||
tools: ['file_read', 'grep'], // 白名单:与预设取交集 = file_read, grep
|
||||
disallowedTools: ['grep'], // 黑名单:再减去 = 只剩 file_read
|
||||
}
|
||||
```
|
||||
|
||||
**解析顺序:** preset → allowlist → denylist → 框架安全护栏。
|
||||
|
||||
### 自定义工具
|
||||
|
||||
通过 `agent.addTool()` 添加的工具始终可用,不受过滤规则影响。
|
||||
|
||||
## 支持的 Provider
|
||||
|
||||
|
|
@ -195,14 +263,19 @@ npx tsx examples/01-single-agent.ts
|
|||
| Anthropic (Claude) | `provider: 'anthropic'` | `ANTHROPIC_API_KEY` | 已验证 |
|
||||
| OpenAI (GPT) | `provider: 'openai'` | `OPENAI_API_KEY` | 已验证 |
|
||||
| Grok (xAI) | `provider: 'grok'` | `XAI_API_KEY` | 已验证 |
|
||||
| MiniMax(全球) | `provider: 'minimax'` | `MINIMAX_API_KEY` | 已验证 |
|
||||
| MiniMax(国内) | `provider: 'minimax'` + `MINIMAX_BASE_URL` | `MINIMAX_API_KEY` | 已验证 |
|
||||
| DeepSeek | `provider: 'deepseek'` | `DEEPSEEK_API_KEY` | 已验证 |
|
||||
| GitHub Copilot | `provider: 'copilot'` | `GITHUB_TOKEN` | 已验证 |
|
||||
| Gemini | `provider: 'gemini'` | `GEMINI_API_KEY` | 已验证 |
|
||||
| Ollama / vLLM / LM Studio | `provider: 'openai'` + `baseURL` | — | 已验证 |
|
||||
| llama.cpp server | `provider: 'openai'` + `baseURL` | — | 已验证 |
|
||||
|
||||
Gemini 需要 `npm install @google/genai`(optional peer dependency)。
|
||||
|
||||
已验证支持 tool-calling 的本地模型:**Gemma 4**(见[示例 08](examples/08-gemma4-local.ts))。
|
||||
|
||||
任何 OpenAI 兼容 API 均可通过 `provider: 'openai'` + `baseURL` 接入(DeepSeek、Groq、Mistral、Qwen、MiniMax 等)。**Grok 现已原生支持**,使用 `provider: 'grok'`。
|
||||
任何 OpenAI 兼容 API 均可通过 `provider: 'openai'` + `baseURL` 接入(Groq、Mistral、Qwen 等)。**Grok、MiniMax 和 DeepSeek 现已原生支持**,分别使用 `provider: 'grok'`、`provider: 'minimax'` 和 `provider: 'deepseek'`。
|
||||
|
||||
### 本地模型 Tool-Calling
|
||||
|
||||
|
|
@ -244,31 +317,53 @@ const grokAgent: AgentConfig = {
|
|||
|
||||
(设置 `XAI_API_KEY` 环境变量即可,无需 `baseURL`。)
|
||||
|
||||
```typescript
|
||||
const minimaxAgent: AgentConfig = {
|
||||
name: 'minimax-agent',
|
||||
provider: 'minimax',
|
||||
model: 'MiniMax-M2.7',
|
||||
systemPrompt: 'You are a helpful assistant.',
|
||||
}
|
||||
```
|
||||
|
||||
设置 `MINIMAX_API_KEY`。适配器通过 `MINIMAX_BASE_URL` 选择接入端点:
|
||||
|
||||
- `https://api.minimax.io/v1` 全球端点,默认
|
||||
- `https://api.minimaxi.com/v1` 中国大陆端点
|
||||
|
||||
也可在 `AgentConfig` 中直接传入 `baseURL` 覆盖环境变量。
|
||||
|
||||
```typescript
|
||||
const deepseekAgent: AgentConfig = {
|
||||
name: 'deepseek-agent',
|
||||
provider: 'deepseek',
|
||||
model: 'deepseek-chat',
|
||||
systemPrompt: '你是一个有用的助手。',
|
||||
}
|
||||
```
|
||||
|
||||
设置 `DEEPSEEK_API_KEY`。可用模型:`deepseek-chat`(DeepSeek-V3,推荐用于编码任务)和 `deepseek-reasoner`(思考模式)。
|
||||
|
||||
## 参与贡献
|
||||
|
||||
欢迎提 Issue、功能需求和 PR。以下方向的贡献尤其有价值:
|
||||
|
||||
- **Provider 集成** — 验证并文档化 OpenAI 兼容 Provider(DeepSeek、Groq、Qwen、MiniMax 等)通过 `baseURL` 接入。详见 [#25](https://github.com/JackChen-me/open-multi-agent/issues/25)。对于非 OpenAI 兼容的 Provider,欢迎贡献新的 `LLMAdapter` 实现——接口只需两个方法:`chat()` 和 `stream()`。
|
||||
- **示例** — 真实场景的工作流和用例。
|
||||
- **文档** — 指南、教程和 API 文档。
|
||||
|
||||
## 作者
|
||||
|
||||
> JackChen — 前 WPS 产品经理,现独立创业者。关注小红书[「杰克西|硅基杠杆」](https://www.xiaohongshu.com/user/profile/5a1bdc1e4eacab4aa39ea6d6),持续获取我的 AI Agent 观点和思考。
|
||||
|
||||
## 贡献者
|
||||
|
||||
<a href="https://github.com/JackChen-me/open-multi-agent/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=JackChen-me/open-multi-agent&v=20260405" />
|
||||
<img src="https://contrib.rocks/image?repo=JackChen-me/open-multi-agent&max=20&v=20260411" />
|
||||
</a>
|
||||
|
||||
## Star 趋势
|
||||
|
||||
<a href="https://star-history.com/#JackChen-me/open-multi-agent&Date">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&theme=dark&v=20260405" />
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260405" />
|
||||
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&v=20260405" />
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date&theme=dark" />
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date" />
|
||||
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=JackChen-me/open-multi-agent&type=Date" />
|
||||
</picture>
|
||||
</a>
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,255 @@
|
|||
# Command-line interface (`oma`)
|
||||
|
||||
The package ships a small binary **`oma`** that exposes the same primitives as the TypeScript API: `runTeam`, `runTasks`, plus a static provider reference. It is meant for **shell scripts and CI** (JSON on stdout, stable exit codes).
|
||||
|
||||
It does **not** provide an interactive REPL, working-directory injection into tools, human approval gates, or session persistence. Those stay in application code.
|
||||
|
||||
## Installation and invocation
|
||||
|
||||
After installing the package, the binary is on `PATH` when using `npx` or a local `node_modules/.bin`:
|
||||
|
||||
```bash
|
||||
npm install @jackchen_me/open-multi-agent
|
||||
npx oma help
|
||||
```
|
||||
|
||||
From a clone of the repository you need a build first:
|
||||
|
||||
```bash
|
||||
npm run build
|
||||
node dist/cli/oma.js help
|
||||
```
|
||||
|
||||
Set the usual provider API keys in the environment (see [README](../README.md#quick-start)); the CLI does not read secrets from flags. MiniMax additionally reads `MINIMAX_BASE_URL` to select the global (`https://api.minimax.io/v1`) or China (`https://api.minimaxi.com/v1`) endpoint.
|
||||
|
||||
---
|
||||
|
||||
## Commands
|
||||
|
||||
### `oma run`
|
||||
|
||||
Runs **`OpenMultiAgent.runTeam(team, goal)`**: coordinator decomposition, task queue, optional synthesis.
|
||||
|
||||
| Argument | Required | Description |
|
||||
|----------|----------|-------------|
|
||||
| `--goal` | Yes | Natural-language goal passed to the team run. |
|
||||
| `--team` | Yes | Path to JSON (see [Team file](#team-file)). |
|
||||
| `--orchestrator` | No | Path to JSON merged into `new OpenMultiAgent(...)` after any orchestrator fragment from the team file. |
|
||||
| `--coordinator` | No | Path to JSON passed as `runTeam(..., { coordinator })` (`CoordinatorConfig`). |
|
||||
|
||||
Global flags: [`--pretty`](#output-flags), [`--include-messages`](#output-flags).
|
||||
|
||||
### `oma task`
|
||||
|
||||
Runs **`OpenMultiAgent.runTasks(team, tasks)`** with a fixed task list (no coordinator decomposition).
|
||||
|
||||
| Argument | Required | Description |
|
||||
|----------|----------|-------------|
|
||||
| `--file` | Yes | Path to [tasks file](#tasks-file). |
|
||||
| `--team` | No | Path to JSON `TeamConfig`. When set, overrides the `team` object inside `--file`. |
|
||||
|
||||
Global flags: [`--pretty`](#output-flags), [`--include-messages`](#output-flags).
|
||||
|
||||
### `oma provider`
|
||||
|
||||
Read-only helper for wiring JSON configs and env vars.
|
||||
|
||||
- **`oma provider`** or **`oma provider list`** — Prints JSON: built-in provider ids, API key environment variable names, whether `baseURL` is supported, and short notes (e.g. OpenAI-compatible servers, Copilot in CI).
|
||||
- **`oma provider template <provider>`** — Prints a JSON object with example `orchestrator` and `agent` fields plus placeholder `env` entries. `<provider>` is one of: `anthropic`, `openai`, `gemini`, `grok`, `minimax`, `deepseek`, `copilot`.
|
||||
|
||||
Supports `--pretty`.
|
||||
|
||||
### `oma`, `oma help`, `oma -h`, `oma --help`
|
||||
|
||||
Prints usage text to stdout and exits **0**.
|
||||
|
||||
---
|
||||
|
||||
## Configuration files
|
||||
|
||||
Shapes match the library types `TeamConfig`, `OrchestratorConfig`, `CoordinatorConfig`, and the task objects accepted by `runTasks()`.
|
||||
|
||||
### Team file
|
||||
|
||||
Used with **`oma run --team`** (and optionally **`oma task --team`**).
|
||||
|
||||
**Option A — plain `TeamConfig`**
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "api-team",
|
||||
"agents": [
|
||||
{
|
||||
"name": "architect",
|
||||
"model": "claude-sonnet-4-6",
|
||||
"provider": "anthropic",
|
||||
"systemPrompt": "You design APIs.",
|
||||
"tools": ["file_read", "file_write"],
|
||||
"maxTurns": 6
|
||||
}
|
||||
],
|
||||
"sharedMemory": true
|
||||
}
|
||||
```
|
||||
|
||||
**Option B — team plus default orchestrator settings**
|
||||
|
||||
```json
|
||||
{
|
||||
"team": {
|
||||
"name": "api-team",
|
||||
"agents": [{ "name": "worker", "model": "claude-sonnet-4-6", "systemPrompt": "…" }]
|
||||
},
|
||||
"orchestrator": {
|
||||
"defaultModel": "claude-sonnet-4-6",
|
||||
"defaultProvider": "anthropic",
|
||||
"maxConcurrency": 3
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Validation rules enforced by the CLI:
|
||||
|
||||
- Root (or `team`) must be an object.
|
||||
- `team.name` is a non-empty string.
|
||||
- `team.agents` is a non-empty array; each agent must have non-empty `name` and `model`.
|
||||
|
||||
Any other fields are passed through to the library as in TypeScript.
|
||||
|
||||
### Tasks file
|
||||
|
||||
Used with **`oma task --file`**.
|
||||
|
||||
```json
|
||||
{
|
||||
"orchestrator": {
|
||||
"defaultModel": "claude-sonnet-4-6"
|
||||
},
|
||||
"team": {
|
||||
"name": "pipeline",
|
||||
"agents": [
|
||||
{ "name": "designer", "model": "claude-sonnet-4-6", "systemPrompt": "…" },
|
||||
{ "name": "builder", "model": "claude-sonnet-4-6", "systemPrompt": "…" }
|
||||
],
|
||||
"sharedMemory": true
|
||||
},
|
||||
"tasks": [
|
||||
{
|
||||
"title": "Design",
|
||||
"description": "Produce a short spec for the feature.",
|
||||
"assignee": "designer"
|
||||
},
|
||||
{
|
||||
"title": "Implement",
|
||||
"description": "Implement from the design.",
|
||||
"assignee": "builder",
|
||||
"dependsOn": ["Design"]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
- **`dependsOn`** — Task titles (not internal ids), same convention as the coordinator output in the library.
|
||||
- Optional per-task fields: `memoryScope` (`"dependencies"` \| `"all"`), `maxRetries`, `retryDelayMs`, `retryBackoff`.
|
||||
- **`tasks`** must be a non-empty array; each item needs string `title` and `description`.
|
||||
|
||||
If **`--team path.json`** is passed, the file’s top-level `team` property is ignored and the external file is used instead (useful when the same team definition is shared across several pipeline files).
|
||||
|
||||
### Orchestrator and coordinator JSON
|
||||
|
||||
These files are arbitrary JSON objects merged into **`OrchestratorConfig`** and **`CoordinatorConfig`**. Function-valued options (`onProgress`, `onApproval`, etc.) cannot appear in JSON and are not supported by the CLI.
|
||||
|
||||
---
|
||||
|
||||
## Output
|
||||
|
||||
### Stdout
|
||||
|
||||
Every invocation prints **one JSON document** to stdout, followed by a newline.
|
||||
|
||||
**Successful `run` / `task`**
|
||||
|
||||
```json
|
||||
{
|
||||
"command": "run",
|
||||
"success": true,
|
||||
"totalTokenUsage": { "input_tokens": 0, "output_tokens": 0 },
|
||||
"agentResults": {
|
||||
"architect": {
|
||||
"success": true,
|
||||
"output": "…",
|
||||
"tokenUsage": { "input_tokens": 0, "output_tokens": 0 },
|
||||
"toolCalls": [],
|
||||
"structured": null,
|
||||
"loopDetected": false,
|
||||
"budgetExceeded": false
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`agentResults` keys are agent names. When an agent ran multiple tasks, the library merges results; the CLI mirrors the merged `AgentRunResult` fields.
|
||||
|
||||
**Errors (usage, validation, I/O, runtime)**
|
||||
|
||||
```json
|
||||
{
|
||||
"error": {
|
||||
"kind": "usage",
|
||||
"message": "--goal and --team are required"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`kind` is one of: `usage`, `validation`, `io`, `runtime`, or `internal` (uncaught errors in the outer handler).
|
||||
|
||||
### Output flags
|
||||
|
||||
| Flag | Effect |
|
||||
|------|--------|
|
||||
| `--pretty` | Pretty-print JSON with indentation. |
|
||||
| `--include-messages` | Include each agent’s full `messages` array in `agentResults`. **Very large** for long runs; default is omit. |
|
||||
|
||||
There is no separate progress stream; for rich telemetry use the TypeScript API with `onProgress` / `onTrace`.
|
||||
|
||||
---
|
||||
|
||||
## Exit codes
|
||||
|
||||
| Code | Meaning |
|
||||
|------|---------|
|
||||
| **0** | Success: `run`/`task` finished with `success === true`, or help / `provider` completed normally. |
|
||||
| **1** | Run finished but **`success === false`** (agent or task failure as reported by the library). |
|
||||
| **2** | Usage, validation, readable JSON errors, or file access issues (e.g. missing file). |
|
||||
| **3** | Unexpected error, including typical LLM/API failures surfaced as thrown errors. |
|
||||
|
||||
In scripts:
|
||||
|
||||
```bash
|
||||
npx oma run --goal "Summarize README" --team team.json > result.json
|
||||
code=$?
|
||||
case $code in
|
||||
0) echo "OK" ;;
|
||||
1) echo "Run reported failure — inspect result.json" ;;
|
||||
2) echo "Bad inputs or files" ;;
|
||||
3) echo "Crash or API error" ;;
|
||||
esac
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Argument parsing
|
||||
|
||||
- Long options only: `--goal`, `--team`, `--file`, etc.
|
||||
- Values may be attached with `=`: `--team=./team.json`.
|
||||
- Boolean-style flags (`--pretty`, `--include-messages`) take no value; if the next token does not start with `--`, it is treated as the value of the previous option (standard `getopt`-style pairing).
|
||||
|
||||
---
|
||||
|
||||
## Limitations (by design)
|
||||
|
||||
- No TTY session, history, or `stdin` goal input.
|
||||
- No built-in **`cwd`** or metadata passed into `ToolUseContext` (tools use process cwd unless the library adds other hooks later).
|
||||
- No **`onApproval`** from JSON; non-interactive batch only.
|
||||
- Coordinator **`runTeam`** path still requires network and API keys like any other run.
|
||||
|
||||
|
|
@ -114,6 +114,8 @@ const conversationAgent = new Agent(
|
|||
model: 'claude-sonnet-4-6',
|
||||
systemPrompt: 'You are a TypeScript tutor. Give short, direct answers.',
|
||||
maxTurns: 2,
|
||||
// Keep only the most recent turn in long prompt() conversations.
|
||||
contextStrategy: { type: 'sliding-window', maxTurns: 1 },
|
||||
},
|
||||
new ToolRegistry(), // no tools needed for this conversation
|
||||
new ToolExecutor(new ToolRegistry()),
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
* Demonstrates how to define tasks with explicit dependency chains
|
||||
* (design → implement → test → review) using runTasks(). The TaskQueue
|
||||
* automatically blocks downstream tasks until their dependencies complete.
|
||||
* Prompt context is dependency-scoped by default: each task sees only its own
|
||||
* description plus direct dependency results (not unrelated team outputs).
|
||||
*
|
||||
* Run:
|
||||
* npx tsx examples/03-task-pipeline.ts
|
||||
|
|
@ -116,6 +118,7 @@ const tasks: Array<{
|
|||
description: string
|
||||
assignee?: string
|
||||
dependsOn?: string[]
|
||||
memoryScope?: 'dependencies' | 'all'
|
||||
}> = [
|
||||
{
|
||||
title: 'Design: URL shortener data model',
|
||||
|
|
@ -162,6 +165,9 @@ Produce a structured code review with sections:
|
|||
- Verdict: SHIP or NEEDS WORK`,
|
||||
assignee: 'reviewer',
|
||||
dependsOn: ['Implement: URL shortener'], // runs in parallel with Test after Implement completes
|
||||
// Optional override: reviewers can opt into full shared memory when needed.
|
||||
// Remove this line to keep strict dependency-only context.
|
||||
memoryScope: 'all',
|
||||
},
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,188 @@
|
|||
/**
|
||||
* Multi-Perspective Code Review
|
||||
*
|
||||
* Demonstrates:
|
||||
* - Dependency chain: generator produces code, three reviewers depend on it
|
||||
* - Parallel execution: security, performance, and style reviewers run concurrently
|
||||
* - Shared memory: each agent's output is automatically stored and injected
|
||||
* into downstream agents' prompts by the framework
|
||||
*
|
||||
* Flow:
|
||||
* generator → [security-reviewer, performance-reviewer, style-reviewer] (parallel) → synthesizer
|
||||
*
|
||||
* Run:
|
||||
* npx tsx examples/14-multi-perspective-code-review.ts
|
||||
*
|
||||
* Prerequisites:
|
||||
* ANTHROPIC_API_KEY env var must be set.
|
||||
*/
|
||||
|
||||
import { OpenMultiAgent } from '../src/index.js'
|
||||
import type { AgentConfig, OrchestratorEvent } from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// API spec to implement
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const API_SPEC = `POST /users endpoint that:
|
||||
- Accepts JSON body with name (string, required), email (string, required), age (number, optional)
|
||||
- Validates all fields
|
||||
- Inserts into a PostgreSQL database
|
||||
- Returns 201 with the created user or 400/500 on error`
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Agents
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const generator: AgentConfig = {
|
||||
name: 'generator',
|
||||
model: 'claude-sonnet-4-6',
|
||||
systemPrompt: `You are a Node.js backend developer. Given an API spec, write a complete
|
||||
Express route handler. Include imports, validation, database query, and error handling.
|
||||
Output only the code, no explanation. Keep it under 80 lines.`,
|
||||
maxTurns: 2,
|
||||
}
|
||||
|
||||
const securityReviewer: AgentConfig = {
|
||||
name: 'security-reviewer',
|
||||
model: 'claude-sonnet-4-6',
|
||||
systemPrompt: `You are a security reviewer. Review the code provided in context and check
|
||||
for OWASP top 10 vulnerabilities: SQL injection, XSS, broken authentication,
|
||||
sensitive data exposure, etc. Write your findings as a markdown checklist.
|
||||
Keep it to 150-200 words.`,
|
||||
maxTurns: 2,
|
||||
}
|
||||
|
||||
const performanceReviewer: AgentConfig = {
|
||||
name: 'performance-reviewer',
|
||||
model: 'claude-sonnet-4-6',
|
||||
systemPrompt: `You are a performance reviewer. Review the code provided in context and check
|
||||
for N+1 queries, memory leaks, blocking calls, missing connection pooling, and
|
||||
inefficient patterns. Write your findings as a markdown checklist.
|
||||
Keep it to 150-200 words.`,
|
||||
maxTurns: 2,
|
||||
}
|
||||
|
||||
const styleReviewer: AgentConfig = {
|
||||
name: 'style-reviewer',
|
||||
model: 'claude-sonnet-4-6',
|
||||
systemPrompt: `You are a code style reviewer. Review the code provided in context and check
|
||||
naming conventions, function structure, readability, error message clarity, and
|
||||
consistency. Write your findings as a markdown checklist.
|
||||
Keep it to 150-200 words.`,
|
||||
maxTurns: 2,
|
||||
}
|
||||
|
||||
const synthesizer: AgentConfig = {
|
||||
name: 'synthesizer',
|
||||
model: 'claude-sonnet-4-6',
|
||||
systemPrompt: `You are a lead engineer synthesizing code review feedback. Review all
|
||||
the feedback and original code provided in context. Produce a unified report with:
|
||||
|
||||
1. Critical issues (must fix before merge)
|
||||
2. Recommended improvements (should fix)
|
||||
3. Minor suggestions (nice to have)
|
||||
|
||||
Deduplicate overlapping feedback. Keep the report to 200-300 words.`,
|
||||
maxTurns: 2,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Orchestrator + team
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function handleProgress(event: OrchestratorEvent): void {
|
||||
if (event.type === 'task_start') {
|
||||
console.log(` [START] ${event.task ?? '?'} → ${event.agent ?? '?'}`)
|
||||
}
|
||||
if (event.type === 'task_complete') {
|
||||
const success = (event.data as { success?: boolean })?.success ?? true
|
||||
console.log(` [DONE] ${event.task ?? '?'} (${success ? 'OK' : 'FAIL'})`)
|
||||
}
|
||||
}
|
||||
|
||||
const orchestrator = new OpenMultiAgent({
|
||||
defaultModel: 'claude-sonnet-4-6',
|
||||
onProgress: handleProgress,
|
||||
})
|
||||
|
||||
const team = orchestrator.createTeam('code-review-team', {
|
||||
name: 'code-review-team',
|
||||
agents: [generator, securityReviewer, performanceReviewer, styleReviewer, synthesizer],
|
||||
sharedMemory: true,
|
||||
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tasks
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const tasks = [
|
||||
{
|
||||
title: 'Generate code',
|
||||
description: `Write a Node.js Express route handler for this API spec:\n\n${API_SPEC}`,
|
||||
assignee: 'generator',
|
||||
},
|
||||
{
|
||||
title: 'Security review',
|
||||
description: 'Review the generated code for security vulnerabilities.',
|
||||
assignee: 'security-reviewer',
|
||||
dependsOn: ['Generate code'],
|
||||
},
|
||||
{
|
||||
title: 'Performance review',
|
||||
description: 'Review the generated code for performance issues.',
|
||||
assignee: 'performance-reviewer',
|
||||
dependsOn: ['Generate code'],
|
||||
},
|
||||
{
|
||||
title: 'Style review',
|
||||
description: 'Review the generated code for style and readability.',
|
||||
assignee: 'style-reviewer',
|
||||
dependsOn: ['Generate code'],
|
||||
},
|
||||
{
|
||||
title: 'Synthesize feedback',
|
||||
description: 'Synthesize all review feedback and the original code into a unified, prioritized action item report.',
|
||||
assignee: 'synthesizer',
|
||||
dependsOn: ['Security review', 'Performance review', 'Style review'],
|
||||
},
|
||||
]
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Run
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
console.log('Multi-Perspective Code Review')
|
||||
console.log('='.repeat(60))
|
||||
console.log(`Spec: ${API_SPEC.split('\n')[0]}`)
|
||||
console.log('Pipeline: generator → 3 reviewers (parallel) → synthesizer')
|
||||
console.log('='.repeat(60))
|
||||
console.log()
|
||||
|
||||
const result = await orchestrator.runTasks(team, tasks)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Output
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
console.log('\n' + '='.repeat(60))
|
||||
console.log(`Overall success: ${result.success}`)
|
||||
console.log(`Tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`)
|
||||
console.log()
|
||||
|
||||
for (const [name, r] of result.agentResults) {
|
||||
const icon = r.success ? 'OK ' : 'FAIL'
|
||||
const tokens = `in:${r.tokenUsage.input_tokens} out:${r.tokenUsage.output_tokens}`
|
||||
console.log(` [${icon}] ${name.padEnd(22)} ${tokens}`)
|
||||
}
|
||||
|
||||
const synthResult = result.agentResults.get('synthesizer')
|
||||
if (synthResult?.success) {
|
||||
console.log('\n' + '='.repeat(60))
|
||||
console.log('UNIFIED REVIEW REPORT')
|
||||
console.log('='.repeat(60))
|
||||
console.log()
|
||||
console.log(synthResult.output)
|
||||
}
|
||||
|
||||
console.log('\nDone.')
|
||||
|
|
@ -0,0 +1,169 @@
|
|||
/**
|
||||
* Example 15 — Multi-Source Research Aggregation
|
||||
*
|
||||
* Demonstrates runTasks() with explicit dependency chains:
|
||||
* - Parallel execution: three analyst agents research the same topic independently
|
||||
* - Dependency chain via dependsOn: synthesizer waits for all analysts to finish
|
||||
* - Automatic shared memory: agent output flows to downstream agents via the framework
|
||||
*
|
||||
* Compare with example 07 (fan-out-aggregate) which uses AgentPool.runParallel()
|
||||
* for the same 3-analysts + synthesizer pattern. This example shows the runTasks()
|
||||
* API with explicit dependsOn declarations instead.
|
||||
*
|
||||
* Flow:
|
||||
* [technical-analyst, market-analyst, community-analyst] (parallel) → synthesizer
|
||||
*
|
||||
* Run:
|
||||
* npx tsx examples/15-research-aggregation.ts
|
||||
*
|
||||
* Prerequisites:
|
||||
* ANTHROPIC_API_KEY env var must be set.
|
||||
*/
|
||||
|
||||
import { OpenMultiAgent } from '../src/index.js'
|
||||
import type { AgentConfig, OrchestratorEvent } from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Topic
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const TOPIC = 'WebAssembly adoption in 2026'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Agents — three analysts + one synthesizer
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const technicalAnalyst: AgentConfig = {
|
||||
name: 'technical-analyst',
|
||||
model: 'claude-sonnet-4-6',
|
||||
systemPrompt: `You are a technical analyst. Given a topic, research its technical
|
||||
capabilities, limitations, performance characteristics, and architectural patterns.
|
||||
Write your findings as structured markdown. Keep it to 200-300 words.`,
|
||||
maxTurns: 2,
|
||||
}
|
||||
|
||||
const marketAnalyst: AgentConfig = {
|
||||
name: 'market-analyst',
|
||||
model: 'claude-sonnet-4-6',
|
||||
systemPrompt: `You are a market analyst. Given a topic, research industry adoption
|
||||
rates, key companies using the technology, market size estimates, and competitive
|
||||
landscape. Write your findings as structured markdown. Keep it to 200-300 words.`,
|
||||
maxTurns: 2,
|
||||
}
|
||||
|
||||
const communityAnalyst: AgentConfig = {
|
||||
name: 'community-analyst',
|
||||
model: 'claude-sonnet-4-6',
|
||||
systemPrompt: `You are a developer community analyst. Given a topic, research
|
||||
developer sentiment, ecosystem maturity, learning resources, community size,
|
||||
and conference/meetup activity. Write your findings as structured markdown.
|
||||
Keep it to 200-300 words.`,
|
||||
maxTurns: 2,
|
||||
}
|
||||
|
||||
const synthesizer: AgentConfig = {
|
||||
name: 'synthesizer',
|
||||
model: 'claude-sonnet-4-6',
|
||||
systemPrompt: `You are a research director who synthesizes multiple analyst reports
|
||||
into a single cohesive document. You will receive all prior analyst outputs
|
||||
automatically. Then:
|
||||
|
||||
1. Cross-reference claims across reports - flag agreements and contradictions
|
||||
2. Identify the 3 most important insights
|
||||
3. Produce a structured report with: Executive Summary, Key Findings,
|
||||
Areas of Agreement, Open Questions, and Recommendation
|
||||
|
||||
Keep the final report to 300-400 words.`,
|
||||
maxTurns: 2,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Orchestrator + team
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Progress callback for the orchestrator: logs a line when each task starts
 * and when it completes. Other event types are ignored.
 */
function handleProgress(event: OrchestratorEvent): void {
  if (event.type === 'task_start') {
    // Task title and assigned agent may be absent on some events; fall back to ''.
    console.log(` [START] ${event.task ?? ''} → ${event.agent ?? ''}`)
  }
  if (event.type === 'task_complete') {
    console.log(` [DONE] ${event.task ?? ''}`)
  }
}
|
||||
|
||||
const orchestrator = new OpenMultiAgent({
|
||||
defaultModel: 'claude-sonnet-4-6',
|
||||
onProgress: handleProgress,
|
||||
})
|
||||
|
||||
const team = orchestrator.createTeam('research-team', {
|
||||
name: 'research-team',
|
||||
agents: [technicalAnalyst, marketAnalyst, communityAnalyst, synthesizer],
|
||||
sharedMemory: true,
|
||||
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tasks — three analysts run in parallel, synthesizer depends on all three
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const tasks = [
|
||||
{
|
||||
title: 'Technical analysis',
|
||||
description: `Research the technical aspects of ${TOPIC}. Focus on capabilities, limitations, performance, and architecture.`,
|
||||
assignee: 'technical-analyst',
|
||||
},
|
||||
{
|
||||
title: 'Market analysis',
|
||||
description: `Research the market landscape for ${TOPIC}. Focus on adoption rates, key players, market size, and competition.`,
|
||||
assignee: 'market-analyst',
|
||||
},
|
||||
{
|
||||
title: 'Community analysis',
|
||||
description: `Research the developer community around ${TOPIC}. Focus on sentiment, ecosystem maturity, learning resources, and community activity.`,
|
||||
assignee: 'community-analyst',
|
||||
},
|
||||
{
|
||||
title: 'Synthesize report',
|
||||
description: `Cross-reference all analyst findings, identify key insights, flag contradictions, and produce a unified research report.`,
|
||||
assignee: 'synthesizer',
|
||||
dependsOn: ['Technical analysis', 'Market analysis', 'Community analysis'],
|
||||
},
|
||||
]
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Run
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
console.log('Multi-Source Research Aggregation')
|
||||
console.log('='.repeat(60))
|
||||
console.log(`Topic: ${TOPIC}`)
|
||||
console.log('Pipeline: 3 analysts (parallel) → synthesizer')
|
||||
console.log('='.repeat(60))
|
||||
console.log()
|
||||
|
||||
const result = await orchestrator.runTasks(team, tasks)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Output
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
console.log('\n' + '='.repeat(60))
|
||||
console.log(`Overall success: ${result.success}`)
|
||||
console.log(`Tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`)
|
||||
console.log()
|
||||
|
||||
for (const [name, r] of result.agentResults) {
|
||||
const icon = r.success ? 'OK ' : 'FAIL'
|
||||
const tokens = `in:${r.tokenUsage.input_tokens} out:${r.tokenUsage.output_tokens}`
|
||||
console.log(` [${icon}] ${name.padEnd(20)} ${tokens}`)
|
||||
}
|
||||
|
||||
const synthResult = result.agentResults.get('synthesizer')
|
||||
if (synthResult?.success) {
|
||||
console.log('\n' + '='.repeat(60))
|
||||
console.log('SYNTHESIZED REPORT')
|
||||
console.log('='.repeat(60))
|
||||
console.log()
|
||||
console.log(synthResult.output)
|
||||
}
|
||||
|
||||
console.log('\nDone.')
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
/**
|
||||
* Example 16 — MCP GitHub Tools
|
||||
*
|
||||
* Connect an MCP server over stdio and register all exposed MCP tools as
|
||||
* standard open-multi-agent tools.
|
||||
*
|
||||
* Run:
|
||||
* npx tsx examples/16-mcp-github.ts
|
||||
*
|
||||
* Prerequisites:
|
||||
* - GEMINI_API_KEY
|
||||
* - GITHUB_TOKEN
|
||||
* - @modelcontextprotocol/sdk installed
|
||||
*/
|
||||
|
||||
import { Agent, ToolExecutor, ToolRegistry, registerBuiltInTools } from '../src/index.js'
|
||||
import { connectMCPTools } from '../src/mcp.js'
|
||||
|
||||
if (!process.env.GITHUB_TOKEN?.trim()) {
|
||||
console.error('Missing GITHUB_TOKEN: set a GitHub personal access token in the environment.')
|
||||
process.exit(1)
|
||||
}
|
||||
|
||||
const { tools, disconnect } = await connectMCPTools({
|
||||
command: 'npx',
|
||||
args: ['-y', '@modelcontextprotocol/server-github'],
|
||||
env: {
|
||||
...process.env,
|
||||
GITHUB_TOKEN: process.env.GITHUB_TOKEN,
|
||||
},
|
||||
namePrefix: 'github',
|
||||
})
|
||||
|
||||
const registry = new ToolRegistry()
|
||||
registerBuiltInTools(registry)
|
||||
for (const tool of tools) registry.register(tool)
|
||||
const executor = new ToolExecutor(registry)
|
||||
|
||||
const agent = new Agent(
|
||||
{
|
||||
name: 'github-agent',
|
||||
model: 'gemini-2.5-flash',
|
||||
provider: 'gemini',
|
||||
tools: tools.map((tool) => tool.name),
|
||||
systemPrompt: 'Use GitHub MCP tools to answer repository questions.',
|
||||
},
|
||||
registry,
|
||||
executor,
|
||||
)
|
||||
|
||||
try {
|
||||
const result = await agent.run(
|
||||
'List the last 3 open issues in JackChen-me/open-multi-agent with title and number.',
|
||||
)
|
||||
|
||||
console.log(result.output)
|
||||
} finally {
|
||||
await disconnect()
|
||||
}
|
||||
|
|
@ -0,0 +1,159 @@
|
|||
/**
|
||||
* Example 17 — Multi-Agent Team Collaboration with MiniMax
|
||||
*
|
||||
* Three specialized agents (architect, developer, reviewer) collaborate via `runTeam()`
|
||||
* to build a minimal Express.js REST API. Every agent uses MiniMax's flagship model.
|
||||
*
|
||||
* Run:
|
||||
* npx tsx examples/17-minimax.ts
|
||||
*
|
||||
* Prerequisites:
|
||||
* MINIMAX_API_KEY environment variable must be set.
|
||||
* MINIMAX_BASE_URL environment variable can be set to switch to the China mainland endpoint if needed.
|
||||
*
|
||||
* Endpoints:
|
||||
* Global (default): https://api.minimax.io/v1
|
||||
* China mainland: https://api.minimaxi.com/v1 (set MINIMAX_BASE_URL)
|
||||
*/
|
||||
|
||||
import { OpenMultiAgent } from '../src/index.js'
|
||||
import type { AgentConfig, OrchestratorEvent } from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Agent definitions (all using MiniMax-M2.7)
|
||||
// ---------------------------------------------------------------------------
|
||||
const architect: AgentConfig = {
|
||||
name: 'architect',
|
||||
model: 'MiniMax-M2.7',
|
||||
provider: 'minimax',
|
||||
systemPrompt: `You are a software architect with deep experience in Node.js and REST API design.
|
||||
Your job is to design clear, production-quality API contracts and file/directory structures.
|
||||
Output concise plans in markdown — no unnecessary prose.`,
|
||||
tools: ['bash', 'file_write'],
|
||||
maxTurns: 5,
|
||||
temperature: 0.2,
|
||||
}
|
||||
|
||||
const developer: AgentConfig = {
|
||||
name: 'developer',
|
||||
model: 'MiniMax-M2.7',
|
||||
provider: 'minimax',
|
||||
systemPrompt: `You are a TypeScript/Node.js developer. You implement what the architect specifies.
|
||||
Write clean, runnable code with proper error handling. Use the tools to write files and run tests.`,
|
||||
tools: ['bash', 'file_read', 'file_write', 'file_edit'],
|
||||
maxTurns: 12,
|
||||
temperature: 0.1,
|
||||
}
|
||||
|
||||
const reviewer: AgentConfig = {
|
||||
name: 'reviewer',
|
||||
model: 'MiniMax-M2.7',
|
||||
provider: 'minimax',
|
||||
systemPrompt: `You are a senior code reviewer. Review code for correctness, security, and clarity.
|
||||
Provide a structured review with: LGTM items, suggestions, and any blocking issues.
|
||||
Read files using the tools before reviewing.`,
|
||||
tools: ['bash', 'file_read', 'grep'],
|
||||
maxTurns: 5,
|
||||
temperature: 0.3,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Progress tracking
|
||||
// ---------------------------------------------------------------------------
|
||||
const startTimes = new Map<string, number>()
|
||||
|
||||
function handleProgress(event: OrchestratorEvent): void {
|
||||
const ts = new Date().toISOString().slice(11, 23) // HH:MM:SS.mmm
|
||||
switch (event.type) {
|
||||
case 'agent_start':
|
||||
startTimes.set(event.agent ?? '', Date.now())
|
||||
console.log(`[${ts}] AGENT START → ${event.agent}`)
|
||||
break
|
||||
case 'agent_complete': {
|
||||
const elapsed = Date.now() - (startTimes.get(event.agent ?? '') ?? Date.now())
|
||||
console.log(`[${ts}] AGENT DONE ← ${event.agent} (${elapsed}ms)`)
|
||||
break
|
||||
}
|
||||
case 'task_start':
|
||||
console.log(`[${ts}] TASK START ↓ ${event.task}`)
|
||||
break
|
||||
case 'task_complete':
|
||||
console.log(`[${ts}] TASK DONE ↑ ${event.task}`)
|
||||
break
|
||||
case 'message':
|
||||
console.log(`[${ts}] MESSAGE • ${event.agent} → (team)`)
|
||||
break
|
||||
case 'error':
|
||||
console.error(`[${ts}] ERROR ✗ agent=${event.agent} task=${event.task}`)
|
||||
if (event.data instanceof Error) console.error(` ${event.data.message}`)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Orchestrate
|
||||
// ---------------------------------------------------------------------------
|
||||
const orchestrator = new OpenMultiAgent({
|
||||
defaultModel: 'MiniMax-M2.7',
|
||||
defaultProvider: 'minimax',
|
||||
maxConcurrency: 1, // sequential for readable output
|
||||
onProgress: handleProgress,
|
||||
})
|
||||
|
||||
const team = orchestrator.createTeam('api-team', {
|
||||
name: 'api-team',
|
||||
agents: [architect, developer, reviewer],
|
||||
sharedMemory: true,
|
||||
maxConcurrency: 1,
|
||||
})
|
||||
|
||||
console.log(`Team "${team.name}" created with agents: ${team.getAgents().map(a => a.name).join(', ')}`)
|
||||
console.log('\nStarting team run...\n')
|
||||
console.log('='.repeat(60))
|
||||
|
||||
const goal = `Create a minimal Express.js REST API in /tmp/express-api/ with:
|
||||
- GET /health → { status: "ok" }
|
||||
- GET /users → returns a hardcoded array of 2 user objects
|
||||
- POST /users → accepts { name, email } body, logs it, returns 201
|
||||
- Proper error handling middleware
|
||||
- The server should listen on port 3001
|
||||
- Include a package.json with the required dependencies`
|
||||
|
||||
const result = await orchestrator.runTeam(team, goal)
|
||||
|
||||
console.log('\n' + '='.repeat(60))
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Results
|
||||
// ---------------------------------------------------------------------------
|
||||
console.log('\nTeam run complete.')
|
||||
console.log(`Success: ${result.success}`)
|
||||
console.log(`Total tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`)
|
||||
|
||||
console.log('\nPer-agent results:')
|
||||
for (const [agentName, agentResult] of result.agentResults) {
|
||||
const status = agentResult.success ? 'OK' : 'FAILED'
|
||||
const tools = agentResult.toolCalls.length
|
||||
console.log(` ${agentName.padEnd(12)} [${status}] tool_calls=${tools}`)
|
||||
if (!agentResult.success) {
|
||||
console.log(` Error: ${agentResult.output.slice(0, 120)}`)
|
||||
}
|
||||
}
|
||||
|
||||
// Sample outputs
|
||||
const developerResult = result.agentResults.get('developer')
|
||||
if (developerResult?.success) {
|
||||
console.log('\nDeveloper output (last 600 chars):')
|
||||
console.log('─'.repeat(60))
|
||||
const out = developerResult.output
|
||||
console.log(out.length > 600 ? '...' + out.slice(-600) : out)
|
||||
console.log('─'.repeat(60))
|
||||
}
|
||||
|
||||
const reviewerResult = result.agentResults.get('reviewer')
|
||||
if (reviewerResult?.success) {
|
||||
console.log('\nReviewer output:')
|
||||
console.log('─'.repeat(60))
|
||||
console.log(reviewerResult.output)
|
||||
console.log('─'.repeat(60))
|
||||
}
|
||||
|
|
@ -0,0 +1,158 @@
|
|||
/**
|
||||
* Example 18 — Multi-Agent Team Collaboration with DeepSeek
|
||||
*
|
||||
* Three specialized agents (architect, developer, reviewer) collaborate via `runTeam()`
|
||||
* to build a minimal Express.js REST API. Every agent uses DeepSeek's flagship model.
|
||||
*
|
||||
* Run:
|
||||
* npx tsx examples/18-deepseek.ts
|
||||
*
|
||||
* Prerequisites:
|
||||
* DEEPSEEK_API_KEY environment variable must be set.
|
||||
*
|
||||
* Available models:
|
||||
* deepseek-chat — DeepSeek-V3 (non-thinking mode, recommended for coding tasks)
|
||||
* deepseek-reasoner — DeepSeek-V3 (thinking mode, for complex reasoning)
|
||||
*/
|
||||
|
||||
import { OpenMultiAgent } from '../src/index.js'
|
||||
import type { AgentConfig, OrchestratorEvent } from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Agent definitions (deepseek-reasoner for the architect, deepseek-chat for the rest)
|
||||
// ---------------------------------------------------------------------------
|
||||
const architect: AgentConfig = {
|
||||
name: 'architect',
|
||||
model: 'deepseek-reasoner',
|
||||
provider: 'deepseek',
|
||||
systemPrompt: `You are a software architect with deep experience in Node.js and REST API design.
|
||||
Your job is to design clear, production-quality API contracts and file/directory structures.
|
||||
Output concise plans in markdown — no unnecessary prose.`,
|
||||
tools: ['bash', 'file_write'],
|
||||
maxTurns: 5,
|
||||
temperature: 0.2,
|
||||
}
|
||||
|
||||
const developer: AgentConfig = {
|
||||
name: 'developer',
|
||||
model: 'deepseek-chat',
|
||||
provider: 'deepseek',
|
||||
systemPrompt: `You are a TypeScript/Node.js developer. You implement what the architect specifies.
|
||||
Write clean, runnable code with proper error handling. Use the tools to write files and run tests.`,
|
||||
tools: ['bash', 'file_read', 'file_write', 'file_edit'],
|
||||
maxTurns: 12,
|
||||
temperature: 0.1,
|
||||
}
|
||||
|
||||
const reviewer: AgentConfig = {
|
||||
name: 'reviewer',
|
||||
model: 'deepseek-chat',
|
||||
provider: 'deepseek',
|
||||
systemPrompt: `You are a senior code reviewer. Review code for correctness, security, and clarity.
|
||||
Provide a structured review with: LGTM items, suggestions, and any blocking issues.
|
||||
Read files using the tools before reviewing.`,
|
||||
tools: ['bash', 'file_read', 'grep'],
|
||||
maxTurns: 5,
|
||||
temperature: 0.3,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Progress tracking
|
||||
// ---------------------------------------------------------------------------
|
||||
const startTimes = new Map<string, number>()
|
||||
|
||||
/**
 * Timestamped progress logger for orchestrator events.
 *
 * Prints one line per lifecycle event (agent/task start+complete, team
 * messages, errors) and measures per-agent elapsed time via the
 * module-level `startTimes` map.
 */
function handleProgress(event: OrchestratorEvent): void {
  const ts = new Date().toISOString().slice(11, 23) // HH:MM:SS.mmm
  switch (event.type) {
    case 'agent_start':
      // Record wall-clock start so agent_complete can report elapsed time.
      startTimes.set(event.agent ?? '', Date.now())
      console.log(`[${ts}] AGENT START → ${event.agent}`)
      break
    case 'agent_complete': {
      // Falls back to "now" (elapsed 0ms) if no matching agent_start was seen.
      const elapsed = Date.now() - (startTimes.get(event.agent ?? '') ?? Date.now())
      console.log(`[${ts}] AGENT DONE ← ${event.agent} (${elapsed}ms)`)
      break
    }
    case 'task_start':
      console.log(`[${ts}] TASK START ↓ ${event.task}`)
      break
    case 'task_complete':
      console.log(`[${ts}] TASK DONE ↑ ${event.task}`)
      break
    case 'message':
      console.log(`[${ts}] MESSAGE • ${event.agent} → (team)`)
      break
    case 'error':
      console.error(`[${ts}] ERROR ✗ agent=${event.agent} task=${event.task}`)
      // event.data carries the thrown value; only Error instances expose .message.
      if (event.data instanceof Error) console.error(`  ${event.data.message}`)
      break
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Orchestrate
|
||||
// ---------------------------------------------------------------------------
|
||||
const orchestrator = new OpenMultiAgent({
|
||||
defaultModel: 'deepseek-chat',
|
||||
defaultProvider: 'deepseek',
|
||||
maxConcurrency: 1, // sequential for readable output
|
||||
onProgress: handleProgress,
|
||||
})
|
||||
|
||||
const team = orchestrator.createTeam('api-team', {
|
||||
name: 'api-team',
|
||||
agents: [architect, developer, reviewer],
|
||||
sharedMemory: true,
|
||||
maxConcurrency: 1,
|
||||
})
|
||||
|
||||
console.log(`Team "${team.name}" created with agents: ${team.getAgents().map(a => a.name).join(', ')}`)
|
||||
console.log('\nStarting team run...\n')
|
||||
console.log('='.repeat(60))
|
||||
|
||||
const goal = `Create a minimal Express.js REST API in /tmp/express-api/ with:
|
||||
- GET /health → { status: "ok" }
|
||||
- GET /users → returns a hardcoded array of 2 user objects
|
||||
- POST /users → accepts { name, email } body, logs it, returns 201
|
||||
- Proper error handling middleware
|
||||
- The server should listen on port 3001
|
||||
- Include a package.json with the required dependencies`
|
||||
|
||||
const result = await orchestrator.runTeam(team, goal)
|
||||
|
||||
console.log('\n' + '='.repeat(60))
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Results
|
||||
// ---------------------------------------------------------------------------
|
||||
console.log('\nTeam run complete.')
|
||||
console.log(`Success: ${result.success}`)
|
||||
console.log(`Total tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`)
|
||||
|
||||
console.log('\nPer-agent results:')
|
||||
for (const [agentName, agentResult] of result.agentResults) {
|
||||
const status = agentResult.success ? 'OK' : 'FAILED'
|
||||
const tools = agentResult.toolCalls.length
|
||||
console.log(` ${agentName.padEnd(12)} [${status}] tool_calls=${tools}`)
|
||||
if (!agentResult.success) {
|
||||
console.log(` Error: ${agentResult.output.slice(0, 120)}`)
|
||||
}
|
||||
}
|
||||
|
||||
// Sample outputs
|
||||
const developerResult = result.agentResults.get('developer')
|
||||
if (developerResult?.success) {
|
||||
console.log('\nDeveloper output (last 600 chars):')
|
||||
console.log('─'.repeat(60))
|
||||
const out = developerResult.output
|
||||
console.log(out.length > 600 ? '...' + out.slice(-600) : out)
|
||||
console.log('─'.repeat(60))
|
||||
}
|
||||
|
||||
const reviewerResult = result.agentResults.get('reviewer')
|
||||
if (reviewerResult?.success) {
|
||||
console.log('\nReviewer output:')
|
||||
console.log('─'.repeat(60))
|
||||
console.log(reviewerResult.output)
|
||||
console.log('─'.repeat(60))
|
||||
}
|
||||
|
|
@ -0,0 +1,164 @@
|
|||
/**
|
||||
* Example 19 — Multi-Agent Team Collaboration with Groq
|
||||
*
|
||||
* Three specialized agents (architect, developer, reviewer) collaborate via `runTeam()`
|
||||
* to build a minimal Express.js REST API. Every agent uses Groq via the OpenAI-compatible adapter.
|
||||
*
|
||||
* Run:
|
||||
* npx tsx examples/19-groq.ts
|
||||
*
|
||||
* Prerequisites:
|
||||
* GROQ_API_KEY environment variable must be set.
|
||||
*
|
||||
* Available models:
|
||||
* llama-3.3-70b-versatile — Groq production model (recommended for coding tasks)
|
||||
* deepseek-r1-distill-llama-70b — Groq reasoning model
|
||||
*/
|
||||
|
||||
import { OpenMultiAgent } from '../src/index.js'
|
||||
import type { AgentConfig, OrchestratorEvent } from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Agent definitions (all using Groq via the OpenAI-compatible adapter)
|
||||
// ---------------------------------------------------------------------------
|
||||
const architect: AgentConfig = {
|
||||
name: 'architect',
|
||||
model: 'deepseek-r1-distill-llama-70b',
|
||||
provider: 'openai',
|
||||
baseURL: 'https://api.groq.com/openai/v1',
|
||||
apiKey: process.env.GROQ_API_KEY,
|
||||
systemPrompt: `You are a software architect with deep experience in Node.js and REST API design.
|
||||
Your job is to design clear, production-quality API contracts and file/directory structures.
|
||||
Output concise plans in markdown — no unnecessary prose.`,
|
||||
tools: ['bash', 'file_write'],
|
||||
maxTurns: 5,
|
||||
temperature: 0.2,
|
||||
}
|
||||
|
||||
const developer: AgentConfig = {
|
||||
name: 'developer',
|
||||
model: 'llama-3.3-70b-versatile',
|
||||
provider: 'openai',
|
||||
baseURL: 'https://api.groq.com/openai/v1',
|
||||
apiKey: process.env.GROQ_API_KEY,
|
||||
systemPrompt: `You are a TypeScript/Node.js developer. You implement what the architect specifies.
|
||||
Write clean, runnable code with proper error handling. Use the tools to write files and run tests.`,
|
||||
tools: ['bash', 'file_read', 'file_write', 'file_edit'],
|
||||
maxTurns: 12,
|
||||
temperature: 0.1,
|
||||
}
|
||||
|
||||
const reviewer: AgentConfig = {
|
||||
name: 'reviewer',
|
||||
model: 'llama-3.3-70b-versatile',
|
||||
provider: 'openai',
|
||||
baseURL: 'https://api.groq.com/openai/v1',
|
||||
apiKey: process.env.GROQ_API_KEY,
|
||||
systemPrompt: `You are a senior code reviewer. Review code for correctness, security, and clarity.
|
||||
Provide a structured review with: LGTM items, suggestions, and any blocking issues.
|
||||
Read files using the tools before reviewing.`,
|
||||
tools: ['bash', 'file_read', 'grep'],
|
||||
maxTurns: 5,
|
||||
temperature: 0.3,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Progress tracking
|
||||
// ---------------------------------------------------------------------------
|
||||
const startTimes = new Map<string, number>()
|
||||
|
||||
/**
 * Timestamped progress logger for orchestrator events.
 *
 * Prints one line per lifecycle event (agent/task start+complete, team
 * messages, errors) and measures per-agent elapsed time via the
 * module-level `startTimes` map.
 */
function handleProgress(event: OrchestratorEvent): void {
  const ts = new Date().toISOString().slice(11, 23) // HH:MM:SS.mmm
  switch (event.type) {
    case 'agent_start':
      // Record wall-clock start so agent_complete can report elapsed time.
      startTimes.set(event.agent ?? '', Date.now())
      console.log(`[${ts}] AGENT START → ${event.agent}`)
      break
    case 'agent_complete': {
      // Falls back to "now" (elapsed 0ms) if no matching agent_start was seen.
      const elapsed = Date.now() - (startTimes.get(event.agent ?? '') ?? Date.now())
      console.log(`[${ts}] AGENT DONE ← ${event.agent} (${elapsed}ms)`)
      break
    }
    case 'task_start':
      console.log(`[${ts}] TASK START ↓ ${event.task}`)
      break
    case 'task_complete':
      console.log(`[${ts}] TASK DONE ↑ ${event.task}`)
      break
    case 'message':
      console.log(`[${ts}] MESSAGE • ${event.agent} → (team)`)
      break
    case 'error':
      console.error(`[${ts}] ERROR ✗ agent=${event.agent} task=${event.task}`)
      // event.data carries the thrown value; only Error instances expose .message.
      if (event.data instanceof Error) console.error(`  ${event.data.message}`)
      break
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Orchestrate
|
||||
// ---------------------------------------------------------------------------
|
||||
const orchestrator = new OpenMultiAgent({
|
||||
defaultModel: 'llama-3.3-70b-versatile',
|
||||
defaultProvider: 'openai',
|
||||
maxConcurrency: 1, // sequential for readable output
|
||||
onProgress: handleProgress,
|
||||
})
|
||||
|
||||
const team = orchestrator.createTeam('api-team', {
|
||||
name: 'api-team',
|
||||
agents: [architect, developer, reviewer],
|
||||
sharedMemory: true,
|
||||
maxConcurrency: 1,
|
||||
})
|
||||
|
||||
console.log(`Team "${team.name}" created with agents: ${team.getAgents().map(a => a.name).join(', ')}`)
|
||||
console.log('\nStarting team run...\n')
|
||||
console.log('='.repeat(60))
|
||||
|
||||
const goal = `Create a minimal Express.js REST API in /tmp/express-api/ with:
|
||||
- GET /health → { status: "ok" }
|
||||
- GET /users → returns a hardcoded array of 2 user objects
|
||||
- POST /users → accepts { name, email } body, logs it, returns 201
|
||||
- Proper error handling middleware
|
||||
- The server should listen on port 3001
|
||||
- Include a package.json with the required dependencies`
|
||||
|
||||
const result = await orchestrator.runTeam(team, goal)
|
||||
|
||||
console.log('\n' + '='.repeat(60))
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Results
|
||||
// ---------------------------------------------------------------------------
|
||||
console.log('\nTeam run complete.')
|
||||
console.log(`Success: ${result.success}`)
|
||||
console.log(`Total tokens — input: ${result.totalTokenUsage.input_tokens}, output: ${result.totalTokenUsage.output_tokens}`)
|
||||
|
||||
console.log('\nPer-agent results:')
|
||||
for (const [agentName, agentResult] of result.agentResults) {
|
||||
const status = agentResult.success ? 'OK' : 'FAILED'
|
||||
const tools = agentResult.toolCalls.length
|
||||
console.log(` ${agentName.padEnd(12)} [${status}] tool_calls=${tools}`)
|
||||
if (!agentResult.success) {
|
||||
console.log(` Error: ${agentResult.output.slice(0, 120)}`)
|
||||
}
|
||||
}
|
||||
|
||||
// Sample outputs
|
||||
const developerResult = result.agentResults.get('developer')
|
||||
if (developerResult?.success) {
|
||||
console.log('\nDeveloper output (last 600 chars):')
|
||||
console.log('─'.repeat(60))
|
||||
const out = developerResult.output
|
||||
console.log(out.length > 600 ? '...' + out.slice(-600) : out)
|
||||
console.log('─'.repeat(60))
|
||||
}
|
||||
|
||||
const reviewerResult = result.agentResults.get('reviewer')
|
||||
if (reviewerResult?.success) {
|
||||
console.log('\nReviewer output:')
|
||||
console.log('─'.repeat(60))
|
||||
console.log(reviewerResult.output)
|
||||
console.log('─'.repeat(60))
|
||||
}
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
node_modules/
|
||||
.next/
|
||||
.env
|
||||
.env.local
|
||||
*.tsbuildinfo
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
# with-vercel-ai-sdk
|
||||
|
||||
A Next.js demo showing **open-multi-agent** (OMA) and **Vercel AI SDK** working together:
|
||||
|
||||
- **OMA** orchestrates a research team (researcher agent + writer agent) via `runTeam()`
|
||||
- **AI SDK** streams the result to a chat UI via `useChat` + `streamText`
|
||||
|
||||
## How it works
|
||||
|
||||
```
|
||||
User message
|
||||
│
|
||||
▼
|
||||
API route (app/api/chat/route.ts)
|
||||
│
|
||||
├─ Phase 1: OMA runTeam()
|
||||
│ coordinator decomposes goal → researcher gathers info → writer drafts article
|
||||
│
|
||||
└─ Phase 2: AI SDK streamText()
|
||||
streams the team's output to the browser
|
||||
│
|
||||
▼
|
||||
Chat UI (app/page.tsx) — useChat hook renders streamed response
|
||||
```
|
||||
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
# 1. From repo root, install OMA dependencies
|
||||
cd ../..
|
||||
npm install
|
||||
|
||||
# 2. Back to this example
|
||||
cd examples/with-vercel-ai-sdk
|
||||
npm install
|
||||
|
||||
# 3. Set your API key
|
||||
export ANTHROPIC_API_KEY=sk-ant-...
|
||||
|
||||
# 4. Run
|
||||
npm run dev
|
||||
```
|
||||
|
||||
`npm run dev` automatically builds OMA before starting Next.js (via the `predev` script).
|
||||
|
||||
Open [http://localhost:3000](http://localhost:3000), type a topic, and watch the research team work.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Node.js >= 18
|
||||
- `ANTHROPIC_API_KEY` environment variable (used by both OMA and AI SDK)
|
||||
|
||||
## Key files
|
||||
|
||||
| File | Role |
|
||||
|------|------|
|
||||
| `app/api/chat/route.ts` | Backend — OMA orchestration + AI SDK streaming |
|
||||
| `app/page.tsx` | Frontend — chat UI with `useChat` hook |
|
||||
| `package.json` | References OMA via `file:../../` (local link) |
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
import { streamText, convertToModelMessages, type UIMessage } from 'ai'
|
||||
import { createOpenAICompatible } from '@ai-sdk/openai-compatible'
|
||||
import { OpenMultiAgent } from '@jackchen_me/open-multi-agent'
|
||||
import type { AgentConfig } from '@jackchen_me/open-multi-agent'
|
||||
|
||||
export const maxDuration = 120
|
||||
|
||||
// --- DeepSeek via OpenAI-compatible API ---
|
||||
const DEEPSEEK_BASE_URL = 'https://api.deepseek.com'
|
||||
const DEEPSEEK_MODEL = 'deepseek-chat'
|
||||
|
||||
const deepseek = createOpenAICompatible({
|
||||
name: 'deepseek',
|
||||
baseURL: `${DEEPSEEK_BASE_URL}/v1`,
|
||||
apiKey: process.env.DEEPSEEK_API_KEY,
|
||||
})
|
||||
|
||||
const researcher: AgentConfig = {
|
||||
name: 'researcher',
|
||||
model: DEEPSEEK_MODEL,
|
||||
provider: 'openai',
|
||||
baseURL: DEEPSEEK_BASE_URL,
|
||||
apiKey: process.env.DEEPSEEK_API_KEY,
|
||||
systemPrompt: `You are a research specialist. Given a topic, provide thorough, factual research
|
||||
with key findings, relevant data points, and important context.
|
||||
Be concise but comprehensive. Output structured notes, not prose.`,
|
||||
maxTurns: 3,
|
||||
temperature: 0.2,
|
||||
}
|
||||
|
||||
const writer: AgentConfig = {
|
||||
name: 'writer',
|
||||
model: DEEPSEEK_MODEL,
|
||||
provider: 'openai',
|
||||
baseURL: DEEPSEEK_BASE_URL,
|
||||
apiKey: process.env.DEEPSEEK_API_KEY,
|
||||
systemPrompt: `You are an expert writer. Using research from team members (available in shared memory),
|
||||
write a well-structured, engaging article with clear headings and concise paragraphs.
|
||||
Do not repeat raw research — synthesize it into readable prose.`,
|
||||
maxTurns: 3,
|
||||
temperature: 0.4,
|
||||
}
|
||||
|
||||
function extractText(message: UIMessage): string {
|
||||
return message.parts
|
||||
.filter((p): p is { type: 'text'; text: string } => p.type === 'text')
|
||||
.map((p) => p.text)
|
||||
.join('')
|
||||
}
|
||||
|
||||
export async function POST(req: Request) {
|
||||
const { messages }: { messages: UIMessage[] } = await req.json()
|
||||
const lastText = extractText(messages.at(-1)!)
|
||||
|
||||
// --- Phase 1: OMA multi-agent orchestration ---
|
||||
const orchestrator = new OpenMultiAgent({
|
||||
defaultModel: DEEPSEEK_MODEL,
|
||||
defaultProvider: 'openai',
|
||||
defaultBaseURL: DEEPSEEK_BASE_URL,
|
||||
defaultApiKey: process.env.DEEPSEEK_API_KEY,
|
||||
})
|
||||
|
||||
const team = orchestrator.createTeam('research-writing', {
|
||||
name: 'research-writing',
|
||||
agents: [researcher, writer],
|
||||
sharedMemory: true,
|
||||
})
|
||||
|
||||
const teamResult = await orchestrator.runTeam(
|
||||
team,
|
||||
`Research and write an article about: ${lastText}`,
|
||||
)
|
||||
|
||||
const teamOutput = teamResult.agentResults.get('coordinator')?.output ?? ''
|
||||
|
||||
// --- Phase 2: Stream result via Vercel AI SDK ---
|
||||
const result = streamText({
|
||||
model: deepseek(DEEPSEEK_MODEL),
|
||||
system: `You are presenting research from a multi-agent team (researcher + writer).
|
||||
The team has already done the work. Your only job is to relay their output to the user
|
||||
in a well-formatted way. Keep the content faithful to the team output below.
|
||||
At the very end, add a one-line note that this was produced by a researcher agent
|
||||
and a writer agent collaborating via open-multi-agent.
|
||||
|
||||
## Team Output
|
||||
${teamOutput}`,
|
||||
messages: await convertToModelMessages(messages),
|
||||
})
|
||||
|
||||
return result.toUIMessageStreamResponse()
|
||||
}
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
import type { Metadata } from 'next'
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: 'OMA + Vercel AI SDK',
|
||||
description: 'Multi-agent research team powered by open-multi-agent, streamed via Vercel AI SDK',
|
||||
}
|
||||
|
||||
export default function RootLayout({ children }: { children: React.ReactNode }) {
|
||||
return (
|
||||
<html lang="en">
|
||||
<body style={{ margin: 0, background: '#fafafa' }}>{children}</body>
|
||||
</html>
|
||||
)
|
||||
}
|
||||
|
|
@ -0,0 +1,97 @@
|
|||
'use client'
|
||||
|
||||
import { useState } from 'react'
|
||||
import { useChat } from '@ai-sdk/react'
|
||||
|
||||
export default function Home() {
|
||||
const { messages, sendMessage, status, error } = useChat()
|
||||
const [input, setInput] = useState('')
|
||||
|
||||
const isLoading = status === 'submitted' || status === 'streaming'
|
||||
|
||||
const handleSubmit = async (e: React.FormEvent) => {
|
||||
e.preventDefault()
|
||||
if (!input.trim() || isLoading) return
|
||||
const text = input
|
||||
setInput('')
|
||||
await sendMessage({ text })
|
||||
}
|
||||
|
||||
return (
|
||||
<main
|
||||
style={{
|
||||
maxWidth: 720,
|
||||
margin: '0 auto',
|
||||
padding: '32px 16px',
|
||||
fontFamily: 'system-ui, -apple-system, sans-serif',
|
||||
}}
|
||||
>
|
||||
<h1 style={{ fontSize: 22, marginBottom: 4 }}>Research Team</h1>
|
||||
<p style={{ color: '#666', fontSize: 14, marginBottom: 28 }}>
|
||||
Enter a topic. A <strong>researcher</strong> agent gathers information, a{' '}
|
||||
<strong>writer</strong> agent composes an article — orchestrated by
|
||||
open-multi-agent, streamed via Vercel AI SDK.
|
||||
</p>
|
||||
|
||||
<div style={{ minHeight: 120 }}>
|
||||
{messages.map((m) => (
|
||||
<div key={m.id} style={{ marginBottom: 24, lineHeight: 1.7 }}>
|
||||
<div style={{ fontWeight: 600, fontSize: 13, color: '#999', marginBottom: 4 }}>
|
||||
{m.role === 'user' ? 'You' : 'Research Team'}
|
||||
</div>
|
||||
<div style={{ whiteSpace: 'pre-wrap', fontSize: 15 }}>
|
||||
{m.parts
|
||||
.filter((part): part is { type: 'text'; text: string } => part.type === 'text')
|
||||
.map((part) => part.text)
|
||||
.join('')}
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
|
||||
{isLoading && status === 'submitted' && (
|
||||
<div style={{ color: '#888', fontSize: 14, padding: '8px 0' }}>
|
||||
Agents are collaborating — this may take a minute...
|
||||
</div>
|
||||
)}
|
||||
|
||||
{error && (
|
||||
<div style={{ color: '#c00', fontSize: 14, padding: '8px 0' }}>
|
||||
Error: {error.message}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<form onSubmit={handleSubmit} style={{ display: 'flex', gap: 8, marginTop: 32 }}>
|
||||
<input
|
||||
value={input}
|
||||
onChange={(e) => setInput(e.target.value)}
|
||||
placeholder="Enter a topic to research..."
|
||||
disabled={isLoading}
|
||||
style={{
|
||||
flex: 1,
|
||||
padding: '10px 14px',
|
||||
borderRadius: 8,
|
||||
border: '1px solid #ddd',
|
||||
fontSize: 15,
|
||||
outline: 'none',
|
||||
}}
|
||||
/>
|
||||
<button
|
||||
type="submit"
|
||||
disabled={isLoading || !input.trim()}
|
||||
style={{
|
||||
padding: '10px 20px',
|
||||
borderRadius: 8,
|
||||
border: 'none',
|
||||
background: isLoading ? '#ccc' : '#111',
|
||||
color: '#fff',
|
||||
cursor: isLoading ? 'not-allowed' : 'pointer',
|
||||
fontSize: 15,
|
||||
}}
|
||||
>
|
||||
Send
|
||||
</button>
|
||||
</form>
|
||||
</main>
|
||||
)
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
/// <reference types="next" />
|
||||
/// <reference types="next/image-types/global" />
|
||||
import "./.next/dev/types/routes.d.ts";
|
||||
|
||||
// NOTE: This file should not be edited
|
||||
// see https://nextjs.org/docs/app/api-reference/config/typescript for more information.
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
import type { NextConfig } from 'next'
|
||||
|
||||
const nextConfig: NextConfig = {
|
||||
serverExternalPackages: ['@jackchen_me/open-multi-agent'],
|
||||
}
|
||||
|
||||
export default nextConfig
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,25 @@
|
|||
{
|
||||
"name": "with-vercel-ai-sdk",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"predev": "cd ../.. && npm run build",
|
||||
"dev": "next dev",
|
||||
"build": "next build",
|
||||
"start": "next start"
|
||||
},
|
||||
"dependencies": {
|
||||
"@ai-sdk/openai-compatible": "^2.0.41",
|
||||
"@ai-sdk/react": "^3.0.0",
|
||||
"@jackchen_me/open-multi-agent": "file:../../",
|
||||
"ai": "^6.0.0",
|
||||
"next": "^16.0.0",
|
||||
"react": "^19.0.0",
|
||||
"react-dom": "^19.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.0.0",
|
||||
"@types/react": "^19.0.0",
|
||||
"@types/react-dom": "^19.0.0",
|
||||
"typescript": "^5.6.0"
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"lib": [
|
||||
"dom",
|
||||
"dom.iterable",
|
||||
"ES2022"
|
||||
],
|
||||
"allowJs": true,
|
||||
"skipLibCheck": true,
|
||||
"strict": true,
|
||||
"noEmit": true,
|
||||
"esModuleInterop": true,
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "bundler",
|
||||
"resolveJsonModule": true,
|
||||
"isolatedModules": true,
|
||||
"jsx": "react-jsx",
|
||||
"incremental": true,
|
||||
"plugins": [
|
||||
{
|
||||
"name": "next"
|
||||
}
|
||||
],
|
||||
"paths": {
|
||||
"@/*": [
|
||||
"./*"
|
||||
]
|
||||
}
|
||||
},
|
||||
"include": [
|
||||
"next-env.d.ts",
|
||||
"**/*.ts",
|
||||
"**/*.tsx",
|
||||
".next/types/**/*.ts",
|
||||
".next/dev/types/**/*.ts"
|
||||
],
|
||||
"exclude": [
|
||||
"node_modules"
|
||||
]
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
25
package.json
25
package.json
|
|
@ -1,14 +1,27 @@
|
|||
{
|
||||
"name": "@jackchen_me/open-multi-agent",
|
||||
"version": "1.0.0",
|
||||
"description": "Production-grade multi-agent orchestration framework. Model-agnostic, supports team collaboration, task scheduling, and inter-agent communication.",
|
||||
"version": "1.1.0",
|
||||
"description": "TypeScript multi-agent framework — one runTeam() call from goal to result. Auto task decomposition, parallel execution. 3 dependencies, deploys anywhere Node.js runs.",
|
||||
"files": [
|
||||
"dist",
|
||||
"docs",
|
||||
"README.md",
|
||||
"LICENSE"
|
||||
],
|
||||
"type": "module",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
"bin": {
|
||||
"oma": "dist/cli/oma.js"
|
||||
},
|
||||
"exports": {
|
||||
".": {
|
||||
"types": "./dist/index.d.ts",
|
||||
"import": "./dist/index.js"
|
||||
},
|
||||
"./mcp": {
|
||||
"types": "./dist/mcp.d.ts",
|
||||
"import": "./dist/mcp.js"
|
||||
}
|
||||
},
|
||||
"scripts": {
|
||||
|
|
@ -17,6 +30,7 @@
|
|||
"test": "vitest run",
|
||||
"test:watch": "vitest",
|
||||
"lint": "tsc --noEmit",
|
||||
"test:e2e": "RUN_E2E=1 vitest run tests/e2e/",
|
||||
"prepublishOnly": "npm run build"
|
||||
},
|
||||
"keywords": [
|
||||
|
|
@ -42,15 +56,20 @@
|
|||
"zod": "^3.23.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@google/genai": "^1.48.0"
|
||||
"@google/genai": "^1.48.0",
|
||||
"@modelcontextprotocol/sdk": "^1.18.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@google/genai": {
|
||||
"optional": true
|
||||
},
|
||||
"@modelcontextprotocol/sdk": {
|
||||
"optional": true
|
||||
}
|
||||
},
|
||||
"devDependencies": {
|
||||
"@google/genai": "^1.48.0",
|
||||
"@modelcontextprotocol/sdk": "^1.18.0",
|
||||
"@types/node": "^22.0.0",
|
||||
"@vitest/coverage-v8": "^2.1.9",
|
||||
"tsx": "^4.21.0",
|
||||
|
|
|
|||
|
|
@ -146,10 +146,15 @@ export class Agent {
|
|||
maxTurns: this.config.maxTurns,
|
||||
maxTokens: this.config.maxTokens,
|
||||
temperature: this.config.temperature,
|
||||
toolPreset: this.config.toolPreset,
|
||||
allowedTools: this.config.tools,
|
||||
disallowedTools: this.config.disallowedTools,
|
||||
agentName: this.name,
|
||||
agentRole: this.config.systemPrompt?.slice(0, 50) ?? 'assistant',
|
||||
loopDetection: this.config.loopDetection,
|
||||
maxTokenBudget: this.config.maxTokenBudget,
|
||||
contextStrategy: this.config.contextStrategy,
|
||||
compressToolResults: this.config.compressToolResults,
|
||||
}
|
||||
|
||||
this.runner = new AgentRunner(
|
||||
|
|
@ -260,7 +265,7 @@ export class Agent {
|
|||
* The tool becomes available to the next LLM call — no restart required.
|
||||
*/
|
||||
addTool(tool: FrameworkToolDefinition): void {
|
||||
this._toolRegistry.register(tool)
|
||||
this._toolRegistry.register(tool, { runtimeAdded: true })
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -328,6 +333,16 @@ export class Agent {
|
|||
const result = await runner.run(messages, runOptions)
|
||||
this.state.tokenUsage = addUsage(this.state.tokenUsage, result.tokenUsage)
|
||||
|
||||
if (result.budgetExceeded) {
|
||||
let budgetResult = this.toAgentRunResult(result, false)
|
||||
if (this.config.afterRun) {
|
||||
budgetResult = await this.config.afterRun(budgetResult)
|
||||
}
|
||||
this.transitionTo('completed')
|
||||
this.emitAgentTrace(callerOptions, agentStartMs, budgetResult)
|
||||
return budgetResult
|
||||
}
|
||||
|
||||
// --- Structured output validation ---
|
||||
if (this.config.outputSchema) {
|
||||
let validated = await this.validateStructuredOutput(
|
||||
|
|
@ -461,6 +476,7 @@ export class Agent {
|
|||
tokenUsage: mergedTokenUsage,
|
||||
toolCalls: mergedToolCalls,
|
||||
structured: validated,
|
||||
...(retryResult.budgetExceeded ? { budgetExceeded: true } : {}),
|
||||
}
|
||||
} catch {
|
||||
// Retry also failed
|
||||
|
|
@ -472,6 +488,7 @@ export class Agent {
|
|||
tokenUsage: mergedTokenUsage,
|
||||
toolCalls: mergedToolCalls,
|
||||
structured: undefined,
|
||||
...(retryResult.budgetExceeded ? { budgetExceeded: true } : {}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -502,7 +519,7 @@ export class Agent {
|
|||
const result = event.data as import('./runner.js').RunResult
|
||||
this.state.tokenUsage = addUsage(this.state.tokenUsage, result.tokenUsage)
|
||||
|
||||
let agentResult = this.toAgentRunResult(result, true)
|
||||
let agentResult = this.toAgentRunResult(result, !result.budgetExceeded)
|
||||
if (this.config.afterRun) {
|
||||
agentResult = await this.config.afterRun(agentResult)
|
||||
}
|
||||
|
|
@ -598,6 +615,7 @@ export class Agent {
|
|||
toolCalls: result.toolCalls,
|
||||
structured,
|
||||
...(result.loopDetected ? { loopDetected: true } : {}),
|
||||
...(result.budgetExceeded ? { budgetExceeded: true } : {}),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -58,6 +58,14 @@ export interface PoolStatus {
|
|||
export class AgentPool {
|
||||
private readonly agents: Map<string, Agent> = new Map()
|
||||
private readonly semaphore: Semaphore
|
||||
/**
|
||||
* Per-agent mutex (Semaphore(1)) to serialize concurrent runs on the same
|
||||
* Agent instance. Without this, two tasks assigned to the same agent could
|
||||
* race on mutable instance state (`status`, `messages`, `tokenUsage`).
|
||||
*
|
||||
* @see https://github.com/anthropics/open-multi-agent/issues/72
|
||||
*/
|
||||
private readonly agentLocks: Map<string, Semaphore> = new Map()
|
||||
/** Cursor used by `runAny` for round-robin dispatch. */
|
||||
private roundRobinIndex = 0
|
||||
|
||||
|
|
@ -86,6 +94,7 @@ export class AgentPool {
|
|||
)
|
||||
}
|
||||
this.agents.set(agent.name, agent)
|
||||
this.agentLocks.set(agent.name, new Semaphore(1))
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -98,6 +107,7 @@ export class AgentPool {
|
|||
throw new Error(`AgentPool: agent '${name}' is not registered.`)
|
||||
}
|
||||
this.agents.delete(name)
|
||||
this.agentLocks.delete(name)
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -130,12 +140,20 @@ export class AgentPool {
|
|||
runOptions?: Partial<RunOptions>,
|
||||
): Promise<AgentRunResult> {
|
||||
const agent = this.requireAgent(agentName)
|
||||
const agentLock = this.agentLocks.get(agentName)!
|
||||
|
||||
await this.semaphore.acquire()
|
||||
// Acquire per-agent lock first so the second call for the same agent waits
|
||||
// here without consuming a pool slot. Then acquire the pool semaphore.
|
||||
await agentLock.acquire()
|
||||
try {
|
||||
return await agent.run(prompt, runOptions)
|
||||
await this.semaphore.acquire()
|
||||
try {
|
||||
return await agent.run(prompt, runOptions)
|
||||
} finally {
|
||||
this.semaphore.release()
|
||||
}
|
||||
} finally {
|
||||
this.semaphore.release()
|
||||
agentLock.release()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -200,11 +218,18 @@ export class AgentPool {
|
|||
const agent = allAgents[this.roundRobinIndex]!
|
||||
this.roundRobinIndex = (this.roundRobinIndex + 1) % allAgents.length
|
||||
|
||||
await this.semaphore.acquire()
|
||||
const agentLock = this.agentLocks.get(agent.name)!
|
||||
|
||||
await agentLock.acquire()
|
||||
try {
|
||||
return await agent.run(prompt)
|
||||
await this.semaphore.acquire()
|
||||
try {
|
||||
return await agent.run(prompt)
|
||||
} finally {
|
||||
this.semaphore.release()
|
||||
}
|
||||
} finally {
|
||||
this.semaphore.release()
|
||||
agentLock.release()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,12 +28,32 @@ import type {
|
|||
TraceEvent,
|
||||
LoopDetectionConfig,
|
||||
LoopDetectionInfo,
|
||||
LLMToolDef,
|
||||
ContextStrategy,
|
||||
} from '../types.js'
|
||||
import { TokenBudgetExceededError } from '../errors.js'
|
||||
import { LoopDetector } from './loop-detector.js'
|
||||
import { emitTrace } from '../utils/trace.js'
|
||||
import { estimateTokens } from '../utils/tokens.js'
|
||||
import type { ToolRegistry } from '../tool/framework.js'
|
||||
import type { ToolExecutor } from '../tool/executor.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tool presets
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Predefined tool sets for common agent use cases. */
|
||||
export const TOOL_PRESETS = {
|
||||
readonly: ['file_read', 'grep', 'glob'],
|
||||
readwrite: ['file_read', 'file_write', 'file_edit', 'grep', 'glob'],
|
||||
full: ['file_read', 'file_write', 'file_edit', 'grep', 'glob', 'bash'],
|
||||
} as const satisfies Record<string, readonly string[]>
|
||||
|
||||
/** Framework-level disallowed tools for safety rails. */
|
||||
export const AGENT_FRAMEWORK_DISALLOWED: readonly string[] = [
|
||||
// Empty for now, infrastructure for future built-in tools
|
||||
]
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public interfaces
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -59,17 +79,30 @@ export interface RunnerOptions {
|
|||
/** AbortSignal that cancels any in-flight adapter call and stops the loop. */
|
||||
readonly abortSignal?: AbortSignal
|
||||
/**
|
||||
* Whitelist of tool names this runner is allowed to use.
|
||||
* When provided, only tools whose name appears in this list are sent to the
|
||||
* LLM. When omitted, all registered tools are available.
|
||||
* Tool access control configuration.
|
||||
* - `toolPreset`: Predefined tool sets for common use cases
|
||||
* - `allowedTools`: Whitelist of tool names (allowlist)
|
||||
* - `disallowedTools`: Blacklist of tool names (denylist)
|
||||
* Tools are resolved in order: preset → allowlist → denylist
|
||||
*/
|
||||
readonly toolPreset?: 'readonly' | 'readwrite' | 'full'
|
||||
readonly allowedTools?: readonly string[]
|
||||
readonly disallowedTools?: readonly string[]
|
||||
/** Display name of the agent driving this runner (used in tool context). */
|
||||
readonly agentName?: string
|
||||
/** Short role description of the agent (used in tool context). */
|
||||
readonly agentRole?: string
|
||||
/** Loop detection configuration. When set, detects stuck agent loops. */
|
||||
readonly loopDetection?: LoopDetectionConfig
|
||||
/** Maximum cumulative tokens (input + output) allowed for this run. */
|
||||
readonly maxTokenBudget?: number
|
||||
/** Optional context compression strategy for long multi-turn runs. */
|
||||
readonly contextStrategy?: ContextStrategy
|
||||
/**
|
||||
* Compress tool results that the agent has already processed.
|
||||
* See {@link AgentConfig.compressToolResults} for details.
|
||||
*/
|
||||
readonly compressToolResults?: boolean | { readonly minChars?: number }
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -117,6 +150,8 @@ export interface RunResult {
|
|||
readonly turns: number
|
||||
/** True when the run was terminated or warned due to loop detection. */
|
||||
readonly loopDetected?: boolean
|
||||
/** True when the run was terminated due to token budget limits. */
|
||||
readonly budgetExceeded?: boolean
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -146,6 +181,34 @@ function addTokenUsage(a: TokenUsage, b: TokenUsage): TokenUsage {
|
|||
|
||||
const ZERO_USAGE: TokenUsage = { input_tokens: 0, output_tokens: 0 }
|
||||
|
||||
/** Default minimum content length before tool result compression kicks in. */
|
||||
const DEFAULT_MIN_COMPRESS_CHARS = 500
|
||||
|
||||
/**
|
||||
* Prepends synthetic framing text to the first user message so we never emit
|
||||
* consecutive `user` turns (Bedrock) and summaries do not concatenate onto
|
||||
* the original user prompt (direct API). If there is no user message yet,
|
||||
* inserts a single assistant text preamble.
|
||||
*/
|
||||
function prependSyntheticPrefixToFirstUser(
|
||||
messages: LLMMessage[],
|
||||
prefix: string,
|
||||
): LLMMessage[] {
|
||||
const userIdx = messages.findIndex(m => m.role === 'user')
|
||||
if (userIdx < 0) {
|
||||
return [{
|
||||
role: 'assistant',
|
||||
content: [{ type: 'text', text: prefix.trimEnd() }],
|
||||
}, ...messages]
|
||||
}
|
||||
const target = messages[userIdx]!
|
||||
const merged: LLMMessage = {
|
||||
role: 'user',
|
||||
content: [{ type: 'text', text: prefix }, ...target.content],
|
||||
}
|
||||
return [...messages.slice(0, userIdx), merged, ...messages.slice(userIdx + 1)]
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// AgentRunner
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -165,6 +228,10 @@ const ZERO_USAGE: TokenUsage = { input_tokens: 0, output_tokens: 0 }
|
|||
*/
|
||||
export class AgentRunner {
|
||||
private readonly maxTurns: number
|
||||
private summarizeCache: {
|
||||
oldSignature: string
|
||||
summaryPrefix: string
|
||||
} | null = null
|
||||
|
||||
constructor(
|
||||
private readonly adapter: LLMAdapter,
|
||||
|
|
@ -175,6 +242,242 @@ export class AgentRunner {
|
|||
this.maxTurns = options.maxTurns ?? 10
|
||||
}
|
||||
|
||||
private serializeMessage(message: LLMMessage): string {
|
||||
return JSON.stringify(message)
|
||||
}
|
||||
|
||||
private truncateToSlidingWindow(messages: LLMMessage[], maxTurns: number): LLMMessage[] {
|
||||
if (maxTurns <= 0) {
|
||||
return messages
|
||||
}
|
||||
|
||||
const firstUserIndex = messages.findIndex(m => m.role === 'user')
|
||||
const firstUser = firstUserIndex >= 0 ? messages[firstUserIndex]! : null
|
||||
const afterFirst = firstUserIndex >= 0
|
||||
? messages.slice(firstUserIndex + 1)
|
||||
: messages.slice()
|
||||
|
||||
if (afterFirst.length <= maxTurns * 2) {
|
||||
return messages
|
||||
}
|
||||
|
||||
const kept = afterFirst.slice(-maxTurns * 2)
|
||||
const result: LLMMessage[] = []
|
||||
|
||||
if (firstUser !== null) {
|
||||
result.push(firstUser)
|
||||
}
|
||||
|
||||
const droppedPairs = Math.floor((afterFirst.length - kept.length) / 2)
|
||||
if (droppedPairs > 0) {
|
||||
const notice =
|
||||
`[Earlier conversation history truncated — ${droppedPairs} turn(s) removed]\n\n`
|
||||
result.push(...prependSyntheticPrefixToFirstUser(kept, notice))
|
||||
return result
|
||||
}
|
||||
|
||||
result.push(...kept)
|
||||
return result
|
||||
}
|
||||
|
||||
private async summarizeMessages(
|
||||
messages: LLMMessage[],
|
||||
maxTokens: number,
|
||||
summaryModel: string | undefined,
|
||||
baseChatOptions: LLMChatOptions,
|
||||
turns: number,
|
||||
options: RunOptions,
|
||||
): Promise<{ messages: LLMMessage[]; usage: TokenUsage }> {
|
||||
const estimated = estimateTokens(messages)
|
||||
if (estimated <= maxTokens || messages.length < 4) {
|
||||
return { messages, usage: ZERO_USAGE }
|
||||
}
|
||||
|
||||
const firstUserIndex = messages.findIndex(m => m.role === 'user')
|
||||
if (firstUserIndex < 0 || firstUserIndex === messages.length - 1) {
|
||||
return { messages, usage: ZERO_USAGE }
|
||||
}
|
||||
|
||||
const firstUser = messages[firstUserIndex]!
|
||||
const rest = messages.slice(firstUserIndex + 1)
|
||||
if (rest.length < 2) {
|
||||
return { messages, usage: ZERO_USAGE }
|
||||
}
|
||||
|
||||
// Split on an even boundary so we never separate a tool_use assistant turn
|
||||
// from its tool_result user message (rest is user/assistant pairs).
|
||||
const splitAt = Math.max(2, Math.floor(rest.length / 4) * 2)
|
||||
const oldPortion = rest.slice(0, splitAt)
|
||||
const recentPortion = rest.slice(splitAt)
|
||||
|
||||
const oldSignature = oldPortion.map(m => this.serializeMessage(m)).join('\n')
|
||||
if (this.summarizeCache !== null && this.summarizeCache.oldSignature === oldSignature) {
|
||||
const mergedRecent = prependSyntheticPrefixToFirstUser(
|
||||
recentPortion,
|
||||
`${this.summarizeCache.summaryPrefix}\n\n`,
|
||||
)
|
||||
return { messages: [firstUser, ...mergedRecent], usage: ZERO_USAGE }
|
||||
}
|
||||
|
||||
const summaryPrompt = [
|
||||
'Summarize the following conversation history for an LLM.',
|
||||
'- Preserve user goals, constraints, and decisions.',
|
||||
'- Keep key tool outputs and unresolved questions.',
|
||||
'- Use concise bullets.',
|
||||
'- Do not fabricate details.',
|
||||
].join('\n')
|
||||
|
||||
const summaryInput: LLMMessage[] = [
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{ type: 'text', text: summaryPrompt },
|
||||
{ type: 'text', text: `\n\nConversation:\n${oldSignature}` },
|
||||
],
|
||||
},
|
||||
]
|
||||
|
||||
const summaryOptions: LLMChatOptions = {
|
||||
...baseChatOptions,
|
||||
model: summaryModel ?? this.options.model,
|
||||
tools: undefined,
|
||||
}
|
||||
|
||||
const summaryStartMs = Date.now()
|
||||
const summaryResponse = await this.adapter.chat(summaryInput, summaryOptions)
|
||||
if (options.onTrace) {
|
||||
const summaryEndMs = Date.now()
|
||||
emitTrace(options.onTrace, {
|
||||
type: 'llm_call',
|
||||
runId: options.runId ?? '',
|
||||
taskId: options.taskId,
|
||||
agent: options.traceAgent ?? this.options.agentName ?? 'unknown',
|
||||
model: summaryOptions.model,
|
||||
phase: 'summary',
|
||||
turn: turns,
|
||||
tokens: summaryResponse.usage,
|
||||
startMs: summaryStartMs,
|
||||
endMs: summaryEndMs,
|
||||
durationMs: summaryEndMs - summaryStartMs,
|
||||
})
|
||||
}
|
||||
|
||||
const summaryText = extractText(summaryResponse.content).trim()
|
||||
const summaryPrefix = summaryText.length > 0
|
||||
? `[Conversation summary]\n${summaryText}`
|
||||
: '[Conversation summary unavailable]'
|
||||
|
||||
this.summarizeCache = { oldSignature, summaryPrefix }
|
||||
const mergedRecent = prependSyntheticPrefixToFirstUser(
|
||||
recentPortion,
|
||||
`${summaryPrefix}\n\n`,
|
||||
)
|
||||
return {
|
||||
messages: [firstUser, ...mergedRecent],
|
||||
usage: summaryResponse.usage,
|
||||
}
|
||||
}
|
||||
|
||||
private async applyContextStrategy(
|
||||
messages: LLMMessage[],
|
||||
strategy: ContextStrategy,
|
||||
baseChatOptions: LLMChatOptions,
|
||||
turns: number,
|
||||
options: RunOptions,
|
||||
): Promise<{ messages: LLMMessage[]; usage: TokenUsage }> {
|
||||
if (strategy.type === 'sliding-window') {
|
||||
return { messages: this.truncateToSlidingWindow(messages, strategy.maxTurns), usage: ZERO_USAGE }
|
||||
}
|
||||
|
||||
if (strategy.type === 'summarize') {
|
||||
return this.summarizeMessages(
|
||||
messages,
|
||||
strategy.maxTokens,
|
||||
strategy.summaryModel,
|
||||
baseChatOptions,
|
||||
turns,
|
||||
options,
|
||||
)
|
||||
}
|
||||
|
||||
if (strategy.type === 'compact') {
|
||||
return { messages: this.compactMessages(messages, strategy), usage: ZERO_USAGE }
|
||||
}
|
||||
|
||||
const estimated = estimateTokens(messages)
|
||||
const compressed = await strategy.compress(messages, estimated)
|
||||
if (!Array.isArray(compressed) || compressed.length === 0) {
|
||||
throw new Error('contextStrategy.custom.compress must return a non-empty LLMMessage[]')
|
||||
}
|
||||
return { messages: compressed, usage: ZERO_USAGE }
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Tool resolution
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Resolve the final set of tools available to this agent based on the
|
||||
* three-layer configuration: preset → allowlist → denylist → framework safety.
|
||||
*
|
||||
* Returns LLMToolDef[] for direct use with LLM adapters.
|
||||
*/
|
||||
private resolveTools(): LLMToolDef[] {
|
||||
// Validate configuration for contradictions
|
||||
if (this.options.toolPreset && this.options.allowedTools) {
|
||||
console.warn(
|
||||
'AgentRunner: both toolPreset and allowedTools are set. ' +
|
||||
'Final tool access will be the intersection of both.'
|
||||
)
|
||||
}
|
||||
|
||||
if (this.options.allowedTools && this.options.disallowedTools) {
|
||||
const overlap = this.options.allowedTools.filter(tool =>
|
||||
this.options.disallowedTools!.includes(tool)
|
||||
)
|
||||
if (overlap.length > 0) {
|
||||
console.warn(
|
||||
`AgentRunner: tools [${overlap.map(name => `"${name}"`).join(', ')}] appear in both allowedTools and disallowedTools. ` +
|
||||
'This is contradictory and may lead to unexpected behavior.'
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
const allTools = this.toolRegistry.toToolDefs()
|
||||
const runtimeCustomTools = this.toolRegistry.toRuntimeToolDefs()
|
||||
const runtimeCustomToolNames = new Set(runtimeCustomTools.map(t => t.name))
|
||||
let filteredTools = allTools.filter(t => !runtimeCustomToolNames.has(t.name))
|
||||
|
||||
// 1. Apply preset filter if set
|
||||
if (this.options.toolPreset) {
|
||||
const presetTools = new Set(TOOL_PRESETS[this.options.toolPreset] as readonly string[])
|
||||
filteredTools = filteredTools.filter(t => presetTools.has(t.name))
|
||||
}
|
||||
|
||||
// 2. Apply allowlist filter if set
|
||||
if (this.options.allowedTools) {
|
||||
filteredTools = filteredTools.filter(t => this.options.allowedTools!.includes(t.name))
|
||||
}
|
||||
|
||||
// 3. Apply denylist filter if set
|
||||
const denied = this.options.disallowedTools
|
||||
? new Set(this.options.disallowedTools)
|
||||
: undefined
|
||||
if (denied) {
|
||||
filteredTools = filteredTools.filter(t => !denied.has(t.name))
|
||||
}
|
||||
|
||||
// 4. Apply framework-level safety rails
|
||||
const frameworkDenied = new Set(AGENT_FRAMEWORK_DISALLOWED)
|
||||
filteredTools = filteredTools.filter(t => !frameworkDenied.has(t.name))
|
||||
|
||||
// Runtime-added custom tools bypass preset / allowlist but respect denylist.
|
||||
const finalRuntime = denied
|
||||
? runtimeCustomTools.filter(t => !denied.has(t.name))
|
||||
: runtimeCustomTools
|
||||
return [...filteredTools, ...finalRuntime]
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Public API
|
||||
// -------------------------------------------------------------------------
|
||||
|
|
@ -204,6 +507,8 @@ export class AgentRunner {
|
|||
for await (const event of this.stream(messages, options)) {
|
||||
if (event.type === 'done') {
|
||||
Object.assign(accumulated, event.data)
|
||||
} else if (event.type === 'error') {
|
||||
throw event.data
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -217,6 +522,7 @@ export class AgentRunner {
|
|||
* - `{ type: 'text', data: string }` for each text delta
|
||||
* - `{ type: 'tool_use', data: ToolUseBlock }` when the model requests a tool
|
||||
* - `{ type: 'tool_result', data: ToolResultBlock }` after each execution
|
||||
* - `{ type: 'budget_exceeded', data: TokenBudgetExceededError }` on budget trip
|
||||
* - `{ type: 'done', data: RunResult }` at the very end
|
||||
* - `{ type: 'error', data: Error }` on unrecoverable failure
|
||||
*/
|
||||
|
|
@ -225,21 +531,18 @@ export class AgentRunner {
|
|||
options: RunOptions = {},
|
||||
): AsyncGenerator<StreamEvent> {
|
||||
// Working copy of the conversation — mutated as turns progress.
|
||||
const conversationMessages: LLMMessage[] = [...initialMessages]
|
||||
let conversationMessages: LLMMessage[] = [...initialMessages]
|
||||
|
||||
// Accumulated state across all turns.
|
||||
let totalUsage: TokenUsage = ZERO_USAGE
|
||||
const allToolCalls: ToolCallRecord[] = []
|
||||
let finalOutput = ''
|
||||
let turns = 0
|
||||
let budgetExceeded = false
|
||||
|
||||
// Build the stable LLM options once; model / tokens / temp don't change.
|
||||
// toToolDefs() returns LLMToolDef[] (inputSchema, camelCase) — matches
|
||||
// LLMChatOptions.tools from types.ts directly.
|
||||
const allDefs = this.toolRegistry.toToolDefs()
|
||||
const toolDefs = this.options.allowedTools
|
||||
? allDefs.filter(d => this.options.allowedTools!.includes(d.name))
|
||||
: allDefs
|
||||
// resolveTools() returns LLMToolDef[] with three-layer filtering applied.
|
||||
const toolDefs = this.resolveTools()
|
||||
|
||||
// Per-call abortSignal takes precedence over the static one.
|
||||
const effectiveAbortSignal = options.abortSignal ?? this.options.abortSignal
|
||||
|
|
@ -278,6 +581,25 @@ export class AgentRunner {
|
|||
|
||||
turns++
|
||||
|
||||
// Compress consumed tool results before context strategy (lightweight,
|
||||
// no LLM calls) so the strategy operates on already-reduced messages.
|
||||
if (this.options.compressToolResults && turns > 1) {
|
||||
conversationMessages = this.compressConsumedToolResults(conversationMessages)
|
||||
}
|
||||
|
||||
// Optionally compact context before each LLM call after the first turn.
|
||||
if (this.options.contextStrategy && turns > 1) {
|
||||
const compacted = await this.applyContextStrategy(
|
||||
conversationMessages,
|
||||
this.options.contextStrategy,
|
||||
baseChatOptions,
|
||||
turns,
|
||||
options,
|
||||
)
|
||||
conversationMessages = compacted.messages
|
||||
totalUsage = addTokenUsage(totalUsage, compacted.usage)
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Step 1: Call the LLM and collect the full response for this turn.
|
||||
// ------------------------------------------------------------------
|
||||
|
|
@ -291,6 +613,7 @@ export class AgentRunner {
|
|||
taskId: options.taskId,
|
||||
agent: options.traceAgent ?? this.options.agentName ?? 'unknown',
|
||||
model: this.options.model,
|
||||
phase: 'turn',
|
||||
turn: turns,
|
||||
tokens: response.usage,
|
||||
startMs: llmStartMs,
|
||||
|
|
@ -318,6 +641,21 @@ export class AgentRunner {
|
|||
yield { type: 'text', data: turnText } satisfies StreamEvent
|
||||
}
|
||||
|
||||
const totalTokens = totalUsage.input_tokens + totalUsage.output_tokens
|
||||
if (this.options.maxTokenBudget !== undefined && totalTokens > this.options.maxTokenBudget) {
|
||||
budgetExceeded = true
|
||||
finalOutput = turnText
|
||||
yield {
|
||||
type: 'budget_exceeded',
|
||||
data: new TokenBudgetExceededError(
|
||||
this.options.agentName ?? 'unknown',
|
||||
totalTokens,
|
||||
this.options.maxTokenBudget,
|
||||
),
|
||||
} satisfies StreamEvent
|
||||
break
|
||||
}
|
||||
|
||||
// Extract tool-use blocks for detection and execution.
|
||||
const toolUseBlocks = extractToolUseBlocks(response.content)
|
||||
|
||||
|
|
@ -395,7 +733,7 @@ export class AgentRunner {
|
|||
// Parallel execution is critical for multi-tool responses where the
|
||||
// tools are independent (e.g. reading several files at once).
|
||||
// ------------------------------------------------------------------
|
||||
const toolContext: ToolUseContext = this.buildToolContext()
|
||||
const toolContext: ToolUseContext = this.buildToolContext(effectiveAbortSignal)
|
||||
|
||||
const executionPromises = toolUseBlocks.map(async (block): Promise<{
|
||||
resultBlock: ToolResultBlock
|
||||
|
|
@ -516,6 +854,7 @@ export class AgentRunner {
|
|||
tokenUsage: totalUsage,
|
||||
turns,
|
||||
...(loopDetected ? { loopDetected: true } : {}),
|
||||
...(budgetExceeded ? { budgetExceeded: true } : {}),
|
||||
}
|
||||
|
||||
yield { type: 'done', data: runResult } satisfies StreamEvent
|
||||
|
|
@ -525,18 +864,217 @@ export class AgentRunner {
|
|||
// Private helpers
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
 * Rule-based selective context compaction (no LLM calls).
 *
 * Compresses old turns while preserving the conversation skeleton:
 * - tool_use blocks (decisions) are always kept
 * - Long tool_result content is replaced with a compact marker
 * - Long assistant text blocks are truncated with an excerpt
 * - Error tool_results are never compressed
 * - Recent turns (within `preserveRecentTurns`) are kept intact
 *
 * Returns the original array unchanged (same reference) when nothing was
 * compacted, so callers can cheaply detect a no-op.
 */
private compactMessages(
  messages: LLMMessage[],
  strategy: Extract<ContextStrategy, { type: 'compact' }>,
): LLMMessage[] {
  // Cheap pre-check: only compact when the estimate exceeds the budget.
  const estimated = estimateTokens(messages)
  if (estimated <= strategy.maxTokens) {
    return messages
  }

  const preserveRecent = strategy.preserveRecentTurns ?? 4
  const minToolResultChars = strategy.minToolResultChars ?? 200
  const minTextBlockChars = strategy.minTextBlockChars ?? 2000
  const textBlockExcerptChars = strategy.textBlockExcerptChars ?? 200

  // Find the first user message — it is always preserved as-is.
  const firstUserIndex = messages.findIndex(m => m.role === 'user')
  if (firstUserIndex < 0 || firstUserIndex === messages.length - 1) {
    return messages
  }

  // Walk backward to find the boundary between old and recent turns.
  // A "turn pair" is an assistant message followed by a user message.
  // The loop keeps moving `boundary` to the assistant message that opens the
  // oldest pair inside the preserved window.
  let boundary = messages.length
  let pairsFound = 0
  for (let i = messages.length - 1; i > firstUserIndex && pairsFound < preserveRecent; i--) {
    if (messages[i]!.role === 'user' && i > 0 && messages[i - 1]!.role === 'assistant') {
      pairsFound++
      boundary = i - 1
    }
  }

  // If all turns fit within the recent window, nothing to compact.
  if (boundary <= firstUserIndex + 1) {
    return messages
  }

  // Build a tool_use_id → tool name lookup from old assistant messages,
  // so compacted tool_results can still name the tool that produced them.
  const toolNameMap = new Map<string, string>()
  for (let i = firstUserIndex + 1; i < boundary; i++) {
    const msg = messages[i]!
    if (msg.role !== 'assistant') continue
    for (const block of msg.content) {
      if (block.type === 'tool_use') {
        toolNameMap.set(block.id, block.name)
      }
    }
  }

  // Process old messages (between first user and boundary).
  let anyChanged = false
  const result: LLMMessage[] = []

  for (let i = 0; i < messages.length; i++) {
    // First user message and recent messages: keep intact.
    if (i <= firstUserIndex || i >= boundary) {
      result.push(messages[i]!)
      continue
    }

    const msg = messages[i]!
    let msgChanged = false
    const newContent = msg.content.map((block): ContentBlock => {
      if (msg.role === 'assistant') {
        // tool_use blocks: always preserve (decisions).
        if (block.type === 'tool_use') return block
        // Long text blocks: truncate with excerpt.
        if (block.type === 'text' && block.text.length >= minTextBlockChars) {
          msgChanged = true
          return {
            type: 'text',
            text: `${block.text.slice(0, textBlockExcerptChars)}... [truncated — ${block.text.length} chars total]`,
          } satisfies TextBlock
        }
        // Image blocks in old turns: replace with marker.
        if (block.type === 'image') {
          msgChanged = true
          return { type: 'text', text: '[Image compacted]' } satisfies TextBlock
        }
        return block
      }

      // User messages in old zone.
      if (block.type === 'tool_result') {
        // Error results: always preserve — they carry diagnostic value.
        if (block.is_error) return block
        // Already compressed by compressToolResults or a prior compact pass;
        // re-compressing would report a bogus char count.
        if (
          block.content.startsWith('[Tool output compressed') ||
          block.content.startsWith('[Tool result:')
        ) {
          return block
        }
        // Short results: preserve — the marker itself has overhead.
        if (block.content.length < minToolResultChars) return block
        // Compress.
        const toolName = toolNameMap.get(block.tool_use_id) ?? 'unknown'
        msgChanged = true
        return {
          type: 'tool_result',
          tool_use_id: block.tool_use_id,
          content: `[Tool result: ${toolName} — ${block.content.length} chars, compacted]`,
        } satisfies ToolResultBlock
      }
      return block
    })

    if (msgChanged) {
      anyChanged = true
      result.push({ role: msg.role, content: newContent } as LLMMessage)
    } else {
      // Unchanged messages are pushed by reference — no needless copies.
      result.push(msg)
    }
  }

  return anyChanged ? result : messages
}
|
||||
|
||||
/**
|
||||
* Replace consumed tool results with compact markers.
|
||||
*
|
||||
* A tool_result is "consumed" when the assistant has produced a response
|
||||
* after seeing it (i.e. there is an assistant message following the user
|
||||
* message that contains the tool_result). The most recent user message
|
||||
* with tool results is always kept intact — the LLM is about to see it.
|
||||
*
|
||||
* Error results and results shorter than `minChars` are never compressed.
|
||||
*/
|
||||
private compressConsumedToolResults(messages: LLMMessage[]): LLMMessage[] {
|
||||
const config = this.options.compressToolResults
|
||||
if (!config) return messages
|
||||
|
||||
const minChars = typeof config === 'object'
|
||||
? (config.minChars ?? DEFAULT_MIN_COMPRESS_CHARS)
|
||||
: DEFAULT_MIN_COMPRESS_CHARS
|
||||
|
||||
// Find the last user message that carries tool_result blocks.
|
||||
let lastToolResultUserIdx = -1
|
||||
for (let i = messages.length - 1; i >= 0; i--) {
|
||||
if (
|
||||
messages[i]!.role === 'user' &&
|
||||
messages[i]!.content.some(b => b.type === 'tool_result')
|
||||
) {
|
||||
lastToolResultUserIdx = i
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Nothing to compress if there's at most one tool-result user message.
|
||||
if (lastToolResultUserIdx <= 0) return messages
|
||||
|
||||
let anyChanged = false
|
||||
const result = messages.map((msg, idx) => {
|
||||
// Only compress user messages that appear before the last one.
|
||||
if (msg.role !== 'user' || idx >= lastToolResultUserIdx) return msg
|
||||
|
||||
const hasToolResult = msg.content.some(b => b.type === 'tool_result')
|
||||
if (!hasToolResult) return msg
|
||||
|
||||
let msgChanged = false
|
||||
const newContent = msg.content.map((block): ContentBlock => {
|
||||
if (block.type !== 'tool_result') return block
|
||||
|
||||
// Never compress error results — they carry diagnostic value.
|
||||
if (block.is_error) return block
|
||||
|
||||
// Skip already-compressed results — avoid re-compression with wrong char count.
|
||||
if (block.content.startsWith('[Tool output compressed')) return block
|
||||
|
||||
// Skip short results — the marker itself has overhead.
|
||||
if (block.content.length < minChars) return block
|
||||
|
||||
msgChanged = true
|
||||
return {
|
||||
type: 'tool_result',
|
||||
tool_use_id: block.tool_use_id,
|
||||
content: `[Tool output compressed — ${block.content.length} chars, already processed]`,
|
||||
} satisfies ToolResultBlock
|
||||
})
|
||||
|
||||
if (msgChanged) {
|
||||
anyChanged = true
|
||||
return { role: msg.role, content: newContent } as LLMMessage
|
||||
}
|
||||
return msg
|
||||
})
|
||||
|
||||
return anyChanged ? result : messages
|
||||
}
|
||||
|
||||
/**
 * Build the {@link ToolUseContext} passed to every tool execution.
 * Identifies this runner as the invoking agent.
 *
 * NOTE: this span previously contained both the pre- and post-change lines
 * of a diff (two signatures and two `abortSignal` properties), which is not
 * valid code. This is the post-change version: the caller resolves the
 * effective signal (per-call over static) and passes it in — see the
 * `this.buildToolContext(effectiveAbortSignal)` call site.
 *
 * @param abortSignal - Effective abort signal for this run, if any.
 */
private buildToolContext(abortSignal?: AbortSignal): ToolUseContext {
  return {
    agent: {
      name: this.options.agentName ?? 'runner',
      role: this.options.agentRole ?? 'assistant',
      model: this.options.model,
    },
    abortSignal,
  }
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,443 @@
|
|||
#!/usr/bin/env node
|
||||
/**
|
||||
* Thin shell/CI wrapper over OpenMultiAgent — no interactive session, cwd binding,
|
||||
* approvals, or persistence.
|
||||
*
|
||||
* Exit codes:
|
||||
* 0 — finished; team run succeeded
|
||||
* 1 — finished; team run reported failure (agents/tasks)
|
||||
* 2 — invalid usage, I/O, or JSON validation
|
||||
* 3 — unexpected runtime error (including LLM errors)
|
||||
*/
|
||||
|
||||
import { readFileSync } from 'node:fs'
|
||||
import { resolve } from 'node:path'
|
||||
import { fileURLToPath } from 'node:url'
|
||||
|
||||
import { OpenMultiAgent } from '../orchestrator/orchestrator.js'
|
||||
import type { SupportedProvider } from '../llm/adapter.js'
|
||||
import type { AgentRunResult, CoordinatorConfig, OrchestratorConfig, TeamConfig, TeamRunResult } from '../types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Exit codes
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Process exit codes for the CLI — mirrors the contract in the file header. */
export const EXIT = {
  // Finished; team run succeeded.
  SUCCESS: 0,
  // Finished; team run reported failure (agents/tasks).
  RUN_FAILED: 1,
  // Invalid usage, I/O, or JSON validation.
  USAGE: 2,
  // Unexpected runtime error (including LLM errors).
  INTERNAL: 3,
} as const
|
||||
|
||||
class OmaValidationError extends Error {
|
||||
override readonly name = 'OmaValidationError'
|
||||
constructor(message: string) {
|
||||
super(message)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Provider helper (static reference data)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Static per-provider reference data: dumped verbatim by `oma provider list`
 * and used to scaffold `oma provider template <id>` output.
 */
const PROVIDER_REFERENCE: ReadonlyArray<{
  // Provider identifier as accepted by the LLM adapter layer.
  id: SupportedProvider
  // Environment variable name(s) where the API key may be supplied.
  apiKeyEnv: readonly string[]
  // Whether a custom baseURL can be passed for this provider.
  baseUrlSupported: boolean
  // Optional operational caveats surfaced to the user.
  notes?: string
}> = [
  { id: 'anthropic', apiKeyEnv: ['ANTHROPIC_API_KEY'], baseUrlSupported: true },
  { id: 'openai', apiKeyEnv: ['OPENAI_API_KEY'], baseUrlSupported: true, notes: 'Set baseURL for Ollama / vLLM / LM Studio; apiKey may be a placeholder.' },
  { id: 'gemini', apiKeyEnv: ['GEMINI_API_KEY', 'GOOGLE_API_KEY'], baseUrlSupported: false },
  { id: 'grok', apiKeyEnv: ['XAI_API_KEY'], baseUrlSupported: true },
  { id: 'minimax', apiKeyEnv: ['MINIMAX_API_KEY'], baseUrlSupported: true, notes: 'Global endpoint: https://api.minimax.io/v1 (default). China endpoint: https://api.minimaxi.com/v1. Set MINIMAX_BASE_URL to choose, or pass baseURL in agent config.' },
  { id: 'deepseek', apiKeyEnv: ['DEEPSEEK_API_KEY'], baseUrlSupported: true, notes: 'OpenAI-compatible endpoint at https://api.deepseek.com/v1. Models: deepseek-chat (V3), deepseek-reasoner (thinking).' },
  {
    id: 'copilot',
    apiKeyEnv: ['GITHUB_COPILOT_TOKEN', 'GITHUB_TOKEN'],
    baseUrlSupported: false,
    notes: 'If no token env is set, Copilot adapter may start an interactive OAuth device flow (avoid in CI).',
  },
]
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// argv / JSON helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export function parseArgs(argv: string[]): {
|
||||
_: string[]
|
||||
flags: Set<string>
|
||||
kv: Map<string, string>
|
||||
} {
|
||||
const _ = argv.slice(2)
|
||||
const flags = new Set<string>()
|
||||
const kv = new Map<string, string>()
|
||||
let i = 0
|
||||
while (i < _.length) {
|
||||
const a = _[i]!
|
||||
if (a === '--') {
|
||||
break
|
||||
}
|
||||
if (a.startsWith('--')) {
|
||||
const eq = a.indexOf('=')
|
||||
if (eq !== -1) {
|
||||
kv.set(a.slice(2, eq), a.slice(eq + 1))
|
||||
i++
|
||||
continue
|
||||
}
|
||||
const key = a.slice(2)
|
||||
const next = _[i + 1]
|
||||
if (next !== undefined && !next.startsWith('--')) {
|
||||
kv.set(key, next)
|
||||
i += 2
|
||||
} else {
|
||||
flags.add(key)
|
||||
i++
|
||||
}
|
||||
continue
|
||||
}
|
||||
i++
|
||||
}
|
||||
return { _, flags, kv }
|
||||
}
|
||||
|
||||
function getOpt(kv: Map<string, string>, flags: Set<string>, key: string): string | undefined {
|
||||
if (flags.has(key)) return ''
|
||||
return kv.get(key)
|
||||
}
|
||||
|
||||
function readJson(path: string): unknown {
|
||||
const abs = resolve(path)
|
||||
const raw = readFileSync(abs, 'utf8')
|
||||
try {
|
||||
return JSON.parse(raw) as unknown
|
||||
} catch (e) {
|
||||
if (e instanceof SyntaxError) {
|
||||
throw new Error(`Invalid JSON in ${abs}: ${e.message}`)
|
||||
}
|
||||
throw e
|
||||
}
|
||||
}
|
||||
|
||||
function isObject(v: unknown): v is Record<string, unknown> {
|
||||
return typeof v === 'object' && v !== null && !Array.isArray(v)
|
||||
}
|
||||
|
||||
function asTeamConfig(v: unknown, label: string): TeamConfig {
|
||||
if (!isObject(v)) throw new OmaValidationError(`${label}: expected a JSON object`)
|
||||
const name = v['name']
|
||||
const agents = v['agents']
|
||||
if (typeof name !== 'string' || !name) throw new OmaValidationError(`${label}.name: non-empty string required`)
|
||||
if (!Array.isArray(agents) || agents.length === 0) {
|
||||
throw new OmaValidationError(`${label}.agents: non-empty array required`)
|
||||
}
|
||||
for (const a of agents) {
|
||||
if (!isObject(a)) throw new OmaValidationError(`${label}.agents[]: each agent must be an object`)
|
||||
if (typeof a['name'] !== 'string' || !a['name']) throw new OmaValidationError(`agent.name required`)
|
||||
if (typeof a['model'] !== 'string' || !a['model']) {
|
||||
throw new OmaValidationError(`agent.model required for "${String(a['name'])}"`)
|
||||
}
|
||||
}
|
||||
return v as unknown as TeamConfig
|
||||
}
|
||||
|
||||
function asOrchestratorPartial(v: unknown, label: string): OrchestratorConfig {
|
||||
if (!isObject(v)) throw new OmaValidationError(`${label}: expected a JSON object`)
|
||||
return v as OrchestratorConfig
|
||||
}
|
||||
|
||||
function asCoordinatorPartial(v: unknown, label: string): CoordinatorConfig {
|
||||
if (!isObject(v)) throw new OmaValidationError(`${label}: expected a JSON object`)
|
||||
return v as CoordinatorConfig
|
||||
}
|
||||
|
||||
function asTaskSpecs(v: unknown, label: string): ReadonlyArray<{
|
||||
title: string
|
||||
description: string
|
||||
assignee?: string
|
||||
dependsOn?: string[]
|
||||
memoryScope?: 'dependencies' | 'all'
|
||||
maxRetries?: number
|
||||
retryDelayMs?: number
|
||||
retryBackoff?: number
|
||||
}> {
|
||||
if (!Array.isArray(v)) throw new OmaValidationError(`${label}: expected a JSON array`)
|
||||
const out: Array<{
|
||||
title: string
|
||||
description: string
|
||||
assignee?: string
|
||||
dependsOn?: string[]
|
||||
memoryScope?: 'dependencies' | 'all'
|
||||
maxRetries?: number
|
||||
retryDelayMs?: number
|
||||
retryBackoff?: number
|
||||
}> = []
|
||||
let i = 0
|
||||
for (const item of v) {
|
||||
if (!isObject(item)) throw new OmaValidationError(`${label}[${i}]: object expected`)
|
||||
if (typeof item['title'] !== 'string' || typeof item['description'] !== 'string') {
|
||||
throw new OmaValidationError(`${label}[${i}]: title and description strings required`)
|
||||
}
|
||||
const row: (typeof out)[0] = {
|
||||
title: item['title'],
|
||||
description: item['description'],
|
||||
}
|
||||
if (typeof item['assignee'] === 'string') row.assignee = item['assignee']
|
||||
if (Array.isArray(item['dependsOn'])) {
|
||||
row.dependsOn = item['dependsOn'].filter((x): x is string => typeof x === 'string')
|
||||
}
|
||||
if (item['memoryScope'] === 'all' || item['memoryScope'] === 'dependencies') {
|
||||
row.memoryScope = item['memoryScope']
|
||||
}
|
||||
if (typeof item['maxRetries'] === 'number') row.maxRetries = item['maxRetries']
|
||||
if (typeof item['retryDelayMs'] === 'number') row.retryDelayMs = item['retryDelayMs']
|
||||
if (typeof item['retryBackoff'] === 'number') row.retryBackoff = item['retryBackoff']
|
||||
out.push(row)
|
||||
i++
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
/** JSON output shaping shared by the `run` and `task` commands. */
export interface CliJsonOptions {
  // Pretty-print with 2-space indentation instead of a single line.
  readonly pretty: boolean
  // Include full LLM message arrays in serialized results (can be very large).
  readonly includeMessages: boolean
}
|
||||
|
||||
export function serializeAgentResult(r: AgentRunResult, includeMessages: boolean): Record<string, unknown> {
|
||||
const base: Record<string, unknown> = {
|
||||
success: r.success,
|
||||
output: r.output,
|
||||
tokenUsage: r.tokenUsage,
|
||||
toolCalls: r.toolCalls,
|
||||
structured: r.structured,
|
||||
loopDetected: r.loopDetected,
|
||||
budgetExceeded: r.budgetExceeded,
|
||||
}
|
||||
if (includeMessages) base['messages'] = r.messages
|
||||
return base
|
||||
}
|
||||
|
||||
export function serializeTeamRunResult(result: TeamRunResult, opts: CliJsonOptions): Record<string, unknown> {
|
||||
const agentResults: Record<string, unknown> = {}
|
||||
for (const [k, v] of result.agentResults) {
|
||||
agentResults[k] = serializeAgentResult(v, opts.includeMessages)
|
||||
}
|
||||
return {
|
||||
success: result.success,
|
||||
totalTokenUsage: result.totalTokenUsage,
|
||||
agentResults,
|
||||
}
|
||||
}
|
||||
|
||||
function printJson(data: unknown, pretty: boolean): void {
|
||||
const s = pretty ? JSON.stringify(data, null, 2) : JSON.stringify(data)
|
||||
process.stdout.write(`${s}\n`)
|
||||
}
|
||||
|
||||
/**
 * Usage text printed for `oma help`, `-h`, `--help`, or no command.
 * NOTE(review): the spacing inside the Usage/Flags strings may have been
 * collapsed during extraction — verify column alignment against the
 * published CLI output.
 */
function help(): string {
  return [
    'open-multi-agent CLI (oma)',
    '',
    'Usage:',
    ' oma run --goal <text> --team <team.json> [--orchestrator <orch.json>] [--coordinator <coord.json>]',
    ' oma task --file <tasks.json> [--team <team.json>]',
    ' oma provider [list | template <provider>]',
    '',
    'Flags:',
    ' --pretty Pretty-print JSON to stdout',
    ' --include-messages Include full LLM message arrays in run output (large)',
    '',
    'team.json may be a TeamConfig object, or { "team": TeamConfig, "orchestrator": { ... } }.',
    'tasks.json: { "team": TeamConfig, "tasks": [ ... ], "orchestrator"?: { ... } }.',
    ' Optional --team overrides the embedded team object.',
    '',
    'Exit codes: 0 success, 1 run failed, 2 usage/validation, 3 internal',
  ].join('\n')
}
|
||||
|
||||
/**
 * Suggested default model per provider — used only to pre-fill
 * `oma provider template` output, not enforced anywhere at runtime.
 */
const DEFAULT_MODEL_HINT: Record<SupportedProvider, string> = {
  anthropic: 'claude-opus-4-6',
  openai: 'gpt-4o',
  gemini: 'gemini-2.0-flash',
  grok: 'grok-2-latest',
  copilot: 'gpt-4o',
  minimax: 'MiniMax-M2.7',
  deepseek: 'deepseek-chat',
}
|
||||
|
||||
/**
 * `oma provider` subcommands.
 *
 * - no subcommand / `list` — dump PROVIDER_REFERENCE as JSON.
 * - `template <id>`        — emit a starter orchestrator/agent config
 *                            snippet plus the env vars to set.
 *
 * Invalid input prints a JSON usage error and returns EXIT.USAGE; this
 * function never throws for bad arguments.
 */
async function cmdProvider(sub: string | undefined, arg: string | undefined, pretty: boolean): Promise<number> {
  if (sub === undefined || sub === 'list') {
    printJson({ providers: PROVIDER_REFERENCE }, pretty)
    return EXIT.SUCCESS
  }
  if (sub === 'template') {
    // Narrowing is by lookup: an unknown id simply fails the find() below.
    const id = arg as SupportedProvider | undefined
    const row = PROVIDER_REFERENCE.find((p) => p.id === id)
    if (!id || !row) {
      printJson(
        {
          error: {
            kind: 'usage',
            message: `usage: oma provider template <${PROVIDER_REFERENCE.map((p) => p.id).join('|')}>`,
          },
        },
        pretty,
      )
      return EXIT.USAGE
    }
    printJson(
      {
        orchestrator: {
          defaultProvider: id,
          defaultModel: DEFAULT_MODEL_HINT[id],
        },
        agent: {
          name: 'worker',
          model: DEFAULT_MODEL_HINT[id],
          provider: id,
          systemPrompt: 'You are a helpful assistant.',
        },
        // Placeholder values remind the user which env vars to export.
        env: Object.fromEntries(row.apiKeyEnv.map((k) => [k, `<set ${k} in environment>`])),
        notes: row.notes,
      },
      pretty,
    )
    return EXIT.SUCCESS
  }
  printJson({ error: { kind: 'usage', message: `unknown provider subcommand: ${sub}` } }, pretty)
  return EXIT.USAGE
}
|
||||
|
||||
function mergeOrchestrator(base: OrchestratorConfig, ...partials: OrchestratorConfig[]): OrchestratorConfig {
|
||||
let o: OrchestratorConfig = { ...base }
|
||||
for (const p of partials) {
|
||||
o = { ...o, ...p }
|
||||
}
|
||||
return o
|
||||
}
|
||||
|
||||
async function main(): Promise<number> {
|
||||
const argv = parseArgs(process.argv)
|
||||
const cmd = argv._[0]
|
||||
const pretty = argv.flags.has('pretty')
|
||||
const includeMessages = argv.flags.has('include-messages')
|
||||
|
||||
if (cmd === undefined || cmd === 'help' || cmd === '-h' || cmd === '--help') {
|
||||
process.stdout.write(`${help()}\n`)
|
||||
return EXIT.SUCCESS
|
||||
}
|
||||
|
||||
if (cmd === 'provider') {
|
||||
return cmdProvider(argv._[1], argv._[2], pretty)
|
||||
}
|
||||
|
||||
const jsonOpts: CliJsonOptions = { pretty, includeMessages }
|
||||
|
||||
try {
|
||||
if (cmd === 'run') {
|
||||
const goal = getOpt(argv.kv, argv.flags, 'goal')
|
||||
const teamPath = getOpt(argv.kv, argv.flags, 'team')
|
||||
const orchPath = getOpt(argv.kv, argv.flags, 'orchestrator')
|
||||
const coordPath = getOpt(argv.kv, argv.flags, 'coordinator')
|
||||
if (!goal || !teamPath) {
|
||||
printJson({ error: { kind: 'usage', message: '--goal and --team are required' } }, pretty)
|
||||
return EXIT.USAGE
|
||||
}
|
||||
|
||||
const teamRaw = readJson(teamPath)
|
||||
let teamCfg: TeamConfig
|
||||
let orchParts: OrchestratorConfig[] = []
|
||||
if (isObject(teamRaw) && teamRaw['team'] !== undefined) {
|
||||
teamCfg = asTeamConfig(teamRaw['team'], 'team')
|
||||
if (teamRaw['orchestrator'] !== undefined) {
|
||||
orchParts.push(asOrchestratorPartial(teamRaw['orchestrator'], 'orchestrator'))
|
||||
}
|
||||
} else {
|
||||
teamCfg = asTeamConfig(teamRaw, 'team')
|
||||
}
|
||||
if (orchPath) {
|
||||
orchParts.push(asOrchestratorPartial(readJson(orchPath), 'orchestrator file'))
|
||||
}
|
||||
|
||||
const orchestrator = new OpenMultiAgent(mergeOrchestrator({}, ...orchParts))
|
||||
const team = orchestrator.createTeam(teamCfg.name, teamCfg)
|
||||
let coordinator: CoordinatorConfig | undefined
|
||||
if (coordPath) {
|
||||
coordinator = asCoordinatorPartial(readJson(coordPath), 'coordinator file')
|
||||
}
|
||||
const result = await orchestrator.runTeam(team, goal, coordinator ? { coordinator } : undefined)
|
||||
await orchestrator.shutdown()
|
||||
const payload = { command: 'run' as const, ...serializeTeamRunResult(result, jsonOpts) }
|
||||
printJson(payload, pretty)
|
||||
return result.success ? EXIT.SUCCESS : EXIT.RUN_FAILED
|
||||
}
|
||||
|
||||
if (cmd === 'task') {
|
||||
const file = getOpt(argv.kv, argv.flags, 'file')
|
||||
const teamOverride = getOpt(argv.kv, argv.flags, 'team')
|
||||
if (!file) {
|
||||
printJson({ error: { kind: 'usage', message: '--file is required' } }, pretty)
|
||||
return EXIT.USAGE
|
||||
}
|
||||
const doc = readJson(file)
|
||||
if (!isObject(doc)) {
|
||||
throw new OmaValidationError('tasks file root must be an object')
|
||||
}
|
||||
const orchParts: OrchestratorConfig[] = []
|
||||
if (doc['orchestrator'] !== undefined) {
|
||||
orchParts.push(asOrchestratorPartial(doc['orchestrator'], 'orchestrator'))
|
||||
}
|
||||
const teamCfg = teamOverride
|
||||
? asTeamConfig(readJson(teamOverride), 'team (--team)')
|
||||
: asTeamConfig(doc['team'], 'team')
|
||||
|
||||
const tasks = asTaskSpecs(doc['tasks'], 'tasks')
|
||||
if (tasks.length === 0) {
|
||||
throw new OmaValidationError('tasks array must not be empty')
|
||||
}
|
||||
|
||||
const orchestrator = new OpenMultiAgent(mergeOrchestrator({}, ...orchParts))
|
||||
const team = orchestrator.createTeam(teamCfg.name, teamCfg)
|
||||
const result = await orchestrator.runTasks(team, tasks)
|
||||
await orchestrator.shutdown()
|
||||
const payload = { command: 'task' as const, ...serializeTeamRunResult(result, jsonOpts) }
|
||||
printJson(payload, pretty)
|
||||
return result.success ? EXIT.SUCCESS : EXIT.RUN_FAILED
|
||||
}
|
||||
|
||||
printJson({ error: { kind: 'usage', message: `unknown command: ${cmd}` } }, pretty)
|
||||
return EXIT.USAGE
|
||||
} catch (e) {
|
||||
const message = e instanceof Error ? e.message : String(e)
|
||||
const { kind, exit } = classifyCliError(e, message)
|
||||
printJson({ error: { kind, message } }, pretty)
|
||||
return exit
|
||||
}
|
||||
}
|
||||
|
||||
function classifyCliError(e: unknown, message: string): { kind: string; exit: number } {
|
||||
if (e instanceof OmaValidationError) return { kind: 'validation', exit: EXIT.USAGE }
|
||||
if (message.includes('Invalid JSON')) return { kind: 'validation', exit: EXIT.USAGE }
|
||||
if (message.includes('ENOENT') || message.includes('EACCES')) return { kind: 'io', exit: EXIT.USAGE }
|
||||
return { kind: 'runtime', exit: EXIT.INTERNAL }
|
||||
}
|
||||
|
||||
// Only auto-run when this module is the direct entry script
// (e.g. `node dist/cli.js`), not when imported for its exported helpers
// (parseArgs, serialize*). Any failure resolving the entry path is treated
// as "not the entry script".
const isMain = (() => {
  const argv1 = process.argv[1]
  if (!argv1) return false
  try {
    return fileURLToPath(import.meta.url) === resolve(argv1)
  } catch {
    return false
  }
})()

if (isMain) {
  main()
    .then((code) => process.exit(code))
    .catch((e) => {
      // Last-resort handler: main() catches expected failures itself, so
      // this path only fires on unexpected rejections. Still emit
      // machine-readable JSON so CI consumers get structured output.
      const message = e instanceof Error ? e.message : String(e)
      process.stdout.write(`${JSON.stringify({ error: { kind: 'internal', message } })}\n`)
      process.exit(EXIT.INTERNAL)
    })
}
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
/**
|
||||
* @fileoverview Framework-specific error classes.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Raised when an agent or orchestrator run exceeds its configured token budget.
|
||||
*/
|
||||
export class TokenBudgetExceededError extends Error {
|
||||
readonly code = 'TOKEN_BUDGET_EXCEEDED'
|
||||
|
||||
constructor(
|
||||
readonly agent: string,
|
||||
readonly tokensUsed: number,
|
||||
readonly budget: number,
|
||||
) {
|
||||
super(`Agent "${agent}" exceeded token budget: ${tokensUsed} tokens used (budget: ${budget})`)
|
||||
this.name = 'TokenBudgetExceededError'
|
||||
}
|
||||
}
|
||||
|
|
@ -89,7 +89,7 @@ export type { TaskQueueEvent } from './task/queue.js'
|
|||
// ---------------------------------------------------------------------------
|
||||
|
||||
export { defineTool, ToolRegistry, zodToJsonSchema } from './tool/framework.js'
|
||||
export { ToolExecutor } from './tool/executor.js'
|
||||
export { ToolExecutor, truncateToolOutput } from './tool/executor.js'
|
||||
export type { ToolExecutorOptions, BatchToolCall } from './tool/executor.js'
|
||||
export {
|
||||
registerBuiltInTools,
|
||||
|
|
@ -98,6 +98,7 @@ export {
|
|||
fileReadTool,
|
||||
fileWriteTool,
|
||||
fileEditTool,
|
||||
globTool,
|
||||
grepTool,
|
||||
} from './tool/built-in/index.js'
|
||||
|
||||
|
|
@ -107,6 +108,7 @@ export {
|
|||
|
||||
export { createAdapter } from './llm/adapter.js'
|
||||
export type { SupportedProvider } from './llm/adapter.js'
|
||||
export { TokenBudgetExceededError } from './errors.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Memory
|
||||
|
|
@ -152,6 +154,7 @@ export type {
|
|||
ToolCallRecord,
|
||||
LoopDetectionConfig,
|
||||
LoopDetectionInfo,
|
||||
ContextStrategy,
|
||||
|
||||
// Team
|
||||
TeamConfig,
|
||||
|
|
@ -164,6 +167,7 @@ export type {
|
|||
// Orchestrator
|
||||
OrchestratorConfig,
|
||||
OrchestratorEvent,
|
||||
CoordinatorConfig,
|
||||
|
||||
// Trace
|
||||
TraceEventType,
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ import type { LLMAdapter } from '../types.js'
|
|||
* Additional providers can be integrated by implementing {@link LLMAdapter}
|
||||
* directly and bypassing this factory.
|
||||
*/
|
||||
export type SupportedProvider = 'anthropic' | 'copilot' | 'grok' | 'openai' | 'gemini'
|
||||
export type SupportedProvider = 'anthropic' | 'copilot' | 'deepseek' | 'grok' | 'minimax' | 'openai' | 'gemini'
|
||||
|
||||
/**
|
||||
* Instantiate the appropriate {@link LLMAdapter} for the given provider.
|
||||
|
|
@ -49,6 +49,8 @@ export type SupportedProvider = 'anthropic' | 'copilot' | 'grok' | 'openai' | 'g
|
|||
* - `openai` → `OPENAI_API_KEY`
|
||||
* - `gemini` → `GEMINI_API_KEY` / `GOOGLE_API_KEY`
|
||||
* - `grok` → `XAI_API_KEY`
|
||||
* - `minimax` → `MINIMAX_API_KEY`
|
||||
* - `deepseek` → `DEEPSEEK_API_KEY`
|
||||
* - `copilot` → `GITHUB_COPILOT_TOKEN` / `GITHUB_TOKEN`, or interactive
|
||||
* OAuth2 device flow if neither is set
|
||||
*
|
||||
|
|
@ -89,6 +91,14 @@ export async function createAdapter(
|
|||
const { GrokAdapter } = await import('./grok.js')
|
||||
return new GrokAdapter(apiKey, baseURL)
|
||||
}
|
||||
case 'minimax': {
|
||||
const { MiniMaxAdapter } = await import('./minimax.js')
|
||||
return new MiniMaxAdapter(apiKey, baseURL)
|
||||
}
|
||||
case 'deepseek': {
|
||||
const { DeepSeekAdapter } = await import('./deepseek.js')
|
||||
return new DeepSeekAdapter(apiKey, baseURL)
|
||||
}
|
||||
default: {
|
||||
// The `never` cast here makes TypeScript enforce exhaustiveness.
|
||||
const _exhaustive: never = provider
|
||||
|
|
|
|||
|
|
@ -0,0 +1,29 @@
|
|||
/**
|
||||
* @fileoverview DeepSeek adapter.
|
||||
*
|
||||
* Thin wrapper around OpenAIAdapter that hard-codes the official DeepSeek
|
||||
* OpenAI-compatible endpoint and DEEPSEEK_API_KEY environment variable fallback.
|
||||
*/
|
||||
|
||||
import { OpenAIAdapter } from './openai.js'
|
||||
|
||||
/**
|
||||
* LLM adapter for DeepSeek models (deepseek-chat, deepseek-reasoner, and future models).
|
||||
*
|
||||
* Thread-safe. Can be shared across agents.
|
||||
*
|
||||
* Usage:
|
||||
* provider: 'deepseek'
|
||||
* model: 'deepseek-chat' (or 'deepseek-reasoner' for the thinking model)
|
||||
*/
|
||||
export class DeepSeekAdapter extends OpenAIAdapter {
|
||||
readonly name = 'deepseek'
|
||||
|
||||
constructor(apiKey?: string, baseURL?: string) {
|
||||
// Allow override of baseURL (for proxies or future changes) but default to official DeepSeek endpoint.
|
||||
super(
|
||||
apiKey ?? process.env['DEEPSEEK_API_KEY'],
|
||||
baseURL ?? 'https://api.deepseek.com/v1'
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
@ -163,6 +163,7 @@ function buildConfig(
|
|||
toolConfig: options.tools
|
||||
? { functionCallingConfig: { mode: FunctionCallingConfigMode.AUTO } }
|
||||
: undefined,
|
||||
abortSignal: options.abortSignal,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,29 @@
|
|||
/**
|
||||
* @fileoverview MiniMax adapter.
|
||||
*
|
||||
* Thin wrapper around OpenAIAdapter that hard-codes the official MiniMax
|
||||
* OpenAI-compatible endpoint and MINIMAX_API_KEY environment variable fallback.
|
||||
*/
|
||||
|
||||
import { OpenAIAdapter } from './openai.js'
|
||||
|
||||
/**
|
||||
* LLM adapter for MiniMax models (MiniMax-M2.7 series and future models).
|
||||
*
|
||||
* Thread-safe. Can be shared across agents.
|
||||
*
|
||||
* Usage:
|
||||
* provider: 'minimax'
|
||||
* model: 'MiniMax-M2.7' (or any current MiniMax model name)
|
||||
*/
|
||||
export class MiniMaxAdapter extends OpenAIAdapter {
|
||||
readonly name = 'minimax'
|
||||
|
||||
constructor(apiKey?: string, baseURL?: string) {
|
||||
// Allow override of baseURL (for proxies or future changes) but default to official MiniMax endpoint.
|
||||
super(
|
||||
apiKey ?? process.env['MINIMAX_API_KEY'],
|
||||
baseURL ?? process.env['MINIMAX_BASE_URL'] ?? 'https://api.minimax.io/v1'
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
export type {
|
||||
ConnectMCPToolsConfig,
|
||||
ConnectedMCPTools,
|
||||
} from './tool/mcp.js'
|
||||
export { connectMCPTools } from './tool/mcp.js'
|
||||
|
|
@ -124,8 +124,18 @@ export class SharedMemory {
|
|||
* - plan: Implement feature X using const type params
|
||||
* ```
|
||||
*/
|
||||
async getSummary(): Promise<string> {
|
||||
const all = await this.store.list()
|
||||
async getSummary(filter?: { taskIds?: string[] }): Promise<string> {
|
||||
let all = await this.store.list()
|
||||
if (filter?.taskIds && filter.taskIds.length > 0) {
|
||||
const taskIds = new Set(filter.taskIds)
|
||||
all = all.filter((entry) => {
|
||||
const slashIdx = entry.key.indexOf('/')
|
||||
const localKey = slashIdx === -1 ? entry.key : entry.key.slice(slashIdx + 1)
|
||||
if (!localKey.startsWith('task:') || !localKey.endsWith(':result')) return false
|
||||
const taskId = localKey.slice('task:'.length, localKey.length - ':result'.length)
|
||||
return taskIds.has(taskId)
|
||||
})
|
||||
}
|
||||
if (all.length === 0) return ''
|
||||
|
||||
// Group entries by agent name.
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@
|
|||
import type {
|
||||
AgentConfig,
|
||||
AgentRunResult,
|
||||
CoordinatorConfig,
|
||||
OrchestratorConfig,
|
||||
OrchestratorEvent,
|
||||
Task,
|
||||
|
|
@ -63,6 +64,8 @@ import { Team } from '../team/team.js'
|
|||
import { TaskQueue } from '../task/queue.js'
|
||||
import { createTask } from '../task/task.js'
|
||||
import { Scheduler } from './scheduler.js'
|
||||
import { TokenBudgetExceededError } from '../errors.js'
|
||||
import { extractKeywords, keywordScore } from '../utils/keywords.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal constants
|
||||
|
|
@ -72,6 +75,119 @@ const ZERO_USAGE: TokenUsage = { input_tokens: 0, output_tokens: 0 }
|
|||
const DEFAULT_MAX_CONCURRENCY = 5
|
||||
const DEFAULT_MODEL = 'claude-opus-4-6'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Short-circuit helpers (exported for testability)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Regex patterns that indicate a goal requires multi-agent coordination.
|
||||
*
|
||||
* Each pattern targets a distinct complexity signal:
|
||||
* - Sequencing: "first … then", "step 1 / step 2", numbered lists
|
||||
* - Coordination: "collaborate", "coordinate", "review each other"
|
||||
* - Parallel work: "in parallel", "at the same time", "concurrently"
|
||||
* - Multi-phase: "phase", "stage", multiple distinct action verbs joined by connectives
|
||||
*/
|
||||
const COMPLEXITY_PATTERNS: RegExp[] = [
|
||||
// Explicit sequencing
|
||||
/\bfirst\b.{3,60}\bthen\b/i,
|
||||
/\bstep\s*\d/i,
|
||||
/\bphase\s*\d/i,
|
||||
/\bstage\s*\d/i,
|
||||
/^\s*\d+[\.\)]/m, // numbered list items ("1. …", "2) …")
|
||||
|
||||
// Coordination language — must be an imperative directive aimed at the agents
|
||||
// ("collaborate with X", "coordinate the team", "agents should coordinate"),
|
||||
// not a descriptive use ("how does X coordinate with Y" / "what does collaboration mean").
|
||||
// Match either an explicit preposition or a noun-phrase that names a group.
|
||||
/\bcollaborat(?:e|ing)\b\s+(?:with|on|to)\b/i,
|
||||
/\bcoordinat(?:e|ing)\b\s+(?:with|on|across|between|the\s+(?:team|agents?|workers?|effort|work))\b/i,
|
||||
/\breview\s+each\s+other/i,
|
||||
/\bwork\s+together\b/i,
|
||||
|
||||
// Parallel execution
|
||||
/\bin\s+parallel\b/i,
|
||||
/\bconcurrently\b/i,
|
||||
/\bat\s+the\s+same\s+time\b/i,
|
||||
|
||||
// Multiple deliverables joined by connectives
|
||||
// Matches patterns like "build X, then deploy Y and test Z"
|
||||
/\b(?:build|create|implement|design|write|develop)\b.{5,80}\b(?:and|then)\b.{5,80}\b(?:build|create|implement|design|write|develop|test|review|deploy)\b/i,
|
||||
]
|
||||
|
||||
|
||||
/**
|
||||
* Maximum goal length (in characters) below which a goal *may* be simple.
|
||||
*
|
||||
* Goals longer than this threshold almost always contain enough detail to
|
||||
* warrant multi-agent decomposition. The value is generous — short-circuit
|
||||
* is meant for genuinely simple, single-action goals.
|
||||
*/
|
||||
const SIMPLE_GOAL_MAX_LENGTH = 200
|
||||
|
||||
/**
|
||||
* Determine whether a goal is simple enough to skip coordinator decomposition.
|
||||
*
|
||||
* A goal is considered "simple" when ALL of the following hold:
|
||||
* 1. Its length is ≤ {@link SIMPLE_GOAL_MAX_LENGTH}.
|
||||
* 2. It does not match any {@link COMPLEXITY_PATTERNS}.
|
||||
*
|
||||
* The complexity patterns are deliberately conservative — they only fire on
|
||||
* imperative coordination directives (e.g. "collaborate with the team",
|
||||
* "coordinate the workers"), so descriptive uses ("how do pods coordinate
|
||||
* state", "explain microservice collaboration") remain classified as simple.
|
||||
*
|
||||
* Exported for unit testing.
|
||||
*/
|
||||
export function isSimpleGoal(goal: string): boolean {
|
||||
if (goal.length > SIMPLE_GOAL_MAX_LENGTH) return false
|
||||
return !COMPLEXITY_PATTERNS.some((re) => re.test(goal))
|
||||
}
|
||||
|
||||
/**
|
||||
* Select the best-matching agent for a goal using keyword affinity scoring.
|
||||
*
|
||||
* The scoring logic mirrors {@link Scheduler}'s `capability-match` strategy
|
||||
* exactly, including its asymmetric use of the agent's `model` field:
|
||||
*
|
||||
* - `agentKeywords` is computed from `name + systemPrompt + model` so that
|
||||
* a goal which mentions a model name (e.g. "haiku") can boost an agent
|
||||
* bound to that model.
|
||||
* - `agentText` (used for the reverse direction) is computed from
|
||||
* `name + systemPrompt` only — model names should not bias the
|
||||
* text-vs-goal-keywords match.
|
||||
*
|
||||
* The two-direction sum (`scoreA + scoreB`) ensures both "agent describes
|
||||
* goal" and "goal mentions agent capability" contribute to the final score.
|
||||
*
|
||||
* Exported for unit testing.
|
||||
*/
|
||||
export function selectBestAgent(goal: string, agents: AgentConfig[]): AgentConfig {
|
||||
if (agents.length <= 1) return agents[0]!
|
||||
|
||||
const goalKeywords = extractKeywords(goal)
|
||||
|
||||
let bestAgent = agents[0]!
|
||||
let bestScore = -1
|
||||
|
||||
for (const agent of agents) {
|
||||
const agentText = `${agent.name} ${agent.systemPrompt ?? ''}`
|
||||
// Mirror Scheduler.capability-match: include `model` here only.
|
||||
const agentKeywords = extractKeywords(`${agent.name} ${agent.systemPrompt ?? ''} ${agent.model}`)
|
||||
|
||||
const scoreA = keywordScore(agentText, goalKeywords)
|
||||
const scoreB = keywordScore(goal, agentKeywords)
|
||||
const score = scoreA + scoreB
|
||||
|
||||
if (score > bestScore) {
|
||||
bestScore = score
|
||||
bestAgent = agent
|
||||
}
|
||||
}
|
||||
|
||||
return bestAgent
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -83,6 +199,12 @@ function addUsage(a: TokenUsage, b: TokenUsage): TokenUsage {
|
|||
}
|
||||
}
|
||||
|
||||
function resolveTokenBudget(primary?: number, fallback?: number): number | undefined {
|
||||
if (primary === undefined) return fallback
|
||||
if (fallback === undefined) return primary
|
||||
return Math.min(primary, fallback)
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a minimal {@link Agent} with its own fresh registry/executor.
|
||||
* Registers all built-in tools so coordinator/worker agents can use them.
|
||||
|
|
@ -90,7 +212,16 @@ function addUsage(a: TokenUsage, b: TokenUsage): TokenUsage {
|
|||
function buildAgent(config: AgentConfig): Agent {
|
||||
const registry = new ToolRegistry()
|
||||
registerBuiltInTools(registry)
|
||||
const executor = new ToolExecutor(registry)
|
||||
if (config.customTools) {
|
||||
for (const tool of config.customTools) {
|
||||
registry.register(tool, { runtimeAdded: true })
|
||||
}
|
||||
}
|
||||
const executor = new ToolExecutor(registry, {
|
||||
...(config.maxToolOutputChars !== undefined
|
||||
? { maxToolOutputChars: config.maxToolOutputChars }
|
||||
: {}),
|
||||
})
|
||||
return new Agent(config, registry, executor)
|
||||
}
|
||||
|
||||
|
|
@ -202,6 +333,10 @@ interface ParsedTaskSpec {
|
|||
description: string
|
||||
assignee?: string
|
||||
dependsOn?: string[]
|
||||
memoryScope?: 'dependencies' | 'all'
|
||||
maxRetries?: number
|
||||
retryDelayMs?: number
|
||||
retryBackoff?: number
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -240,6 +375,10 @@ function parseTaskSpecs(raw: string): ParsedTaskSpec[] | null {
|
|||
dependsOn: Array.isArray(obj['dependsOn'])
|
||||
? (obj['dependsOn'] as unknown[]).filter((x): x is string => typeof x === 'string')
|
||||
: undefined,
|
||||
memoryScope: obj['memoryScope'] === 'all' ? 'all' : undefined,
|
||||
maxRetries: typeof obj['maxRetries'] === 'number' ? obj['maxRetries'] : undefined,
|
||||
retryDelayMs: typeof obj['retryDelayMs'] === 'number' ? obj['retryDelayMs'] : undefined,
|
||||
retryBackoff: typeof obj['retryBackoff'] === 'number' ? obj['retryBackoff'] : undefined,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -264,6 +403,12 @@ interface RunContext {
|
|||
readonly config: OrchestratorConfig
|
||||
/** Trace run ID, present when `onTrace` is configured. */
|
||||
readonly runId?: string
|
||||
/** AbortSignal for run-level cancellation. Checked between task dispatch rounds. */
|
||||
readonly abortSignal?: AbortSignal
|
||||
cumulativeUsage: TokenUsage
|
||||
readonly maxTokenBudget?: number
|
||||
budgetExceededTriggered: boolean
|
||||
budgetExceededReason?: string
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -295,6 +440,12 @@ async function executeQueue(
|
|||
: undefined
|
||||
|
||||
while (true) {
|
||||
// Check for cancellation before each dispatch round.
|
||||
if (ctx.abortSignal?.aborted) {
|
||||
queue.skipRemaining('Skipped: run aborted.')
|
||||
break
|
||||
}
|
||||
|
||||
// Re-run auto-assignment each iteration so tasks that were unblocked since
|
||||
// the last round (and thus have no assignee yet) get assigned before dispatch.
|
||||
scheduler.autoAssign(queue, team.getAgents())
|
||||
|
|
@ -355,13 +506,13 @@ async function executeQueue(
|
|||
data: task,
|
||||
} satisfies OrchestratorEvent)
|
||||
|
||||
// Build the prompt: inject shared memory context + task description
|
||||
const prompt = await buildTaskPrompt(task, team)
|
||||
// Build the prompt: task description + dependency-only context by default.
|
||||
const prompt = await buildTaskPrompt(task, team, queue)
|
||||
|
||||
// Build trace context for this task's agent run
|
||||
const traceOptions: Partial<RunOptions> | undefined = config.onTrace
|
||||
? { onTrace: config.onTrace, runId: ctx.runId ?? '', taskId: task.id, traceAgent: assignee }
|
||||
: undefined
|
||||
? { onTrace: config.onTrace, runId: ctx.runId ?? '', taskId: task.id, traceAgent: assignee, abortSignal: ctx.abortSignal }
|
||||
: ctx.abortSignal ? { abortSignal: ctx.abortSignal } : undefined
|
||||
|
||||
const taskStartMs = config.onTrace ? Date.now() : 0
|
||||
let retryCount = 0
|
||||
|
|
@ -398,6 +549,23 @@ async function executeQueue(
|
|||
}
|
||||
|
||||
ctx.agentResults.set(`${assignee}:${task.id}`, result)
|
||||
ctx.cumulativeUsage = addUsage(ctx.cumulativeUsage, result.tokenUsage)
|
||||
const totalTokens = ctx.cumulativeUsage.input_tokens + ctx.cumulativeUsage.output_tokens
|
||||
if (
|
||||
!ctx.budgetExceededTriggered
|
||||
&& ctx.maxTokenBudget !== undefined
|
||||
&& totalTokens > ctx.maxTokenBudget
|
||||
) {
|
||||
ctx.budgetExceededTriggered = true
|
||||
const err = new TokenBudgetExceededError('orchestrator', totalTokens, ctx.maxTokenBudget)
|
||||
ctx.budgetExceededReason = err.message
|
||||
config.onProgress?.({
|
||||
type: 'budget_exceeded',
|
||||
agent: assignee,
|
||||
task: task.id,
|
||||
data: err,
|
||||
} satisfies OrchestratorEvent)
|
||||
}
|
||||
|
||||
if (result.success) {
|
||||
// Persist result into shared memory so other agents can read it
|
||||
|
|
@ -435,6 +603,10 @@ async function executeQueue(
|
|||
|
||||
// Wait for the entire parallel batch before checking for newly-unblocked tasks.
|
||||
await Promise.all(dispatchPromises)
|
||||
if (ctx.budgetExceededTriggered) {
|
||||
queue.skipRemaining(ctx.budgetExceededReason ?? 'Skipped: token budget exceeded.')
|
||||
break
|
||||
}
|
||||
|
||||
// --- Approval gate ---
|
||||
// After the batch completes, check if the caller wants to approve
|
||||
|
|
@ -468,22 +640,37 @@ async function executeQueue(
|
|||
*
|
||||
* Injects:
|
||||
* - Task title and description
|
||||
* - Dependency results from shared memory (if available)
|
||||
* - Direct dependency task results by default (clean slate when none)
|
||||
* - Optional full shared-memory context when `task.memoryScope === 'all'`
|
||||
* - Any messages addressed to this agent from the team bus
|
||||
*/
|
||||
async function buildTaskPrompt(task: Task, team: Team): Promise<string> {
|
||||
async function buildTaskPrompt(task: Task, team: Team, queue: TaskQueue): Promise<string> {
|
||||
const lines: string[] = [
|
||||
`# Task: ${task.title}`,
|
||||
'',
|
||||
task.description,
|
||||
]
|
||||
|
||||
// Inject shared memory summary so the agent sees its teammates' work
|
||||
const sharedMem = team.getSharedMemoryInstance()
|
||||
if (sharedMem) {
|
||||
const summary = await sharedMem.getSummary()
|
||||
if (summary) {
|
||||
lines.push('', summary)
|
||||
if (task.memoryScope === 'all') {
|
||||
// Explicit opt-in for full visibility (legacy/shared-memory behavior).
|
||||
const sharedMem = team.getSharedMemoryInstance()
|
||||
if (sharedMem) {
|
||||
const summary = await sharedMem.getSummary()
|
||||
if (summary) {
|
||||
lines.push('', summary)
|
||||
}
|
||||
}
|
||||
} else if (task.dependsOn && task.dependsOn.length > 0) {
|
||||
// Default-deny: inject only explicit prerequisite outputs.
|
||||
const depResults: string[] = []
|
||||
for (const depId of task.dependsOn) {
|
||||
const depTask = queue.get(depId)
|
||||
if (depTask?.status === 'completed' && depTask.result) {
|
||||
depResults.push(`### ${depTask.title} (by ${depTask.assignee ?? 'unknown'})\n${depTask.result}`)
|
||||
}
|
||||
}
|
||||
if (depResults.length > 0) {
|
||||
lines.push('', '## Context from prerequisite tasks', '', ...depResults)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -513,8 +700,8 @@ async function buildTaskPrompt(task: Task, team: Team): Promise<string> {
|
|||
*/
|
||||
export class OpenMultiAgent {
|
||||
private readonly config: Required<
|
||||
Omit<OrchestratorConfig, 'onApproval' | 'onProgress' | 'onTrace' | 'defaultBaseURL' | 'defaultApiKey'>
|
||||
> & Pick<OrchestratorConfig, 'onApproval' | 'onProgress' | 'onTrace' | 'defaultBaseURL' | 'defaultApiKey'>
|
||||
Omit<OrchestratorConfig, 'onApproval' | 'onProgress' | 'onTrace' | 'defaultBaseURL' | 'defaultApiKey' | 'maxTokenBudget'>
|
||||
> & Pick<OrchestratorConfig, 'onApproval' | 'onProgress' | 'onTrace' | 'defaultBaseURL' | 'defaultApiKey' | 'maxTokenBudget'>
|
||||
|
||||
private readonly teams: Map<string, Team> = new Map()
|
||||
private completedTaskCount = 0
|
||||
|
|
@ -534,6 +721,7 @@ export class OpenMultiAgent {
|
|||
defaultProvider: config.defaultProvider ?? 'anthropic',
|
||||
defaultBaseURL: config.defaultBaseURL,
|
||||
defaultApiKey: config.defaultApiKey,
|
||||
maxTokenBudget: config.maxTokenBudget,
|
||||
onApproval: config.onApproval,
|
||||
onProgress: config.onProgress,
|
||||
onTrace: config.onTrace,
|
||||
|
|
@ -580,12 +768,18 @@ export class OpenMultiAgent {
|
|||
* @param config - Agent configuration.
|
||||
* @param prompt - The user prompt to send.
|
||||
*/
|
||||
async runAgent(config: AgentConfig, prompt: string): Promise<AgentRunResult> {
|
||||
async runAgent(
|
||||
config: AgentConfig,
|
||||
prompt: string,
|
||||
options?: { abortSignal?: AbortSignal },
|
||||
): Promise<AgentRunResult> {
|
||||
const effectiveBudget = resolveTokenBudget(config.maxTokenBudget, this.config.maxTokenBudget)
|
||||
const effective: AgentConfig = {
|
||||
...config,
|
||||
provider: config.provider ?? this.config.defaultProvider,
|
||||
baseURL: config.baseURL ?? this.config.defaultBaseURL,
|
||||
apiKey: config.apiKey ?? this.config.defaultApiKey,
|
||||
maxTokenBudget: effectiveBudget,
|
||||
}
|
||||
const agent = buildAgent(effective)
|
||||
this.config.onProgress?.({
|
||||
|
|
@ -594,11 +788,34 @@ export class OpenMultiAgent {
|
|||
data: { prompt },
|
||||
})
|
||||
|
||||
const traceOptions: Partial<RunOptions> | undefined = this.config.onTrace
|
||||
? { onTrace: this.config.onTrace, runId: generateRunId(), traceAgent: config.name }
|
||||
: undefined
|
||||
// Build run-time options: trace + optional abort signal. RunOptions has
|
||||
// readonly fields, so we assemble the literal in one shot.
|
||||
const traceFields = this.config.onTrace
|
||||
? {
|
||||
onTrace: this.config.onTrace,
|
||||
runId: generateRunId(),
|
||||
traceAgent: config.name,
|
||||
}
|
||||
: null
|
||||
const abortFields = options?.abortSignal ? { abortSignal: options.abortSignal } : null
|
||||
const runOptions: Partial<RunOptions> | undefined =
|
||||
traceFields || abortFields
|
||||
? { ...(traceFields ?? {}), ...(abortFields ?? {}) }
|
||||
: undefined
|
||||
|
||||
const result = await agent.run(prompt, traceOptions)
|
||||
const result = await agent.run(prompt, runOptions)
|
||||
|
||||
if (result.budgetExceeded) {
|
||||
this.config.onProgress?.({
|
||||
type: 'budget_exceeded',
|
||||
agent: config.name,
|
||||
data: new TokenBudgetExceededError(
|
||||
config.name,
|
||||
result.tokenUsage.input_tokens + result.tokenUsage.output_tokens,
|
||||
effectiveBudget ?? 0,
|
||||
),
|
||||
})
|
||||
}
|
||||
|
||||
this.config.onProgress?.({
|
||||
type: 'agent_complete',
|
||||
|
|
@ -638,20 +855,98 @@ export class OpenMultiAgent {
|
|||
* @param team - A team created via {@link createTeam} (or `new Team(...)`).
|
||||
* @param goal - High-level natural-language goal for the team.
|
||||
*/
|
||||
async runTeam(team: Team, goal: string): Promise<TeamRunResult> {
|
||||
async runTeam(
|
||||
team: Team,
|
||||
goal: string,
|
||||
options?: { abortSignal?: AbortSignal; coordinator?: CoordinatorConfig },
|
||||
): Promise<TeamRunResult> {
|
||||
const agentConfigs = team.getAgents()
|
||||
const coordinatorOverrides = options?.coordinator
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Short-circuit: skip coordinator for simple, single-action goals.
|
||||
//
|
||||
// When the goal is short and contains no multi-step / coordination
|
||||
// signals, dispatching it to a single agent is faster and cheaper
|
||||
// than spinning up a coordinator for decomposition + synthesis.
|
||||
//
|
||||
// The best-matching agent is selected via keyword affinity scoring
|
||||
// (same algorithm as the `capability-match` scheduler strategy).
|
||||
// ------------------------------------------------------------------
|
||||
if (agentConfigs.length > 0 && isSimpleGoal(goal)) {
|
||||
const bestAgent = selectBestAgent(goal, agentConfigs)
|
||||
|
||||
// Use buildAgent() + agent.run() directly instead of this.runAgent()
|
||||
// to avoid duplicate progress events and double completedTaskCount.
|
||||
// Events are emitted here; counting is handled by buildTeamRunResult().
|
||||
const effectiveBudget = resolveTokenBudget(bestAgent.maxTokenBudget, this.config.maxTokenBudget)
|
||||
const effective: AgentConfig = {
|
||||
...bestAgent,
|
||||
provider: bestAgent.provider ?? this.config.defaultProvider,
|
||||
baseURL: bestAgent.baseURL ?? this.config.defaultBaseURL,
|
||||
apiKey: bestAgent.apiKey ?? this.config.defaultApiKey,
|
||||
maxTokenBudget: effectiveBudget,
|
||||
}
|
||||
const agent = buildAgent(effective)
|
||||
|
||||
this.config.onProgress?.({
|
||||
type: 'agent_start',
|
||||
agent: bestAgent.name,
|
||||
data: { phase: 'short-circuit', goal },
|
||||
})
|
||||
|
||||
const traceFields = this.config.onTrace
|
||||
? { onTrace: this.config.onTrace, runId: generateRunId(), traceAgent: bestAgent.name }
|
||||
: null
|
||||
const abortFields = options?.abortSignal ? { abortSignal: options.abortSignal } : null
|
||||
const runOptions: Partial<RunOptions> | undefined =
|
||||
traceFields || abortFields
|
||||
? { ...(traceFields ?? {}), ...(abortFields ?? {}) }
|
||||
: undefined
|
||||
|
||||
const result = await agent.run(goal, runOptions)
|
||||
|
||||
if (result.budgetExceeded) {
|
||||
this.config.onProgress?.({
|
||||
type: 'budget_exceeded',
|
||||
agent: bestAgent.name,
|
||||
data: new TokenBudgetExceededError(
|
||||
bestAgent.name,
|
||||
result.tokenUsage.input_tokens + result.tokenUsage.output_tokens,
|
||||
effectiveBudget ?? 0,
|
||||
),
|
||||
})
|
||||
}
|
||||
|
||||
this.config.onProgress?.({
|
||||
type: 'agent_complete',
|
||||
agent: bestAgent.name,
|
||||
data: { phase: 'short-circuit', result },
|
||||
})
|
||||
|
||||
const agentResults = new Map<string, AgentRunResult>()
|
||||
agentResults.set(bestAgent.name, result)
|
||||
return this.buildTeamRunResult(agentResults)
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Step 1: Coordinator decomposes goal into tasks
|
||||
// ------------------------------------------------------------------
|
||||
const coordinatorConfig: AgentConfig = {
|
||||
name: 'coordinator',
|
||||
model: this.config.defaultModel,
|
||||
provider: this.config.defaultProvider,
|
||||
baseURL: this.config.defaultBaseURL,
|
||||
apiKey: this.config.defaultApiKey,
|
||||
systemPrompt: this.buildCoordinatorSystemPrompt(agentConfigs),
|
||||
maxTurns: 3,
|
||||
model: coordinatorOverrides?.model ?? this.config.defaultModel,
|
||||
provider: coordinatorOverrides?.provider ?? this.config.defaultProvider,
|
||||
baseURL: coordinatorOverrides?.baseURL ?? this.config.defaultBaseURL,
|
||||
apiKey: coordinatorOverrides?.apiKey ?? this.config.defaultApiKey,
|
||||
systemPrompt: this.buildCoordinatorPrompt(agentConfigs, coordinatorOverrides),
|
||||
maxTurns: coordinatorOverrides?.maxTurns ?? 3,
|
||||
maxTokens: coordinatorOverrides?.maxTokens,
|
||||
temperature: coordinatorOverrides?.temperature,
|
||||
toolPreset: coordinatorOverrides?.toolPreset,
|
||||
tools: coordinatorOverrides?.tools,
|
||||
disallowedTools: coordinatorOverrides?.disallowedTools,
|
||||
loopDetection: coordinatorOverrides?.loopDetection,
|
||||
timeoutMs: coordinatorOverrides?.timeoutMs,
|
||||
}
|
||||
|
||||
const decompositionPrompt = this.buildDecompositionPrompt(goal, agentConfigs)
|
||||
|
|
@ -665,11 +960,29 @@ export class OpenMultiAgent {
|
|||
})
|
||||
|
||||
const decompTraceOptions: Partial<RunOptions> | undefined = this.config.onTrace
|
||||
? { onTrace: this.config.onTrace, runId: runId ?? '', traceAgent: 'coordinator' }
|
||||
: undefined
|
||||
? { onTrace: this.config.onTrace, runId: runId ?? '', traceAgent: 'coordinator', abortSignal: options?.abortSignal }
|
||||
: options?.abortSignal ? { abortSignal: options.abortSignal } : undefined
|
||||
const decompositionResult = await coordinatorAgent.run(decompositionPrompt, decompTraceOptions)
|
||||
const agentResults = new Map<string, AgentRunResult>()
|
||||
agentResults.set('coordinator:decompose', decompositionResult)
|
||||
const maxTokenBudget = this.config.maxTokenBudget
|
||||
let cumulativeUsage = addUsage(ZERO_USAGE, decompositionResult.tokenUsage)
|
||||
|
||||
if (
|
||||
maxTokenBudget !== undefined
|
||||
&& cumulativeUsage.input_tokens + cumulativeUsage.output_tokens > maxTokenBudget
|
||||
) {
|
||||
this.config.onProgress?.({
|
||||
type: 'budget_exceeded',
|
||||
agent: 'coordinator',
|
||||
data: new TokenBudgetExceededError(
|
||||
'coordinator',
|
||||
cumulativeUsage.input_tokens + cumulativeUsage.output_tokens,
|
||||
maxTokenBudget,
|
||||
),
|
||||
})
|
||||
return this.buildTeamRunResult(agentResults)
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Step 2: Parse tasks from coordinator output
|
||||
|
|
@ -712,19 +1025,46 @@ export class OpenMultiAgent {
|
|||
agentResults,
|
||||
config: this.config,
|
||||
runId,
|
||||
abortSignal: options?.abortSignal,
|
||||
cumulativeUsage,
|
||||
maxTokenBudget,
|
||||
budgetExceededTriggered: false,
|
||||
budgetExceededReason: undefined,
|
||||
}
|
||||
|
||||
await executeQueue(queue, ctx)
|
||||
cumulativeUsage = ctx.cumulativeUsage
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Step 5: Coordinator synthesises final result
|
||||
// ------------------------------------------------------------------
|
||||
if (
|
||||
maxTokenBudget !== undefined
|
||||
&& cumulativeUsage.input_tokens + cumulativeUsage.output_tokens > maxTokenBudget
|
||||
) {
|
||||
return this.buildTeamRunResult(agentResults)
|
||||
}
|
||||
const synthesisPrompt = await this.buildSynthesisPrompt(goal, queue.list(), team)
|
||||
const synthTraceOptions: Partial<RunOptions> | undefined = this.config.onTrace
|
||||
? { onTrace: this.config.onTrace, runId: runId ?? '', traceAgent: 'coordinator' }
|
||||
: undefined
|
||||
const synthesisResult = await coordinatorAgent.run(synthesisPrompt, synthTraceOptions)
|
||||
agentResults.set('coordinator', synthesisResult)
|
||||
cumulativeUsage = addUsage(cumulativeUsage, synthesisResult.tokenUsage)
|
||||
if (
|
||||
maxTokenBudget !== undefined
|
||||
&& cumulativeUsage.input_tokens + cumulativeUsage.output_tokens > maxTokenBudget
|
||||
) {
|
||||
this.config.onProgress?.({
|
||||
type: 'budget_exceeded',
|
||||
agent: 'coordinator',
|
||||
data: new TokenBudgetExceededError(
|
||||
'coordinator',
|
||||
cumulativeUsage.input_tokens + cumulativeUsage.output_tokens,
|
||||
maxTokenBudget,
|
||||
),
|
||||
})
|
||||
}
|
||||
|
||||
this.config.onProgress?.({
|
||||
type: 'agent_complete',
|
||||
|
|
@ -760,10 +1100,12 @@ export class OpenMultiAgent {
|
|||
description: string
|
||||
assignee?: string
|
||||
dependsOn?: string[]
|
||||
memoryScope?: 'dependencies' | 'all'
|
||||
maxRetries?: number
|
||||
retryDelayMs?: number
|
||||
retryBackoff?: number
|
||||
}>,
|
||||
options?: { abortSignal?: AbortSignal },
|
||||
): Promise<TeamRunResult> {
|
||||
const agentConfigs = team.getAgents()
|
||||
const queue = new TaskQueue()
|
||||
|
|
@ -775,6 +1117,7 @@ export class OpenMultiAgent {
|
|||
description: t.description,
|
||||
assignee: t.assignee,
|
||||
dependsOn: t.dependsOn,
|
||||
memoryScope: t.memoryScope,
|
||||
maxRetries: t.maxRetries,
|
||||
retryDelayMs: t.retryDelayMs,
|
||||
retryBackoff: t.retryBackoff,
|
||||
|
|
@ -794,6 +1137,11 @@ export class OpenMultiAgent {
|
|||
agentResults,
|
||||
config: this.config,
|
||||
runId: this.config.onTrace ? generateRunId() : undefined,
|
||||
abortSignal: options?.abortSignal,
|
||||
cumulativeUsage: ZERO_USAGE,
|
||||
maxTokenBudget: this.config.maxTokenBudget,
|
||||
budgetExceededTriggered: false,
|
||||
budgetExceededReason: undefined,
|
||||
}
|
||||
|
||||
await executeQueue(queue, ctx)
|
||||
|
|
@ -845,6 +1193,47 @@ export class OpenMultiAgent {
|
|||
|
||||
/**
 * Build the system prompt given to the coordinator agent.
 *
 * Assembles the default intro plus the roster, output-format, and synthesis
 * sections, joined with newlines.
 */
private buildCoordinatorSystemPrompt(agents: AgentConfig[]): string {
  // NOTE(review): buildCoordinatorRosterSection appears to begin with the same
  // two "You are a task coordinator..." lines emitted below, which would
  // duplicate the intro in the assembled prompt — confirm and deduplicate.
  return [
    'You are a task coordinator responsible for decomposing high-level goals',
    'into concrete, actionable tasks and assigning them to the right team members.',
    '',
    this.buildCoordinatorRosterSection(agents),
    '',
    this.buildCoordinatorOutputFormatSection(),
    '',
    this.buildCoordinatorSynthesisSection(),
  ].join('\n')
}
|
||||
|
||||
/**
 * Build coordinator system prompt with optional caller overrides.
 *
 * - `config.systemPrompt` replaces the default intro entirely (the roster,
 *   output-format, and synthesis sections are still appended).
 * - `config.instructions` augments the default prompt under an
 *   "## Additional Instructions" heading.
 */
private buildCoordinatorPrompt(agents: AgentConfig[], config?: CoordinatorConfig): string {
  if (config?.systemPrompt) {
    // NOTE(review): buildCoordinatorRosterSection seems to embed the default
    // "You are a task coordinator..." intro, which would partially undo the
    // caller's systemPrompt override — verify against the roster builder.
    return [
      config.systemPrompt,
      '',
      this.buildCoordinatorRosterSection(agents),
      '',
      this.buildCoordinatorOutputFormatSection(),
      '',
      this.buildCoordinatorSynthesisSection(),
    ].join('\n')
  }

  const base = this.buildCoordinatorSystemPrompt(agents)
  if (!config?.instructions) {
    return base
  }

  // Instructions extend rather than replace the default prompt.
  return [
    base,
    '',
    '## Additional Instructions',
    config.instructions,
  ].join('\n')
}
|
||||
|
||||
/** Build the coordinator team roster section. */
|
||||
private buildCoordinatorRosterSection(agents: AgentConfig[]): string {
|
||||
const roster = agents
|
||||
.map(
|
||||
(a) =>
|
||||
|
|
@ -853,12 +1242,14 @@ export class OpenMultiAgent {
|
|||
.join('\n')
|
||||
|
||||
return [
|
||||
'You are a task coordinator responsible for decomposing high-level goals',
|
||||
'into concrete, actionable tasks and assigning them to the right team members.',
|
||||
'',
|
||||
'## Team Roster',
|
||||
roster,
|
||||
'',
|
||||
].join('\n')
|
||||
}
|
||||
|
||||
/** Build the coordinator JSON output-format section. */
|
||||
private buildCoordinatorOutputFormatSection(): string {
|
||||
return [
|
||||
'## Output Format',
|
||||
'When asked to decompose a goal, respond ONLY with a JSON array of task objects.',
|
||||
'Each task must have:',
|
||||
|
|
@ -869,7 +1260,12 @@ export class OpenMultiAgent {
|
|||
'',
|
||||
'Wrap the JSON in a ```json code fence.',
|
||||
'Do not include any text outside the code fence.',
|
||||
'',
|
||||
].join('\n')
|
||||
}
|
||||
|
||||
/** Build the coordinator synthesis guidance section. */
|
||||
private buildCoordinatorSynthesisSection(): string {
|
||||
return [
|
||||
'## When synthesising results',
|
||||
'You will be given completed task outputs and asked to synthesise a final answer.',
|
||||
'Write a clear, comprehensive response that addresses the original goal.',
|
||||
|
|
@ -943,6 +1339,7 @@ export class OpenMultiAgent {
|
|||
*/
|
||||
private loadSpecsIntoQueue(
|
||||
specs: ReadonlyArray<ParsedTaskSpec & {
|
||||
memoryScope?: 'dependencies' | 'all'
|
||||
maxRetries?: number
|
||||
retryDelayMs?: number
|
||||
retryBackoff?: number
|
||||
|
|
@ -963,6 +1360,7 @@ export class OpenMultiAgent {
|
|||
assignee: spec.assignee && agentNames.has(spec.assignee)
|
||||
? spec.assignee
|
||||
: undefined,
|
||||
memoryScope: spec.memoryScope,
|
||||
maxRetries: spec.maxRetries,
|
||||
retryDelayMs: spec.retryDelayMs,
|
||||
retryBackoff: spec.retryBackoff,
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@
|
|||
|
||||
import type { AgentConfig, Task } from '../types.js'
|
||||
import type { TaskQueue } from '../task/queue.js'
|
||||
import { extractKeywords, keywordScore } from '../utils/keywords.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public types
|
||||
|
|
@ -74,38 +75,6 @@ function countBlockedDependents(taskId: string, allTasks: Task[]): number {
|
|||
return visited.size
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute a simple keyword-overlap score between `text` and `keywords`.
|
||||
*
|
||||
* Both the text and keywords are normalised to lower-case before comparison.
|
||||
* Each keyword that appears in the text contributes +1 to the score.
|
||||
*/
|
||||
function keywordScore(text: string, keywords: string[]): number {
|
||||
const lower = text.toLowerCase()
|
||||
return keywords.reduce((acc, kw) => acc + (lower.includes(kw.toLowerCase()) ? 1 : 0), 0)
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract a list of meaningful keywords from a string for capability matching.
|
||||
*
|
||||
* Strips common stop-words so that incidental matches (e.g. "the", "and") do
|
||||
* not inflate scores. Returns unique words longer than three characters.
|
||||
*/
|
||||
function extractKeywords(text: string): string[] {
|
||||
const STOP_WORDS = new Set([
|
||||
'the', 'and', 'for', 'that', 'this', 'with', 'are', 'from', 'have',
|
||||
'will', 'your', 'you', 'can', 'all', 'each', 'when', 'then', 'they',
|
||||
'them', 'their', 'about', 'into', 'more', 'also', 'should', 'must',
|
||||
])
|
||||
|
||||
return [...new Set(
|
||||
text
|
||||
.toLowerCase()
|
||||
.split(/\W+/)
|
||||
.filter((w) => w.length > 3 && !STOP_WORDS.has(w)),
|
||||
)]
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Scheduler
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -289,6 +289,11 @@ export class TaskQueue {
|
|||
return this.list().filter((t) => t.status === status)
|
||||
}
|
||||
|
||||
/** Returns the task with the given ID, or `undefined` when no such task exists. */
get(taskId: string): Task | undefined {
  return this.tasks.get(taskId)
}
|
||||
|
||||
/**
|
||||
* Returns `true` when every task in the queue has reached a terminal state
|
||||
* (`'completed'`, `'failed'`, or `'skipped'`), **or** the queue is empty.
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ export function createTask(input: {
|
|||
description: string
|
||||
assignee?: string
|
||||
dependsOn?: string[]
|
||||
memoryScope?: 'dependencies' | 'all'
|
||||
maxRetries?: number
|
||||
retryDelayMs?: number
|
||||
retryBackoff?: number
|
||||
|
|
@ -43,6 +44,7 @@ export function createTask(input: {
|
|||
status: 'pending' as TaskStatus,
|
||||
assignee: input.assignee,
|
||||
dependsOn: input.dependsOn ? [...input.dependsOn] : undefined,
|
||||
memoryScope: input.memoryScope,
|
||||
result: undefined,
|
||||
createdAt: now,
|
||||
updatedAt: now,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,97 @@
|
|||
/**
|
||||
* Shared recursive directory walk for built-in file tools.
|
||||
*
|
||||
* Used by {@link grepTool} and {@link globTool} so glob filtering and skip
|
||||
* rules stay consistent.
|
||||
*/
|
||||
|
||||
import { readdir, stat } from 'fs/promises'
|
||||
import { join } from 'path'
|
||||
|
||||
/**
 * Directories that are almost never useful to traverse for code search:
 * VCS metadata (.git/.svn/.hg) and generated/vendored output
 * (node_modules, .next, dist, build).
 */
export const SKIP_DIRS = new Set([
  '.git',
  '.svn',
  '.hg',
  'node_modules',
  '.next',
  'dist',
  'build',
])

/** Options accepted by {@link collectFiles}. */
export interface CollectFilesOptions {
  /** When set, stop collecting once this many paths are gathered. */
  readonly maxFiles?: number
}
|
||||
|
||||
/**
|
||||
* Recursively walk `dir` and return file paths, honouring {@link SKIP_DIRS}
|
||||
* and an optional filename glob pattern.
|
||||
*/
|
||||
export async function collectFiles(
|
||||
dir: string,
|
||||
glob: string | undefined,
|
||||
signal: AbortSignal | undefined,
|
||||
options?: CollectFilesOptions,
|
||||
): Promise<string[]> {
|
||||
const results: string[] = []
|
||||
await walk(dir, glob, results, signal, options?.maxFiles)
|
||||
return results
|
||||
}
|
||||
|
||||
async function walk(
|
||||
dir: string,
|
||||
glob: string | undefined,
|
||||
results: string[],
|
||||
signal: AbortSignal | undefined,
|
||||
maxFiles: number | undefined,
|
||||
): Promise<void> {
|
||||
if (signal?.aborted === true) return
|
||||
if (maxFiles !== undefined && results.length >= maxFiles) return
|
||||
|
||||
let entryNames: string[]
|
||||
try {
|
||||
entryNames = await readdir(dir, { encoding: 'utf8' })
|
||||
} catch {
|
||||
return
|
||||
}
|
||||
|
||||
for (const entryName of entryNames) {
|
||||
if (signal !== undefined && signal.aborted) return
|
||||
if (maxFiles !== undefined && results.length >= maxFiles) return
|
||||
|
||||
const fullPath = join(dir, entryName)
|
||||
|
||||
let entryInfo: Awaited<ReturnType<typeof stat>>
|
||||
try {
|
||||
entryInfo = await stat(fullPath)
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
|
||||
if (entryInfo.isDirectory()) {
|
||||
if (!SKIP_DIRS.has(entryName)) {
|
||||
await walk(fullPath, glob, results, signal, maxFiles)
|
||||
}
|
||||
} else if (entryInfo.isFile()) {
|
||||
if (glob === undefined || matchesGlob(entryName, glob)) {
|
||||
results.push(fullPath)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Minimal glob match supporting `*.ext` and `**<pattern>` forms.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
export function matchesGlob(filename: string, glob: string): boolean {
|
||||
const pattern = glob.startsWith('**/') ? glob.slice(3) : glob
|
||||
const regexSource = pattern
|
||||
.replace(/[.+^${}()|[\]\\]/g, '\\$&')
|
||||
.replace(/\*/g, '.*')
|
||||
.replace(/\?/g, '.')
|
||||
const re = new RegExp(`^${regexSource}$`, 'i')
|
||||
return re.test(filename)
|
||||
}
|
||||
|
|
@ -0,0 +1,99 @@
|
|||
/**
|
||||
* Built-in glob tool.
|
||||
*
|
||||
* Lists file paths under a directory matching an optional filename glob.
|
||||
* Does not read file contents — use {@link grepTool} to search inside files.
|
||||
*/
|
||||
|
||||
import { stat } from 'fs/promises'
|
||||
import { basename, relative } from 'path'
|
||||
import { z } from 'zod'
|
||||
import type { ToolResult } from '../../types.js'
|
||||
import { collectFiles, matchesGlob } from './fs-walk.js'
|
||||
import { defineTool } from '../framework.js'
|
||||
|
||||
// Cap applied when the caller does not supply `maxFiles`.
const DEFAULT_MAX_FILES = 500

/**
 * Built-in `glob` tool.
 *
 * Lists file paths under a directory matching an optional filename glob.
 * Paths are returned relative to the process working directory, sorted,
 * and capped at `maxFiles` entries.
 */
export const globTool = defineTool({
  name: 'glob',
  description:
    'List file paths under a directory that match an optional filename glob. ' +
    'Does not read file contents — use `grep` to search inside files. ' +
    'Skips common bulky directories (node_modules, .git, dist, etc.). ' +
    'Paths in the result are relative to the process working directory. ' +
    'Results are capped by `maxFiles`.',

  inputSchema: z.object({
    path: z
      .string()
      .optional()
      .describe(
        'Directory to list files under. Defaults to the current working directory.',
      ),
    pattern: z
      .string()
      .optional()
      .describe(
        'Filename glob (e.g. "*.ts", "**/*.json"). When omitted, every file ' +
        'under the directory is listed (subject to maxFiles and skipped dirs).',
      ),
    maxFiles: z
      .number()
      .int()
      .positive()
      .optional()
      .describe(
        `Maximum number of file paths to return. Defaults to ${DEFAULT_MAX_FILES}.`,
      ),
  }),

  execute: async (input, context): Promise<ToolResult> => {
    const root = input.path ?? process.cwd()
    const maxFiles = input.maxFiles ?? DEFAULT_MAX_FILES
    const signal = context.abortSignal

    let linesOut: string[]
    let truncated = false

    try {
      const info = await stat(root)
      if (info.isFile()) {
        // `path` may point at a single file: apply the pattern to its basename.
        const name = basename(root)
        if (
          input.pattern !== undefined &&
          !matchesGlob(name, input.pattern)
        ) {
          return { data: 'No files matched.', isError: false }
        }
        // `relative` yields '' when root equals cwd — fall back to the raw path.
        linesOut = [relative(process.cwd(), root) || root]
      } else {
        // Ask for one extra path so truncation can be detected without a
        // separate count pass.
        const collected = await collectFiles(root, input.pattern, signal, {
          maxFiles: maxFiles + 1,
        })
        truncated = collected.length > maxFiles
        const capped = collected.slice(0, maxFiles)
        linesOut = capped.map((f) => relative(process.cwd(), f) || f)
      }
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Unknown error'
      return {
        data: `Cannot access path "${root}": ${message}`,
        isError: true,
      }
    }

    if (linesOut.length === 0) {
      return { data: 'No files matched.', isError: false }
    }

    // Sort a copy so the collection order (directory walk order) is not mutated.
    const sorted = [...linesOut].sort((a, b) => a.localeCompare(b))
    const truncationNote = truncated
      ? `\n\n(listing capped at ${maxFiles} paths; raise maxFiles for more)`
      : ''

    return {
      data: sorted.join('\n') + truncationNote,
      isError: false,
    }
  },
})
|
||||
|
|
@ -8,28 +8,18 @@
|
|||
*/
|
||||
|
||||
import { spawn } from 'child_process'
|
||||
import { readdir, readFile, stat } from 'fs/promises'
|
||||
// Note: readdir is used with { encoding: 'utf8' } to return string[] directly.
|
||||
import { join, relative } from 'path'
|
||||
import { readFile, stat } from 'fs/promises'
|
||||
import { relative } from 'path'
|
||||
import { z } from 'zod'
|
||||
import type { ToolResult } from '../../types.js'
|
||||
import { defineTool } from '../framework.js'
|
||||
import { collectFiles } from './fs-walk.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const DEFAULT_MAX_RESULTS = 100
|
||||
// Directories that are almost never useful to search inside
|
||||
const SKIP_DIRS = new Set([
|
||||
'.git',
|
||||
'.svn',
|
||||
'.hg',
|
||||
'node_modules',
|
||||
'.next',
|
||||
'dist',
|
||||
'build',
|
||||
])
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tool definition
|
||||
|
|
@ -42,6 +32,7 @@ export const grepTool = defineTool({
|
|||
'Returns matching lines with their file paths and 1-based line numbers. ' +
|
||||
'Use the `glob` parameter to restrict the search to specific file types ' +
|
||||
'(e.g. "*.ts"). ' +
|
||||
'To list matching file paths without reading contents, use the `glob` tool. ' +
|
||||
'Results are capped by `maxResults` to keep the response manageable.',
|
||||
|
||||
inputSchema: z.object({
|
||||
|
|
@ -270,79 +261,6 @@ async function runNodeSearch(
|
|||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// File collection with glob filtering
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Recursively walk `dir` and return file paths, honouring `SKIP_DIRS` and an
|
||||
* optional glob pattern.
|
||||
*/
|
||||
async function collectFiles(
|
||||
dir: string,
|
||||
glob: string | undefined,
|
||||
signal: AbortSignal | undefined,
|
||||
): Promise<string[]> {
|
||||
const results: string[] = []
|
||||
await walk(dir, glob, results, signal)
|
||||
return results
|
||||
}
|
||||
|
||||
async function walk(
|
||||
dir: string,
|
||||
glob: string | undefined,
|
||||
results: string[],
|
||||
signal: AbortSignal | undefined,
|
||||
): Promise<void> {
|
||||
if (signal?.aborted === true) return
|
||||
|
||||
let entryNames: string[]
|
||||
try {
|
||||
// Read as plain strings so we don't have to deal with Buffer Dirent variants.
|
||||
entryNames = await readdir(dir, { encoding: 'utf8' })
|
||||
} catch {
|
||||
return
|
||||
}
|
||||
|
||||
for (const entryName of entryNames) {
|
||||
if (signal !== undefined && signal.aborted) return
|
||||
|
||||
const fullPath = join(dir, entryName)
|
||||
|
||||
let entryInfo: Awaited<ReturnType<typeof stat>>
|
||||
try {
|
||||
entryInfo = await stat(fullPath)
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
|
||||
if (entryInfo.isDirectory()) {
|
||||
if (!SKIP_DIRS.has(entryName)) {
|
||||
await walk(fullPath, glob, results, signal)
|
||||
}
|
||||
} else if (entryInfo.isFile()) {
|
||||
if (glob === undefined || matchesGlob(entryName, glob)) {
|
||||
results.push(fullPath)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Minimal glob match supporting `*.ext` and `**\/<pattern>` forms.
|
||||
*/
|
||||
function matchesGlob(filename: string, glob: string): boolean {
|
||||
// Strip leading **/ prefix — we already recurse into all directories
|
||||
const pattern = glob.startsWith('**/') ? glob.slice(3) : glob
|
||||
// Convert shell glob characters to regex equivalents
|
||||
const regexSource = pattern
|
||||
.replace(/[.+^${}()|[\]\\]/g, '\\$&') // escape special regex chars first
|
||||
.replace(/\*/g, '.*') // * -> .*
|
||||
.replace(/\?/g, '.') // ? -> .
|
||||
const re = new RegExp(`^${regexSource}$`, 'i')
|
||||
return re.test(filename)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ripgrep availability check (cached per process)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -11,9 +11,10 @@ import { bashTool } from './bash.js'
|
|||
import { fileEditTool } from './file-edit.js'
|
||||
import { fileReadTool } from './file-read.js'
|
||||
import { fileWriteTool } from './file-write.js'
|
||||
import { globTool } from './glob.js'
|
||||
import { grepTool } from './grep.js'
|
||||
|
||||
export { bashTool, fileEditTool, fileReadTool, fileWriteTool, grepTool }
|
||||
export { bashTool, fileEditTool, fileReadTool, fileWriteTool, globTool, grepTool }
|
||||
|
||||
/**
|
||||
* The ordered list of all built-in tools. Import this when you need to
|
||||
|
|
@ -29,6 +30,7 @@ export const BUILT_IN_TOOLS: ToolDefinition<any>[] = [
|
|||
fileWriteTool,
|
||||
fileEditTool,
|
||||
grepTool,
|
||||
globTool,
|
||||
]
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -24,6 +24,11 @@ export interface ToolExecutorOptions {
|
|||
* Defaults to 4.
|
||||
*/
|
||||
maxConcurrency?: number
|
||||
/**
|
||||
* Agent-level default for maximum tool output length in characters.
|
||||
* Per-tool `maxOutputChars` takes priority over this value.
|
||||
*/
|
||||
maxToolOutputChars?: number
|
||||
}
|
||||
|
||||
/** Describes one call in a batch. */
|
||||
|
|
@ -47,10 +52,12 @@ export interface BatchToolCall {
|
|||
export class ToolExecutor {
|
||||
private readonly registry: ToolRegistry
|
||||
private readonly semaphore: Semaphore
|
||||
private readonly maxToolOutputChars?: number
|
||||
|
||||
constructor(registry: ToolRegistry, options: ToolExecutorOptions = {}) {
|
||||
this.registry = registry
|
||||
this.semaphore = new Semaphore(options.maxConcurrency ?? 4)
|
||||
this.maxToolOutputChars = options.maxToolOutputChars
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
|
|
@ -156,7 +163,7 @@ export class ToolExecutor {
|
|||
// --- Execute ---
|
||||
try {
|
||||
const result = await tool.execute(parseResult.data, context)
|
||||
return result
|
||||
return this.maybeTruncate(tool, result)
|
||||
} catch (err) {
|
||||
const message =
|
||||
err instanceof Error
|
||||
|
|
@ -164,10 +171,26 @@ export class ToolExecutor {
|
|||
: typeof err === 'string'
|
||||
? err
|
||||
: JSON.stringify(err)
|
||||
return this.errorResult(`Tool "${tool.name}" threw an error: ${message}`)
|
||||
return this.maybeTruncate(tool, this.errorResult(`Tool "${tool.name}" threw an error: ${message}`))
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply truncation to a tool result if a character limit is configured.
|
||||
* Priority: per-tool `maxOutputChars` > agent-level `maxToolOutputChars`.
|
||||
*/
|
||||
private maybeTruncate(
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
tool: ToolDefinition<any>,
|
||||
result: ToolResult,
|
||||
): ToolResult {
|
||||
const maxChars = tool.maxOutputChars ?? this.maxToolOutputChars
|
||||
if (maxChars === undefined || maxChars <= 0 || result.data.length <= maxChars) {
|
||||
return result
|
||||
}
|
||||
return { ...result, data: truncateToolOutput(result.data, maxChars) }
|
||||
}
|
||||
|
||||
/** Construct an error ToolResult. */
|
||||
private errorResult(message: string): ToolResult {
|
||||
return {
|
||||
|
|
@ -176,3 +199,37 @@ export class ToolExecutor {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Truncation helper
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Truncate tool output to fit within `maxChars`, preserving the head (~70%)
|
||||
* and tail (~30%) with a marker indicating how many characters were removed.
|
||||
*
|
||||
* The marker itself is counted against the budget so the returned string
|
||||
* never exceeds `maxChars`. When `maxChars` is too small to fit any
|
||||
* content alongside the marker, a marker-only string is returned.
|
||||
*/
|
||||
export function truncateToolOutput(data: string, maxChars: number): string {
|
||||
if (data.length <= maxChars) return data
|
||||
|
||||
// Estimate marker length (digit count may shrink after subtracting content,
|
||||
// but using data.length gives a safe upper-bound for the digit count).
|
||||
const markerTemplate = '\n\n[...truncated characters...]\n\n'
|
||||
const markerOverhead = markerTemplate.length + String(data.length).length
|
||||
|
||||
// When maxChars is too small to fit any content alongside the marker,
|
||||
// fall back to a hard slice so the result never exceeds maxChars.
|
||||
if (maxChars <= markerOverhead) {
|
||||
return data.slice(0, maxChars)
|
||||
}
|
||||
|
||||
const available = maxChars - markerOverhead
|
||||
const headChars = Math.floor(available * 0.7)
|
||||
const tailChars = available - headChars
|
||||
const truncatedCount = data.length - headChars - tailChars
|
||||
|
||||
return `${data.slice(0, headChars)}\n\n[...truncated ${truncatedCount} characters...]\n\n${data.slice(-tailChars)}`
|
||||
}
|
||||
|
|
|
|||
|
|
@ -72,12 +72,28 @@ export function defineTool<TInput>(config: {
|
|||
name: string
|
||||
description: string
|
||||
inputSchema: ZodSchema<TInput>
|
||||
/**
|
||||
* Optional JSON Schema for the LLM (bypasses Zod → JSON Schema conversion).
|
||||
*/
|
||||
llmInputSchema?: Record<string, unknown>
|
||||
/**
|
||||
* Per-tool maximum output length in characters. When set, tool output
|
||||
* exceeding this limit is truncated (head + tail with a marker in between).
|
||||
* Takes priority over agent-level `maxToolOutputChars`.
|
||||
*/
|
||||
maxOutputChars?: number
|
||||
execute: (input: TInput, context: ToolUseContext) => Promise<ToolResult>
|
||||
}): ToolDefinition<TInput> {
|
||||
return {
|
||||
name: config.name,
|
||||
description: config.description,
|
||||
inputSchema: config.inputSchema,
|
||||
...(config.llmInputSchema !== undefined
|
||||
? { llmInputSchema: config.llmInputSchema }
|
||||
: {}),
|
||||
...(config.maxOutputChars !== undefined
|
||||
? { maxOutputChars: config.maxOutputChars }
|
||||
: {}),
|
||||
execute: config.execute,
|
||||
}
|
||||
}
|
||||
|
|
@ -93,13 +109,17 @@ export function defineTool<TInput>(config: {
|
|||
export class ToolRegistry {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
private readonly tools = new Map<string, ToolDefinition<any>>()
|
||||
private readonly runtimeToolNames = new Set<string>()
|
||||
|
||||
/**
|
||||
* Add a tool to the registry. Throws if a tool with the same name has
|
||||
* already been registered — prevents silent overwrites.
|
||||
*/
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
register(tool: ToolDefinition<any>): void {
|
||||
register(
|
||||
tool: ToolDefinition<any>,
|
||||
options?: { runtimeAdded?: boolean },
|
||||
): void {
|
||||
if (this.tools.has(tool.name)) {
|
||||
throw new Error(
|
||||
`ToolRegistry: a tool named "${tool.name}" is already registered. ` +
|
||||
|
|
@ -107,6 +127,9 @@ export class ToolRegistry {
|
|||
)
|
||||
}
|
||||
this.tools.set(tool.name, tool)
|
||||
if (options?.runtimeAdded === true) {
|
||||
this.runtimeToolNames.add(tool.name)
|
||||
}
|
||||
}
|
||||
|
||||
/** Return a tool by name, or `undefined` if not found. */
|
||||
|
|
@ -147,11 +170,12 @@ export class ToolRegistry {
|
|||
*/
|
||||
unregister(name: string): void {
|
||||
this.tools.delete(name)
|
||||
this.runtimeToolNames.delete(name)
|
||||
}
|
||||
|
||||
/** Alias for {@link unregister} — available for symmetry with `register`. */
|
||||
deregister(name: string): void {
|
||||
this.tools.delete(name)
|
||||
this.unregister(name)
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -161,7 +185,8 @@ export class ToolRegistry {
|
|||
*/
|
||||
toToolDefs(): LLMToolDef[] {
|
||||
return Array.from(this.tools.values()).map((tool) => {
|
||||
const schema = zodToJsonSchema(tool.inputSchema)
|
||||
const schema =
|
||||
tool.llmInputSchema ?? zodToJsonSchema(tool.inputSchema)
|
||||
return {
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
|
|
@ -170,6 +195,14 @@ export class ToolRegistry {
|
|||
})
|
||||
}
|
||||
|
||||
/**
 * Return only tools that were added dynamically at runtime (e.g. via
 * `agent.addTool()`), in LLM definition format.
 *
 * Filters the full definition list by the names recorded in
 * `runtimeToolNames` when `register` was called with `runtimeAdded: true`.
 */
toRuntimeToolDefs(): LLMToolDef[] {
  return this.toToolDefs().filter(tool => this.runtimeToolNames.has(tool.name))
}
|
||||
|
||||
/**
|
||||
* Convert all registered tools to the Anthropic-style `input_schema`
|
||||
* format. Prefer {@link toToolDefs} for normal use; this method is exposed
|
||||
|
|
@ -178,13 +211,20 @@ export class ToolRegistry {
|
|||
toLLMTools(): Array<{
|
||||
name: string
|
||||
description: string
|
||||
input_schema: {
|
||||
type: 'object'
|
||||
properties: Record<string, JSONSchemaProperty>
|
||||
required?: string[]
|
||||
}
|
||||
/** Anthropic-style tool input JSON Schema (`type` is usually `object`). */
|
||||
input_schema: Record<string, unknown>
|
||||
}> {
|
||||
return Array.from(this.tools.values()).map((tool) => {
|
||||
if (tool.llmInputSchema !== undefined) {
|
||||
return {
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
input_schema: {
|
||||
type: 'object' as const,
|
||||
...(tool.llmInputSchema as Record<string, unknown>),
|
||||
},
|
||||
}
|
||||
}
|
||||
const schema = zodToJsonSchema(tool.inputSchema)
|
||||
return {
|
||||
name: tool.name,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,296 @@
|
|||
import { z } from 'zod'
|
||||
import { defineTool } from './framework.js'
|
||||
import type { ToolDefinition } from '../types.js'
|
||||
|
||||
interface MCPToolDescriptor {
|
||||
name: string
|
||||
description?: string
|
||||
/** MCP tool JSON Schema; same shape LLM APIs expect for object parameters. */
|
||||
inputSchema?: Record<string, unknown>
|
||||
}
|
||||
|
||||
interface MCPListToolsResponse {
|
||||
tools?: MCPToolDescriptor[]
|
||||
nextCursor?: string
|
||||
}
|
||||
|
||||
interface MCPCallToolResponse {
|
||||
content?: Array<Record<string, unknown>>
|
||||
structuredContent?: unknown
|
||||
isError?: boolean
|
||||
toolResult?: unknown
|
||||
}
|
||||
|
||||
interface MCPClientLike {
|
||||
connect(transport: unknown, options?: { timeout?: number; signal?: AbortSignal }): Promise<void>
|
||||
listTools(
|
||||
params?: { cursor?: string },
|
||||
options?: { timeout?: number; signal?: AbortSignal },
|
||||
): Promise<MCPListToolsResponse>
|
||||
callTool(
|
||||
request: { name: string; arguments: Record<string, unknown> },
|
||||
resultSchema?: unknown,
|
||||
options?: { timeout?: number; signal?: AbortSignal },
|
||||
): Promise<MCPCallToolResponse>
|
||||
close?: () => Promise<void>
|
||||
}
|
||||
|
||||
type MCPClientConstructor = new (
|
||||
info: { name: string; version: string },
|
||||
options: { capabilities: Record<string, unknown> },
|
||||
) => MCPClientLike
|
||||
|
||||
type StdioTransportConstructor = new (config: {
|
||||
command: string
|
||||
args?: string[]
|
||||
env?: Record<string, string | undefined>
|
||||
cwd?: string
|
||||
}) => { close?: () => Promise<void> }
|
||||
|
||||
interface MCPModules {
|
||||
Client: MCPClientConstructor
|
||||
StdioClientTransport: StdioTransportConstructor
|
||||
}
|
||||
|
||||
/** Default timeout (ms) for MCP connect / list / call requests. */
const DEFAULT_MCP_REQUEST_TIMEOUT_MS = 60_000

/**
 * Dynamically import the MCP SDK client modules.
 *
 * Both modules are loaded in parallel via dynamic `import()` — presumably so
 * the `@modelcontextprotocol/sdk` package stays an optional dependency that
 * is only required when MCP tools are actually used (confirm against
 * package.json). The `as Promise<...>` casts map the SDK exports onto the
 * local structural types declared above.
 */
async function loadMCPModules(): Promise<MCPModules> {
  const [{ Client }, { StdioClientTransport }] = await Promise.all([
    import('@modelcontextprotocol/sdk/client/index.js') as Promise<{
      Client: MCPClientConstructor
    }>,
    import('@modelcontextprotocol/sdk/client/stdio.js') as Promise<{
      StdioClientTransport: StdioTransportConstructor
    }>,
  ])
  return { Client, StdioClientTransport }
}
|
||||
|
||||
export interface ConnectMCPToolsConfig {
|
||||
command: string
|
||||
args?: string[]
|
||||
env?: Record<string, string | undefined>
|
||||
cwd?: string
|
||||
/**
|
||||
* Optional segment prepended to MCP tool names for the framework tool (and LLM) name.
|
||||
* Example: prefix `github` + MCP tool `search_issues` → `github_search_issues`.
|
||||
*/
|
||||
namePrefix?: string
|
||||
/**
|
||||
* Timeout (ms) for MCP connect and each `tools/list` page. Defaults to 60000.
|
||||
*/
|
||||
requestTimeoutMs?: number
|
||||
/**
|
||||
* Client metadata sent to the MCP server.
|
||||
*/
|
||||
clientName?: string
|
||||
clientVersion?: string
|
||||
}
|
||||
|
||||
export interface ConnectedMCPTools {
|
||||
tools: ToolDefinition[]
|
||||
disconnect: () => Promise<void>
|
||||
}
|
||||
|
||||
/**
|
||||
* Build an LLM-safe tool name: MCP and prior examples used `prefix/name`, but
|
||||
* Anthropic and other providers reject `/` in tool names.
|
||||
*/
|
||||
function normalizeToolName(rawName: string, namePrefix?: string): string {
|
||||
const trimmedPrefix = namePrefix?.trim()
|
||||
const base =
|
||||
trimmedPrefix !== undefined && trimmedPrefix !== ''
|
||||
? `${trimmedPrefix}_${rawName}`
|
||||
: rawName
|
||||
return base.replace(/\//g, '_')
|
||||
}
|
||||
|
||||
/** MCP `tools/list` JSON Schema; forwarded to the LLM as-is (runtime validation stays `z.any()`). */
|
||||
function mcpLlmInputSchema(
|
||||
schema: Record<string, unknown> | undefined,
|
||||
): Record<string, unknown> {
|
||||
if (schema !== undefined && typeof schema === 'object' && !Array.isArray(schema)) {
|
||||
return schema
|
||||
}
|
||||
return { type: 'object' }
|
||||
}
|
||||
|
||||
function contentBlockToText(block: Record<string, unknown>): string | undefined {
|
||||
const typ = block.type
|
||||
if (typ === 'text' && typeof block.text === 'string') {
|
||||
return block.text
|
||||
}
|
||||
if (typ === 'image' && typeof block.data === 'string') {
|
||||
const mime =
|
||||
typeof block.mimeType === 'string' ? block.mimeType : 'image/*'
|
||||
return `[image ${mime}; base64 length=${block.data.length}]`
|
||||
}
|
||||
if (typ === 'audio' && typeof block.data === 'string') {
|
||||
const mime =
|
||||
typeof block.mimeType === 'string' ? block.mimeType : 'audio/*'
|
||||
return `[audio ${mime}; base64 length=${block.data.length}]`
|
||||
}
|
||||
if (
|
||||
typ === 'resource' &&
|
||||
block.resource !== null &&
|
||||
typeof block.resource === 'object'
|
||||
) {
|
||||
const r = block.resource as Record<string, unknown>
|
||||
const uri = typeof r.uri === 'string' ? r.uri : ''
|
||||
if (typeof r.text === 'string') {
|
||||
return `[resource ${uri}]\n${r.text}`
|
||||
}
|
||||
if (typeof r.blob === 'string') {
|
||||
const mime = typeof r.mimeType === 'string' ? r.mimeType : ''
|
||||
return `[resource ${uri}; mimeType=${mime}; blob base64 length=${r.blob.length}]`
|
||||
}
|
||||
return `[resource ${uri}]`
|
||||
}
|
||||
if (typ === 'resource_link') {
|
||||
const uri = typeof block.uri === 'string' ? block.uri : ''
|
||||
const name = typeof block.name === 'string' ? block.name : ''
|
||||
const desc =
|
||||
typeof block.description === 'string' ? block.description : ''
|
||||
const head = `[resource_link name=${JSON.stringify(name)} uri=${JSON.stringify(uri)}]`
|
||||
return desc === '' ? head : `${head}\n${desc}`
|
||||
}
|
||||
return undefined
|
||||
}
|
||||
|
||||
function toToolResultData(result: MCPCallToolResponse): string {
|
||||
if ('toolResult' in result && result.toolResult !== undefined) {
|
||||
try {
|
||||
return JSON.stringify(result.toolResult, null, 2)
|
||||
} catch {
|
||||
return String(result.toolResult)
|
||||
}
|
||||
}
|
||||
|
||||
const lines: string[] = []
|
||||
for (const block of result.content ?? []) {
|
||||
if (block === null || typeof block !== 'object') continue
|
||||
const rec = block as Record<string, unknown>
|
||||
const line = contentBlockToText(rec)
|
||||
if (line !== undefined) {
|
||||
lines.push(line)
|
||||
continue
|
||||
}
|
||||
try {
|
||||
lines.push(
|
||||
`[${String(rec.type ?? 'unknown')}]\n${JSON.stringify(rec, null, 2)}`,
|
||||
)
|
||||
} catch {
|
||||
lines.push('[mcp content block]')
|
||||
}
|
||||
}
|
||||
|
||||
if (lines.length > 0) {
|
||||
return lines.join('\n')
|
||||
}
|
||||
|
||||
if (result.structuredContent !== undefined) {
|
||||
try {
|
||||
return JSON.stringify(result.structuredContent, null, 2)
|
||||
} catch {
|
||||
return String(result.structuredContent)
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
return JSON.stringify(result)
|
||||
} catch {
|
||||
return 'MCP tool completed with non-text output.'
|
||||
}
|
||||
}
|
||||
|
||||
async function listAllMcpTools(
|
||||
client: MCPClientLike,
|
||||
requestOpts: { timeout: number },
|
||||
): Promise<MCPToolDescriptor[]> {
|
||||
const acc: MCPToolDescriptor[] = []
|
||||
let cursor: string | undefined
|
||||
do {
|
||||
const page = await client.listTools(
|
||||
cursor !== undefined ? { cursor } : {},
|
||||
requestOpts,
|
||||
)
|
||||
acc.push(...(page.tools ?? []))
|
||||
cursor =
|
||||
typeof page.nextCursor === 'string' && page.nextCursor !== ''
|
||||
? page.nextCursor
|
||||
: undefined
|
||||
} while (cursor !== undefined)
|
||||
return acc
|
||||
}
|
||||
|
||||
/**
|
||||
* Connect to an MCP server over stdio and convert exposed MCP tools into
|
||||
* open-multi-agent ToolDefinitions.
|
||||
*/
|
||||
export async function connectMCPTools(
|
||||
config: ConnectMCPToolsConfig,
|
||||
): Promise<ConnectedMCPTools> {
|
||||
const { Client, StdioClientTransport } = await loadMCPModules()
|
||||
|
||||
const transport = new StdioClientTransport({
|
||||
command: config.command,
|
||||
args: config.args ?? [],
|
||||
env: config.env,
|
||||
cwd: config.cwd,
|
||||
})
|
||||
|
||||
const client = new Client(
|
||||
{
|
||||
name: config.clientName ?? 'open-multi-agent',
|
||||
version: config.clientVersion ?? '0.0.0',
|
||||
},
|
||||
{ capabilities: {} },
|
||||
)
|
||||
|
||||
const requestOpts = {
|
||||
timeout: config.requestTimeoutMs ?? DEFAULT_MCP_REQUEST_TIMEOUT_MS,
|
||||
}
|
||||
|
||||
await client.connect(transport, requestOpts)
|
||||
|
||||
const mcpTools = await listAllMcpTools(client, requestOpts)
|
||||
|
||||
const tools: ToolDefinition[] = mcpTools.map((tool) =>
|
||||
defineTool({
|
||||
name: normalizeToolName(tool.name, config.namePrefix),
|
||||
description: tool.description ?? `MCP tool: ${tool.name}`,
|
||||
inputSchema: z.any(),
|
||||
llmInputSchema: mcpLlmInputSchema(tool.inputSchema),
|
||||
execute: async (input: Record<string, unknown>) => {
|
||||
try {
|
||||
const result = await client.callTool(
|
||||
{
|
||||
name: tool.name,
|
||||
arguments: input,
|
||||
},
|
||||
undefined,
|
||||
requestOpts,
|
||||
)
|
||||
return {
|
||||
data: toToolResultData(result),
|
||||
isError: result.isError === true,
|
||||
}
|
||||
} catch (error) {
|
||||
const message =
|
||||
error instanceof Error ? error.message : String(error)
|
||||
return {
|
||||
data: `MCP tool "${tool.name}" failed: ${message}`,
|
||||
isError: true,
|
||||
}
|
||||
}
|
||||
},
|
||||
}),
|
||||
)
|
||||
|
||||
return {
|
||||
tools,
|
||||
disconnect: async () => {
|
||||
await client.close?.()
|
||||
},
|
||||
}
|
||||
}
|
||||
132
src/types.ts
132
src/types.ts
|
|
@ -65,6 +65,31 @@ export interface LLMMessage {
|
|||
readonly content: ContentBlock[]
|
||||
}
|
||||
|
||||
/** Context management strategy for long-running agent conversations. */
export type ContextStrategy =
  /** Bound the conversation to the most recent `maxTurns` turns. */
  | { type: 'sliding-window'; maxTurns: number }
  /** Summarize history once the estimated token count reaches `maxTokens`;
   * `summaryModel` optionally overrides the model used for the summary call. */
  | { type: 'summarize'; maxTokens: number; summaryModel?: string }
  | {
      type: 'compact'
      /** Estimated token threshold that triggers compaction. Compaction is skipped when below this. */
      maxTokens: number
      /** Number of recent turn pairs (assistant+user) to keep intact. Default: 4. */
      preserveRecentTurns?: number
      /** Minimum chars in a tool_result content to qualify for compaction. Default: 200. */
      minToolResultChars?: number
      /** Minimum chars in an assistant text block to qualify for truncation. Default: 2000. */
      minTextBlockChars?: number
      /** Maximum chars to keep from a truncated text block (head excerpt). Default: 200. */
      textBlockExcerptChars?: number
    }
  | {
      type: 'custom'
      /** User-supplied compressor: receives the full message list and the
       * current token estimate, and returns the replacement message list. */
      compress: (
        messages: LLMMessage[],
        estimatedTokens: number,
      ) => Promise<LLMMessage[]> | LLMMessage[]
    }
|
||||
|
||||
/** Token accounting for a single API call. */
|
||||
export interface TokenUsage {
|
||||
readonly input_tokens: number
|
||||
|
|
@ -90,11 +115,12 @@ export interface LLMResponse {
|
|||
* - `text` — incremental text delta
|
||||
* - `tool_use` — the model has begun or completed a tool-use block
|
||||
* - `tool_result` — a tool result has been appended to the stream
|
||||
* - `budget_exceeded` — token budget threshold reached for this run
|
||||
* - `done` — the stream has ended; `data` is the final {@link LLMResponse}
|
||||
* - `error` — an unrecoverable error occurred; `data` is an `Error`
|
||||
*/
|
||||
export interface StreamEvent {
|
||||
readonly type: 'text' | 'tool_use' | 'tool_result' | 'loop_detected' | 'done' | 'error'
|
||||
readonly type: 'text' | 'tool_use' | 'tool_result' | 'loop_detected' | 'budget_exceeded' | 'done' | 'error'
|
||||
readonly data: unknown
|
||||
}
|
||||
|
||||
|
|
@ -169,12 +195,24 @@ export interface ToolResult {
|
|||
* A tool registered with the framework.
|
||||
*
|
||||
* `inputSchema` is a Zod schema used for validation before `execute` is called.
|
||||
* At API call time it is converted to JSON Schema via {@link LLMToolDef}.
|
||||
* At API call time it is converted to JSON Schema for {@link LLMToolDef}, unless
|
||||
* `llmInputSchema` is set (e.g. MCP tools ship JSON Schema from the server).
|
||||
*/
|
||||
export interface ToolDefinition<TInput = Record<string, unknown>> {
  /** Unique tool name exposed to the registry and the LLM. */
  readonly name: string
  /** Description shown to the LLM so it knows when to call the tool. */
  readonly description: string
  /** Zod schema used to validate input before `execute` is called. */
  readonly inputSchema: ZodSchema<TInput>
  /**
   * When present, used as {@link LLMToolDef.inputSchema} as-is instead of
   * deriving JSON Schema from `inputSchema` (Zod).
   */
  readonly llmInputSchema?: Record<string, unknown>
  /**
   * Per-tool maximum output length in characters. When set, tool output
   * exceeding this limit is truncated (head + tail with a marker in between).
   * Takes priority over {@link AgentConfig.maxToolOutputChars}.
   */
  readonly maxOutputChars?: number
  /** Run the tool with validated input and per-call context. */
  execute(input: TInput, context: ToolUseContext): Promise<ToolResult>
}
|
||||
|
||||
|
|
@ -204,10 +242,28 @@ export interface AgentConfig {
|
|||
/** API key override; falls back to the provider's standard env var. */
|
||||
readonly apiKey?: string
|
||||
readonly systemPrompt?: string
|
||||
/**
|
||||
* Custom tool definitions to register alongside built-in tools.
|
||||
* Created via `defineTool()`. Custom tools bypass `tools` (allowlist)
|
||||
* and `toolPreset` filtering, but can still be blocked by `disallowedTools`.
|
||||
*
|
||||
* Tool names must not collide with built-in tool names; a duplicate name
|
||||
* will throw at registration time.
|
||||
*/
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
readonly customTools?: readonly ToolDefinition<any>[]
|
||||
/** Names of tools (from the tool registry) available to this agent. */
|
||||
readonly tools?: readonly string[]
|
||||
/** Names of tools explicitly disallowed for this agent. */
|
||||
readonly disallowedTools?: readonly string[]
|
||||
/** Predefined tool preset for common use cases. */
|
||||
readonly toolPreset?: 'readonly' | 'readwrite' | 'full'
|
||||
readonly maxTurns?: number
|
||||
readonly maxTokens?: number
|
||||
/** Maximum cumulative tokens (input + output) allowed for this run. */
|
||||
readonly maxTokenBudget?: number
|
||||
/** Optional context compression policy to control input growth across turns. */
|
||||
readonly contextStrategy?: ContextStrategy
|
||||
readonly temperature?: number
|
||||
/**
|
||||
* Maximum wall-clock time (in milliseconds) for the entire agent run.
|
||||
|
|
@ -220,6 +276,28 @@ export interface AgentConfig {
|
|||
* calls and text outputs to detect stuck loops before `maxTurns` is reached.
|
||||
*/
|
||||
readonly loopDetection?: LoopDetectionConfig
|
||||
/**
|
||||
* Maximum tool output length in characters for all tools used by this agent.
|
||||
* When set, tool outputs exceeding this limit are truncated (head + tail
|
||||
* with a marker in between). Per-tool {@link ToolDefinition.maxOutputChars}
|
||||
* takes priority over this value.
|
||||
*/
|
||||
readonly maxToolOutputChars?: number
|
||||
/**
|
||||
* Compress tool results that the agent has already processed.
|
||||
*
|
||||
* In multi-turn runs, tool results persist in the conversation even after the
|
||||
* agent has acted on them. When enabled, consumed tool results (those followed
|
||||
* by an assistant response) are replaced with a short marker before the next
|
||||
* LLM call, freeing context budget for new reasoning.
|
||||
*
|
||||
* - `true` — enable with default threshold (500 chars)
|
||||
* - `{ minChars: N }` — only compress results longer than N characters
|
||||
* - `false` / `undefined` — disabled (default)
|
||||
*
|
||||
* Error tool results are never compressed.
|
||||
*/
|
||||
readonly compressToolResults?: boolean | { readonly minChars?: number }
|
||||
/**
|
||||
* Optional Zod schema for structured output. When set, the agent's final
|
||||
* output is parsed as JSON and validated against this schema. A single
|
||||
|
|
@ -307,6 +385,8 @@ export interface AgentRunResult {
|
|||
readonly structured?: unknown
|
||||
/** True when the run was terminated or warned due to loop detection. */
|
||||
readonly loopDetected?: boolean
|
||||
/** True when the run stopped because token budget was exceeded. */
|
||||
readonly budgetExceeded?: boolean
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -346,6 +426,12 @@ export interface Task {
|
|||
assignee?: string
|
||||
/** IDs of tasks that must complete before this one can start. */
|
||||
dependsOn?: readonly string[]
|
||||
/**
|
||||
* Controls what prior team context is injected into this task's prompt.
|
||||
* - `dependencies` (default): only direct dependency task results
|
||||
* - `all`: full shared-memory summary
|
||||
*/
|
||||
readonly memoryScope?: 'dependencies' | 'all'
|
||||
result?: string
|
||||
readonly createdAt: Date
|
||||
updatedAt: Date
|
||||
|
|
@ -375,6 +461,7 @@ export interface OrchestratorEvent {
|
|||
| 'task_complete'
|
||||
| 'task_skipped'
|
||||
| 'task_retry'
|
||||
| 'budget_exceeded'
|
||||
| 'message'
|
||||
| 'error'
|
||||
readonly agent?: string
|
||||
|
|
@ -385,6 +472,8 @@ export interface OrchestratorEvent {
|
|||
/** Top-level configuration for the orchestrator. */
|
||||
export interface OrchestratorConfig {
|
||||
readonly maxConcurrency?: number
|
||||
/** Maximum cumulative tokens (input + output) allowed per orchestrator run. */
|
||||
readonly maxTokenBudget?: number
|
||||
readonly defaultModel?: string
|
||||
readonly defaultProvider?: 'anthropic' | 'copilot' | 'grok' | 'openai' | 'gemini'
|
||||
readonly defaultBaseURL?: string
|
||||
|
|
@ -410,6 +499,43 @@ export interface OrchestratorConfig {
|
|||
readonly onApproval?: (completedTasks: readonly Task[], nextTasks: readonly Task[]) => Promise<boolean>
|
||||
}
|
||||
|
||||
/**
 * Optional overrides for the temporary coordinator agent created by `runTeam`.
 *
 * All fields are optional. Unset fields fall back to orchestrator defaults
 * (or coordinator built-in defaults where applicable).
 */
export interface CoordinatorConfig {
  /** Coordinator model. Defaults to `OrchestratorConfig.defaultModel`. */
  readonly model?: string
  /** Provider for the coordinator's LLM calls. */
  readonly provider?: 'anthropic' | 'copilot' | 'grok' | 'openai' | 'gemini'
  /** Base URL override for the coordinator's provider. */
  readonly baseURL?: string
  /** API key override for the coordinator's provider. */
  readonly apiKey?: string
  /**
   * Full system prompt override. When set, this replaces the default
   * coordinator preamble and decomposition guidance.
   *
   * Team roster, output format, and synthesis sections are still appended.
   */
  readonly systemPrompt?: string
  /**
   * Additional instructions appended to the default coordinator prompt.
   * Ignored when `systemPrompt` is provided.
   */
  readonly instructions?: string
  /** Mirrors {@link AgentConfig.maxTurns} for the coordinator agent. */
  readonly maxTurns?: number
  /** Mirrors {@link AgentConfig.maxTokens} for the coordinator agent. */
  readonly maxTokens?: number
  /** Mirrors {@link AgentConfig.temperature} for the coordinator agent. */
  readonly temperature?: number
  /** Predefined tool preset for common coordinator use cases. */
  readonly toolPreset?: 'readonly' | 'readwrite' | 'full'
  /** Tool names available to the coordinator. */
  readonly tools?: readonly string[]
  /** Tool names explicitly denied to the coordinator. */
  readonly disallowedTools?: readonly string[]
  /** Loop-detection settings for the coordinator run. */
  readonly loopDetection?: LoopDetectionConfig
  /** Wall-clock timeout (ms) for the coordinator run. */
  readonly timeoutMs?: number
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Trace events — lightweight observability spans
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -438,6 +564,8 @@ export interface TraceEventBase {
|
|||
/** Trace event emitted for a single LLM API call. */
export interface LLMCallTrace extends TraceEventBase {
  readonly type: 'llm_call'
  /** Model identifier used for this call. */
  readonly model: string
  /** Distinguishes normal turn calls from context-summary calls. */
  readonly phase?: 'turn' | 'summary'
  /** Turn counter at the time of the call. NOTE(review): numbering base (0 or 1) not visible here — confirm against the runner. */
  readonly turn: number
  /** Token accounting for this call. */
  readonly tokens: TokenUsage
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,39 @@
|
|||
/**
|
||||
* Shared keyword-affinity helpers used by capability-match scheduling
|
||||
* and short-circuit agent selection. Kept in one place so behaviour
|
||||
* can't drift between Scheduler and Orchestrator.
|
||||
*/
|
||||
|
||||
export const STOP_WORDS: ReadonlySet<string> = new Set([
|
||||
'the', 'and', 'for', 'that', 'this', 'with', 'are', 'from', 'have',
|
||||
'will', 'your', 'you', 'can', 'all', 'each', 'when', 'then', 'they',
|
||||
'them', 'their', 'about', 'into', 'more', 'also', 'should', 'must',
|
||||
])
|
||||
|
||||
/**
|
||||
* Tokenise `text` into a deduplicated set of lower-cased keywords.
|
||||
* Words shorter than 4 characters and entries in {@link STOP_WORDS}
|
||||
* are filtered out.
|
||||
*/
|
||||
export function extractKeywords(text: string): string[] {
|
||||
return [
|
||||
...new Set(
|
||||
text
|
||||
.toLowerCase()
|
||||
.split(/\W+/)
|
||||
.filter((w) => w.length > 3 && !STOP_WORDS.has(w)),
|
||||
),
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Count how many `keywords` appear (case-insensitively) in `text`.
|
||||
* Each keyword contributes at most 1 to the score.
|
||||
*/
|
||||
export function keywordScore(text: string, keywords: readonly string[]): number {
|
||||
const lower = text.toLowerCase()
|
||||
return keywords.reduce(
|
||||
(acc, kw) => acc + (lower.includes(kw.toLowerCase()) ? 1 : 0),
|
||||
0,
|
||||
)
|
||||
}
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
import type { LLMMessage } from '../types.js'
|
||||
|
||||
/**
|
||||
* Estimate token count using a lightweight character heuristic.
|
||||
* This intentionally avoids model-specific tokenizer dependencies.
|
||||
*/
|
||||
export function estimateTokens(messages: LLMMessage[]): number {
|
||||
let chars = 0
|
||||
|
||||
for (const message of messages) {
|
||||
for (const block of message.content) {
|
||||
if (block.type === 'text') {
|
||||
chars += block.text.length
|
||||
} else if (block.type === 'tool_result') {
|
||||
chars += block.content.length
|
||||
} else if (block.type === 'tool_use') {
|
||||
chars += JSON.stringify(block.input).length
|
||||
} else if (block.type === 'image') {
|
||||
// Account for non-text payloads with a small fixed cost.
|
||||
chars += 64
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Conservative English heuristic: ~4 chars per token.
|
||||
return Math.ceil(chars / 4)
|
||||
}
|
||||
|
|
@ -0,0 +1,279 @@
|
|||
/**
|
||||
* Targeted tests for abort signal propagation fixes (#99, #100, #101).
|
||||
*
|
||||
* - #99: Per-call abortSignal must reach tool execution context
|
||||
* - #100: Abort path in executeQueue must skip blocked tasks and emit events
|
||||
* - #101: Gemini adapter must forward abortSignal to the SDK
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
||||
import { AgentRunner } from '../src/agent/runner.js'
|
||||
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
|
||||
import { ToolExecutor } from '../src/tool/executor.js'
|
||||
import { TaskQueue } from '../src/task/queue.js'
|
||||
import { createTask } from '../src/task/task.js'
|
||||
import { z } from 'zod'
|
||||
import type { LLMAdapter, LLMMessage, ToolUseContext } from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// #99 — Per-call abortSignal propagated to tool context
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('Per-call abortSignal reaches tool context (#99)', () => {
  it('tool receives per-call abortSignal, not static runner signal', async () => {
    // Track the abortSignal passed to the tool
    let receivedSignal: AbortSignal | undefined

    // A no-op tool whose only job is to capture the context it is given.
    const spy = defineTool({
      name: 'spy',
      description: 'Captures the abort signal from context.',
      inputSchema: z.object({}),
      execute: async (_input, context) => {
        receivedSignal = context.abortSignal
        return { data: 'ok', isError: false }
      },
    })

    const registry = new ToolRegistry()
    registry.register(spy)
    const executor = new ToolExecutor(registry)

    // Adapter returns one tool_use then end_turn
    // (mock call order matters: first chat() call triggers the tool,
    // second ends the run).
    const adapter: LLMAdapter = {
      name: 'mock',
      chat: vi.fn()
        .mockResolvedValueOnce({
          id: '1',
          content: [{ type: 'tool_use', id: 'call-1', name: 'spy', input: {} }],
          model: 'mock',
          stop_reason: 'tool_use',
          usage: { input_tokens: 0, output_tokens: 0 },
        })
        .mockResolvedValueOnce({
          id: '2',
          content: [{ type: 'text', text: 'done' }],
          model: 'mock',
          stop_reason: 'end_turn',
          usage: { input_tokens: 0, output_tokens: 0 },
        }),
      async *stream() { /* unused */ },
    }

    const perCallController = new AbortController()

    // Runner created WITHOUT a static abortSignal
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock',
      agentName: 'test',
    })

    const messages: LLMMessage[] = [
      { role: 'user', content: [{ type: 'text', text: 'go' }] },
    ]

    await runner.run(messages, { abortSignal: perCallController.signal })

    // The tool must have received the per-call signal, not undefined
    expect(receivedSignal).toBe(perCallController.signal)
  })

  it('tool receives static signal when no per-call signal is provided', async () => {
    let receivedSignal: AbortSignal | undefined

    const spy = defineTool({
      name: 'spy',
      description: 'Captures the abort signal from context.',
      inputSchema: z.object({}),
      execute: async (_input, context) => {
        receivedSignal = context.abortSignal
        return { data: 'ok', isError: false }
      },
    })

    const registry = new ToolRegistry()
    registry.register(spy)
    const executor = new ToolExecutor(registry)

    const staticController = new AbortController()

    // Same two-step mock script: tool_use, then end_turn.
    const adapter: LLMAdapter = {
      name: 'mock',
      chat: vi.fn()
        .mockResolvedValueOnce({
          id: '1',
          content: [{ type: 'tool_use', id: 'call-1', name: 'spy', input: {} }],
          model: 'mock',
          stop_reason: 'tool_use',
          usage: { input_tokens: 0, output_tokens: 0 },
        })
        .mockResolvedValueOnce({
          id: '2',
          content: [{ type: 'text', text: 'done' }],
          model: 'mock',
          stop_reason: 'end_turn',
          usage: { input_tokens: 0, output_tokens: 0 },
        }),
      async *stream() { /* unused */ },
    }

    // Runner created WITH a static abortSignal, no per-call signal
    const runner = new AgentRunner(adapter, registry, executor, {
      model: 'mock',
      agentName: 'test',
      abortSignal: staticController.signal,
    })

    const messages: LLMMessage[] = [
      { role: 'user', content: [{ type: 'text', text: 'go' }] },
    ]

    await runner.run(messages)

    // Falls back to the runner-level signal when no per-call one is given.
    expect(receivedSignal).toBe(staticController.signal)
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// #100 — Abort path skips blocked tasks and emits events
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('Abort path skips blocked tasks and emits events (#100)', () => {
  // Helper: build a task and force a known id (and optional deps) onto it.
  function task(id: string, opts: { dependsOn?: string[]; assignee?: string } = {}) {
    const t = createTask({ title: id, description: `task ${id}`, assignee: opts.assignee })
    return { ...t, id, dependsOn: opts.dependsOn } as ReturnType<typeof createTask>
  }

  it('skipRemaining transitions blocked tasks to skipped', () => {
    const q = new TaskQueue()
    q.add(task('a'))
    q.add(task('b', { dependsOn: ['a'] }))

    // 'b' should be blocked because it depends on 'a'
    expect(q.getByStatus('blocked').length).toBe(1)

    q.skipRemaining('Skipped: run aborted.')

    // Both tasks should be skipped — including the blocked one
    const all = q.list()
    expect(all.every(t => t.status === 'skipped')).toBe(true)
    expect(q.getByStatus('blocked').length).toBe(0)
  })

  it('skipRemaining emits task:skipped for every non-terminal task', () => {
    const q = new TaskQueue()
    q.add(task('a'))
    q.add(task('b', { dependsOn: ['a'] }))

    const handler = vi.fn()
    q.on('task:skipped', handler)

    q.skipRemaining('Skipped: run aborted.')

    // Both pending 'a' and blocked 'b' must trigger events
    expect(handler).toHaveBeenCalledTimes(2)
    const ids = handler.mock.calls.map((c: any[]) => c[0].id)
    expect(ids).toContain('a')
    expect(ids).toContain('b')
  })

  it('skipRemaining fires all:complete after skipping', () => {
    const q = new TaskQueue()
    q.add(task('a'))
    q.add(task('b', { dependsOn: ['a'] }))

    const completeHandler = vi.fn()
    q.on('all:complete', completeHandler)

    q.skipRemaining('Skipped: run aborted.')

    // Skipping everything counts as queue completion.
    expect(completeHandler).toHaveBeenCalledTimes(1)
    expect(q.isComplete()).toBe(true)
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// #101 — Gemini adapter forwards abortSignal to SDK config
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Hoisted so the mock fns exist before the vi.mock factory below runs.
const mockGenerateContent = vi.hoisted(() => vi.fn())
const mockGenerateContentStream = vi.hoisted(() => vi.fn())
const GoogleGenAIMock = vi.hoisted(() =>
  vi.fn(() => ({
    models: {
      generateContent: mockGenerateContent,
      generateContentStream: mockGenerateContentStream,
    },
  })),
)

vi.mock('@google/genai', () => ({
  GoogleGenAI: GoogleGenAIMock,
  FunctionCallingConfigMode: { AUTO: 'AUTO' },
}))

// Imported after vi.mock so the adapter binds to the mocked SDK.
import { GeminiAdapter } from '../src/llm/gemini.js'

describe('Gemini adapter forwards abortSignal (#101)', () => {
  let adapter: GeminiAdapter

  // Minimal single-candidate Gemini response with fixed token counts.
  function makeGeminiResponse(parts: Array<Record<string, unknown>>) {
    return {
      candidates: [{
        content: { parts },
        finishReason: 'STOP',
      }],
      usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 },
    }
  }

  // Wrap an array in an async generator, matching the shape the
  // mocked generateContentStream resolves to.
  async function* asyncGen<T>(items: T[]): AsyncGenerator<T> {
    for (const item of items) yield item
  }

  beforeEach(() => {
    vi.clearAllMocks()
    adapter = new GeminiAdapter('test-key')
  })

  it('chat() passes abortSignal in config', async () => {
    mockGenerateContent.mockResolvedValue(makeGeminiResponse([{ text: 'hi' }]))

    const controller = new AbortController()
    await adapter.chat(
      [{ role: 'user', content: [{ type: 'text' as const, text: 'hello' }] }],
      { model: 'gemini-2.5-flash', abortSignal: controller.signal },
    )

    // The signal must land in the SDK call's config object.
    const callArgs = mockGenerateContent.mock.calls[0][0]
    expect(callArgs.config.abortSignal).toBe(controller.signal)
  })

  it('chat() does not include abortSignal when not provided', async () => {
    mockGenerateContent.mockResolvedValue(makeGeminiResponse([{ text: 'hi' }]))

    await adapter.chat(
      [{ role: 'user', content: [{ type: 'text' as const, text: 'hello' }] }],
      { model: 'gemini-2.5-flash' },
    )

    const callArgs = mockGenerateContent.mock.calls[0][0]
    expect(callArgs.config.abortSignal).toBeUndefined()
  })

  it('stream() passes abortSignal in config', async () => {
    const chunk = makeGeminiResponse([{ text: 'hi' }])
    mockGenerateContentStream.mockResolvedValue(asyncGen([chunk]))

    const controller = new AbortController()
    const events: unknown[] = []
    // Drain the stream fully so the SDK mock is actually invoked.
    for await (const e of adapter.stream(
      [{ role: 'user', content: [{ type: 'text' as const, text: 'hello' }] }],
      { model: 'gemini-2.5-flash', abortSignal: controller.signal },
    )) {
      events.push(e)
    }

    const callArgs = mockGenerateContentStream.mock.calls[0][0]
    expect(callArgs.config.abortSignal).toBe(controller.signal)
  })
})
|
||||
|
|
@ -0,0 +1,107 @@
|
|||
import { describe, it, expect, vi } from 'vitest'
|
||||
import { OpenMultiAgent } from '../src/orchestrator/orchestrator.js'
|
||||
import { Team } from '../src/team/team.js'
|
||||
|
||||
describe('AbortSignal support for runTeam and runTasks', () => {
  it('runTeam should accept an abortSignal option', async () => {
    const orchestrator = new OpenMultiAgent({
      defaultModel: 'test-model',
      defaultProvider: 'openai',
    })

    // Verify the API accepts the option without throwing
    const controller = new AbortController()
    const team = new Team({
      name: 'test',
      agents: [
        { name: 'agent1', model: 'test-model', systemPrompt: 'test' },
      ],
    })

    // Abort immediately so the run won't actually execute LLM calls
    controller.abort()

    // runTeam should return gracefully (no unhandled rejection)
    const result = await orchestrator.runTeam(team, 'test goal', {
      abortSignal: controller.signal,
    })

    // With immediate abort, coordinator may or may not have run,
    // but the function should not throw.
    expect(result).toBeDefined()
    expect(result.agentResults).toBeInstanceOf(Map)
  })

  it('runTasks should accept an abortSignal option', async () => {
    const orchestrator = new OpenMultiAgent({
      defaultModel: 'test-model',
      defaultProvider: 'openai',
    })

    const controller = new AbortController()
    const team = new Team({
      name: 'test',
      agents: [
        { name: 'agent1', model: 'test-model', systemPrompt: 'test' },
      ],
    })

    // Pre-abort: runTasks must still resolve to a well-formed result.
    controller.abort()

    const result = await orchestrator.runTasks(team, [
      { title: 'task1', description: 'do something', assignee: 'agent1' },
    ], { abortSignal: controller.signal })

    expect(result).toBeDefined()
    expect(result.agentResults).toBeInstanceOf(Map)
  })

  it('pre-aborted signal should skip pending tasks', async () => {
    const orchestrator = new OpenMultiAgent({
      defaultModel: 'test-model',
      defaultProvider: 'openai',
    })

    const controller = new AbortController()
    controller.abort()

    const team = new Team({
      name: 'test',
      agents: [
        { name: 'agent1', model: 'test-model', systemPrompt: 'test' },
      ],
    })

    const result = await orchestrator.runTasks(team, [
      { title: 'task1', description: 'first', assignee: 'agent1' },
      { title: 'task2', description: 'second', assignee: 'agent1' },
    ], { abortSignal: controller.signal })

    // No agent runs should complete since signal was already aborted
    expect(result).toBeDefined()
  })

  it('runTeam and runTasks work without abortSignal (backward compat)', async () => {
    const orchestrator = new OpenMultiAgent({
      defaultModel: 'test-model',
      defaultProvider: 'openai',
    })

    const team = new Team({
      name: 'test',
      agents: [
        { name: 'agent1', model: 'test-model', systemPrompt: 'test' },
      ],
    })

    // These should not throw even without abortSignal
    const promise1 = orchestrator.runTeam(team, 'goal')
    const promise2 = orchestrator.runTasks(team, [
      { title: 'task1', description: 'do something', assignee: 'agent1' },
    ])

    // Both return promises (won't resolve without real LLM, but API is correct)
    // NOTE(review): promise1/promise2 are never awaited or caught; if they
    // reject after the test ends this can surface as an unhandled rejection —
    // consider attaching .catch(() => {}).
    expect(promise1).toBeInstanceOf(Promise)
    expect(promise2).toBeInstanceOf(Promise)
  })
})
|
||||
|
|
@ -4,7 +4,7 @@ import { Agent } from '../src/agent/agent.js'
|
|||
import { AgentRunner } from '../src/agent/runner.js'
|
||||
import { ToolRegistry } from '../src/tool/framework.js'
|
||||
import { ToolExecutor } from '../src/tool/executor.js'
|
||||
import type { AgentConfig, AgentRunResult, LLMAdapter, LLMMessage, LLMResponse } from '../src/types.js'
|
||||
import type { AgentConfig, AgentRunResult, LLMAdapter, LLMMessage, LLMResponse, StreamEvent } from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock helpers
|
||||
|
|
@ -243,7 +243,7 @@ describe('Agent hooks — beforeRun / afterRun', () => {
|
|||
}
|
||||
const { agent, calls } = buildMockAgent(config, 'streamed')
|
||||
|
||||
const events = []
|
||||
const events: StreamEvent[] = []
|
||||
for await (const event of agent.stream('original')) {
|
||||
events.push(event)
|
||||
}
|
||||
|
|
@ -263,7 +263,7 @@ describe('Agent hooks — beforeRun / afterRun', () => {
|
|||
}
|
||||
const { agent } = buildMockAgent(config, 'original')
|
||||
|
||||
const events = []
|
||||
const events: StreamEvent[] = []
|
||||
for await (const event of agent.stream('hi')) {
|
||||
events.push(event)
|
||||
}
|
||||
|
|
@ -280,7 +280,7 @@ describe('Agent hooks — beforeRun / afterRun', () => {
|
|||
}
|
||||
const { agent } = buildMockAgent(config, 'unreachable')
|
||||
|
||||
const events = []
|
||||
const events: StreamEvent[] = []
|
||||
for await (const event of agent.stream('hi')) {
|
||||
events.push(event)
|
||||
}
|
||||
|
|
@ -297,7 +297,7 @@ describe('Agent hooks — beforeRun / afterRun', () => {
|
|||
}
|
||||
const { agent } = buildMockAgent(config, 'streamed output')
|
||||
|
||||
const events = []
|
||||
const events: StreamEvent[] = []
|
||||
for await (const event of agent.stream('hi')) {
|
||||
events.push(event)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -178,6 +178,89 @@ describe('AgentPool', () => {
|
|||
})
|
||||
})
|
||||
|
||||
describe('per-agent serialization (#72)', () => {
|
||||
it('serializes concurrent runs on the same agent', async () => {
|
||||
const executionLog: string[] = []
|
||||
|
||||
const agent = createMockAgent('dev')
|
||||
;(agent.run as ReturnType<typeof vi.fn>).mockImplementation(async (prompt: string) => {
|
||||
executionLog.push(`start:${prompt}`)
|
||||
await new Promise(r => setTimeout(r, 50))
|
||||
executionLog.push(`end:${prompt}`)
|
||||
return SUCCESS_RESULT
|
||||
})
|
||||
|
||||
const pool = new AgentPool(5)
|
||||
pool.add(agent)
|
||||
|
||||
// Fire two runs for the same agent concurrently
|
||||
await Promise.all([
|
||||
pool.run('dev', 'task1'),
|
||||
pool.run('dev', 'task2'),
|
||||
])
|
||||
|
||||
// With per-agent serialization, runs must not overlap:
|
||||
// [start:task1, end:task1, start:task2, end:task2] (or reverse order)
|
||||
// i.e. no interleaving like [start:task1, start:task2, ...]
|
||||
expect(executionLog).toHaveLength(4)
|
||||
expect(executionLog[0]).toMatch(/^start:/)
|
||||
expect(executionLog[1]).toMatch(/^end:/)
|
||||
expect(executionLog[2]).toMatch(/^start:/)
|
||||
expect(executionLog[3]).toMatch(/^end:/)
|
||||
})
|
||||
|
||||
it('allows different agents to run in parallel', async () => {
|
||||
let concurrent = 0
|
||||
let maxConcurrent = 0
|
||||
|
||||
const makeTimedAgent = (name: string): Agent => {
|
||||
const agent = createMockAgent(name)
|
||||
;(agent.run as ReturnType<typeof vi.fn>).mockImplementation(async () => {
|
||||
concurrent++
|
||||
maxConcurrent = Math.max(maxConcurrent, concurrent)
|
||||
await new Promise(r => setTimeout(r, 50))
|
||||
concurrent--
|
||||
return SUCCESS_RESULT
|
||||
})
|
||||
return agent
|
||||
}
|
||||
|
||||
const pool = new AgentPool(5)
|
||||
pool.add(makeTimedAgent('a'))
|
||||
pool.add(makeTimedAgent('b'))
|
||||
|
||||
await Promise.all([
|
||||
pool.run('a', 'x'),
|
||||
pool.run('b', 'y'),
|
||||
])
|
||||
|
||||
// Different agents should run concurrently
|
||||
expect(maxConcurrent).toBe(2)
|
||||
})
|
||||
|
||||
it('releases agent lock even when run() throws', async () => {
|
||||
const agent = createMockAgent('dev')
|
||||
let callCount = 0
|
||||
;(agent.run as ReturnType<typeof vi.fn>).mockImplementation(async () => {
|
||||
callCount++
|
||||
if (callCount === 1) throw new Error('first run fails')
|
||||
return SUCCESS_RESULT
|
||||
})
|
||||
|
||||
const pool = new AgentPool(5)
|
||||
pool.add(agent)
|
||||
|
||||
// First run fails, second should still execute (not deadlock)
|
||||
const results = await Promise.allSettled([
|
||||
pool.run('dev', 'will-fail'),
|
||||
pool.run('dev', 'should-succeed'),
|
||||
])
|
||||
|
||||
expect(results[0]!.status).toBe('rejected')
|
||||
expect(results[1]!.status).toBe('fulfilled')
|
||||
})
|
||||
})
|
||||
|
||||
describe('concurrency', () => {
|
||||
it('respects maxConcurrency limit', async () => {
|
||||
let concurrent = 0
|
||||
|
|
|
|||
|
|
@ -0,0 +1,436 @@
|
|||
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
||||
import { textMsg, toolUseMsg, toolResultMsg, imageMsg, chatOpts, toolDef, collectEvents } from './helpers/llm-fixtures.js'
|
||||
import type { LLMResponse, StreamEvent, ToolUseBlock } from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock the Anthropic SDK
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const mockCreate = vi.hoisted(() => vi.fn())
|
||||
const mockStream = vi.hoisted(() => vi.fn())
|
||||
|
||||
vi.mock('@anthropic-ai/sdk', () => {
|
||||
const AnthropicMock = vi.fn(() => ({
|
||||
messages: {
|
||||
create: mockCreate,
|
||||
stream: mockStream,
|
||||
},
|
||||
}))
|
||||
return { default: AnthropicMock, Anthropic: AnthropicMock }
|
||||
})
|
||||
|
||||
import { AnthropicAdapter } from '../src/llm/anthropic.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function makeAnthropicResponse(overrides: Record<string, unknown> = {}) {
|
||||
return {
|
||||
id: 'msg_test123',
|
||||
content: [{ type: 'text', text: 'Hello' }],
|
||||
model: 'claude-sonnet-4',
|
||||
stop_reason: 'end_turn',
|
||||
usage: { input_tokens: 10, output_tokens: 5 },
|
||||
...overrides,
|
||||
}
|
||||
}
|
||||
|
||||
function makeStreamMock(events: Array<Record<string, unknown>>, finalMsg: Record<string, unknown>) {
|
||||
return {
|
||||
[Symbol.asyncIterator]: async function* () {
|
||||
for (const event of events) yield event
|
||||
},
|
||||
finalMessage: vi.fn().mockResolvedValue(finalMsg),
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('AnthropicAdapter', () => {
|
||||
let adapter: AnthropicAdapter
|
||||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks()
|
||||
adapter = new AnthropicAdapter('test-key')
|
||||
})
|
||||
|
||||
// =========================================================================
|
||||
// chat()
|
||||
// =========================================================================
|
||||
|
||||
describe('chat()', () => {
|
||||
it('converts a text message and returns LLMResponse', async () => {
|
||||
mockCreate.mockResolvedValue(makeAnthropicResponse())
|
||||
|
||||
const result = await adapter.chat([textMsg('user', 'Hi')], chatOpts())
|
||||
|
||||
// Verify the SDK was called with correct shape
|
||||
const callArgs = mockCreate.mock.calls[0]
|
||||
expect(callArgs[0]).toMatchObject({
|
||||
model: 'test-model',
|
||||
max_tokens: 1024,
|
||||
messages: [{ role: 'user', content: [{ type: 'text', text: 'Hi' }] }],
|
||||
})
|
||||
|
||||
// Verify response transformation
|
||||
expect(result).toEqual({
|
||||
id: 'msg_test123',
|
||||
content: [{ type: 'text', text: 'Hello' }],
|
||||
model: 'claude-sonnet-4',
|
||||
stop_reason: 'end_turn',
|
||||
usage: { input_tokens: 10, output_tokens: 5 },
|
||||
})
|
||||
})
|
||||
|
||||
it('converts tool_use blocks to Anthropic format', async () => {
|
||||
mockCreate.mockResolvedValue(makeAnthropicResponse())
|
||||
|
||||
await adapter.chat(
|
||||
[toolUseMsg('call_1', 'search', { query: 'test' })],
|
||||
chatOpts(),
|
||||
)
|
||||
|
||||
const sentMessages = mockCreate.mock.calls[0][0].messages
|
||||
expect(sentMessages[0].content[0]).toEqual({
|
||||
type: 'tool_use',
|
||||
id: 'call_1',
|
||||
name: 'search',
|
||||
input: { query: 'test' },
|
||||
})
|
||||
})
|
||||
|
||||
it('converts tool_result blocks to Anthropic format', async () => {
|
||||
mockCreate.mockResolvedValue(makeAnthropicResponse())
|
||||
|
||||
await adapter.chat(
|
||||
[toolResultMsg('call_1', 'result data', false)],
|
||||
chatOpts(),
|
||||
)
|
||||
|
||||
const sentMessages = mockCreate.mock.calls[0][0].messages
|
||||
expect(sentMessages[0].content[0]).toEqual({
|
||||
type: 'tool_result',
|
||||
tool_use_id: 'call_1',
|
||||
content: 'result data',
|
||||
is_error: false,
|
||||
})
|
||||
})
|
||||
|
||||
it('converts image blocks to Anthropic format', async () => {
|
||||
mockCreate.mockResolvedValue(makeAnthropicResponse())
|
||||
|
||||
await adapter.chat([imageMsg('image/png', 'base64data')], chatOpts())
|
||||
|
||||
const sentMessages = mockCreate.mock.calls[0][0].messages
|
||||
expect(sentMessages[0].content[0]).toEqual({
|
||||
type: 'image',
|
||||
source: {
|
||||
type: 'base64',
|
||||
media_type: 'image/png',
|
||||
data: 'base64data',
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
it('passes system prompt as top-level parameter', async () => {
|
||||
mockCreate.mockResolvedValue(makeAnthropicResponse())
|
||||
|
||||
await adapter.chat(
|
||||
[textMsg('user', 'Hi')],
|
||||
chatOpts({ systemPrompt: 'You are helpful.' }),
|
||||
)
|
||||
|
||||
expect(mockCreate.mock.calls[0][0].system).toBe('You are helpful.')
|
||||
})
|
||||
|
||||
it('converts tools to Anthropic format', async () => {
|
||||
mockCreate.mockResolvedValue(makeAnthropicResponse())
|
||||
const tool = toolDef('search', 'Search the web')
|
||||
|
||||
await adapter.chat(
|
||||
[textMsg('user', 'Hi')],
|
||||
chatOpts({ tools: [tool] }),
|
||||
)
|
||||
|
||||
const sentTools = mockCreate.mock.calls[0][0].tools
|
||||
expect(sentTools[0]).toEqual({
|
||||
name: 'search',
|
||||
description: 'Search the web',
|
||||
input_schema: {
|
||||
type: 'object',
|
||||
properties: { query: { type: 'string' } },
|
||||
required: ['query'],
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
it('passes temperature through', async () => {
|
||||
mockCreate.mockResolvedValue(makeAnthropicResponse())
|
||||
|
||||
await adapter.chat(
|
||||
[textMsg('user', 'Hi')],
|
||||
chatOpts({ temperature: 0.5 }),
|
||||
)
|
||||
|
||||
expect(mockCreate.mock.calls[0][0].temperature).toBe(0.5)
|
||||
})
|
||||
|
||||
it('passes abortSignal to SDK request options', async () => {
|
||||
mockCreate.mockResolvedValue(makeAnthropicResponse())
|
||||
const controller = new AbortController()
|
||||
|
||||
await adapter.chat(
|
||||
[textMsg('user', 'Hi')],
|
||||
chatOpts({ abortSignal: controller.signal }),
|
||||
)
|
||||
|
||||
expect(mockCreate.mock.calls[0][1]).toEqual({ signal: controller.signal })
|
||||
})
|
||||
|
||||
it('defaults max_tokens to 4096 when unset', async () => {
|
||||
mockCreate.mockResolvedValue(makeAnthropicResponse())
|
||||
|
||||
await adapter.chat(
|
||||
[textMsg('user', 'Hi')],
|
||||
{ model: 'test-model' },
|
||||
)
|
||||
|
||||
expect(mockCreate.mock.calls[0][0].max_tokens).toBe(4096)
|
||||
})
|
||||
|
||||
it('converts tool_use response blocks from Anthropic', async () => {
|
||||
mockCreate.mockResolvedValue(makeAnthropicResponse({
|
||||
content: [
|
||||
{ type: 'tool_use', id: 'call_1', name: 'search', input: { q: 'test' } },
|
||||
],
|
||||
stop_reason: 'tool_use',
|
||||
}))
|
||||
|
||||
const result = await adapter.chat([textMsg('user', 'search')], chatOpts())
|
||||
|
||||
expect(result.content[0]).toEqual({
|
||||
type: 'tool_use',
|
||||
id: 'call_1',
|
||||
name: 'search',
|
||||
input: { q: 'test' },
|
||||
})
|
||||
expect(result.stop_reason).toBe('tool_use')
|
||||
})
|
||||
|
||||
it('gracefully degrades unknown block types to text', async () => {
|
||||
mockCreate.mockResolvedValue(makeAnthropicResponse({
|
||||
content: [{ type: 'thinking', thinking: 'hmm...' }],
|
||||
}))
|
||||
|
||||
const result = await adapter.chat([textMsg('user', 'Hi')], chatOpts())
|
||||
|
||||
expect(result.content[0]).toEqual({
|
||||
type: 'text',
|
||||
text: '[unsupported block type: thinking]',
|
||||
})
|
||||
})
|
||||
|
||||
it('defaults stop_reason to end_turn when null', async () => {
|
||||
mockCreate.mockResolvedValue(makeAnthropicResponse({ stop_reason: null }))
|
||||
|
||||
const result = await adapter.chat([textMsg('user', 'Hi')], chatOpts())
|
||||
|
||||
expect(result.stop_reason).toBe('end_turn')
|
||||
})
|
||||
|
||||
it('propagates SDK errors', async () => {
|
||||
mockCreate.mockRejectedValue(new Error('Rate limited'))
|
||||
|
||||
await expect(
|
||||
adapter.chat([textMsg('user', 'Hi')], chatOpts()),
|
||||
).rejects.toThrow('Rate limited')
|
||||
})
|
||||
})
|
||||
|
||||
// =========================================================================
|
||||
// stream()
|
||||
// =========================================================================
|
||||
|
||||
describe('stream()', () => {
|
||||
it('yields text events from text_delta', async () => {
|
||||
const streamObj = makeStreamMock(
|
||||
[
|
||||
{ type: 'content_block_delta', index: 0, delta: { type: 'text_delta', text: 'Hello' } },
|
||||
{ type: 'content_block_delta', index: 0, delta: { type: 'text_delta', text: ' world' } },
|
||||
],
|
||||
makeAnthropicResponse({ content: [{ type: 'text', text: 'Hello world' }] }),
|
||||
)
|
||||
mockStream.mockReturnValue(streamObj)
|
||||
|
||||
const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))
|
||||
|
||||
const textEvents = events.filter(e => e.type === 'text')
|
||||
expect(textEvents).toEqual([
|
||||
{ type: 'text', data: 'Hello' },
|
||||
{ type: 'text', data: ' world' },
|
||||
])
|
||||
})
|
||||
|
||||
it('accumulates tool input JSON and emits tool_use on content_block_stop', async () => {
|
||||
const streamObj = makeStreamMock(
|
||||
[
|
||||
{
|
||||
type: 'content_block_start',
|
||||
index: 0,
|
||||
content_block: { type: 'tool_use', id: 'call_1', name: 'search' },
|
||||
},
|
||||
{
|
||||
type: 'content_block_delta',
|
||||
index: 0,
|
||||
delta: { type: 'input_json_delta', partial_json: '{"qu' },
|
||||
},
|
||||
{
|
||||
type: 'content_block_delta',
|
||||
index: 0,
|
||||
delta: { type: 'input_json_delta', partial_json: 'ery":"test"}' },
|
||||
},
|
||||
{ type: 'content_block_stop', index: 0 },
|
||||
],
|
||||
makeAnthropicResponse({
|
||||
content: [{ type: 'tool_use', id: 'call_1', name: 'search', input: { query: 'test' } }],
|
||||
stop_reason: 'tool_use',
|
||||
}),
|
||||
)
|
||||
mockStream.mockReturnValue(streamObj)
|
||||
|
||||
const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))
|
||||
|
||||
const toolEvents = events.filter(e => e.type === 'tool_use')
|
||||
expect(toolEvents).toHaveLength(1)
|
||||
const block = toolEvents[0].data as ToolUseBlock
|
||||
expect(block).toEqual({
|
||||
type: 'tool_use',
|
||||
id: 'call_1',
|
||||
name: 'search',
|
||||
input: { query: 'test' },
|
||||
})
|
||||
})
|
||||
|
||||
it('handles malformed tool JSON gracefully (defaults to empty object)', async () => {
|
||||
const streamObj = makeStreamMock(
|
||||
[
|
||||
{
|
||||
type: 'content_block_start',
|
||||
index: 0,
|
||||
content_block: { type: 'tool_use', id: 'call_1', name: 'broken' },
|
||||
},
|
||||
{
|
||||
type: 'content_block_delta',
|
||||
index: 0,
|
||||
delta: { type: 'input_json_delta', partial_json: '{invalid' },
|
||||
},
|
||||
{ type: 'content_block_stop', index: 0 },
|
||||
],
|
||||
makeAnthropicResponse({
|
||||
content: [{ type: 'tool_use', id: 'call_1', name: 'broken', input: {} }],
|
||||
}),
|
||||
)
|
||||
mockStream.mockReturnValue(streamObj)
|
||||
|
||||
const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))
|
||||
|
||||
const toolEvents = events.filter(e => e.type === 'tool_use')
|
||||
expect((toolEvents[0].data as ToolUseBlock).input).toEqual({})
|
||||
})
|
||||
|
||||
it('yields done event with complete LLMResponse', async () => {
|
||||
const final = makeAnthropicResponse({
|
||||
content: [{ type: 'text', text: 'Done' }],
|
||||
})
|
||||
const streamObj = makeStreamMock([], final)
|
||||
mockStream.mockReturnValue(streamObj)
|
||||
|
||||
const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))
|
||||
|
||||
const doneEvents = events.filter(e => e.type === 'done')
|
||||
expect(doneEvents).toHaveLength(1)
|
||||
const response = doneEvents[0].data as LLMResponse
|
||||
expect(response.id).toBe('msg_test123')
|
||||
expect(response.content).toEqual([{ type: 'text', text: 'Done' }])
|
||||
expect(response.usage).toEqual({ input_tokens: 10, output_tokens: 5 })
|
||||
})
|
||||
|
||||
it('yields error event when stream throws', async () => {
|
||||
const streamObj = {
|
||||
[Symbol.asyncIterator]: async function* () {
|
||||
throw new Error('Stream failed')
|
||||
},
|
||||
finalMessage: vi.fn(),
|
||||
}
|
||||
mockStream.mockReturnValue(streamObj)
|
||||
|
||||
const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))
|
||||
|
||||
const errorEvents = events.filter(e => e.type === 'error')
|
||||
expect(errorEvents).toHaveLength(1)
|
||||
expect((errorEvents[0].data as Error).message).toBe('Stream failed')
|
||||
})
|
||||
|
||||
it('passes system prompt and tools to stream call', async () => {
|
||||
const streamObj = makeStreamMock([], makeAnthropicResponse())
|
||||
mockStream.mockReturnValue(streamObj)
|
||||
const tool = toolDef('search')
|
||||
|
||||
await collectEvents(
|
||||
adapter.stream(
|
||||
[textMsg('user', 'Hi')],
|
||||
chatOpts({ systemPrompt: 'Be helpful', tools: [tool] }),
|
||||
),
|
||||
)
|
||||
|
||||
const callArgs = mockStream.mock.calls[0][0]
|
||||
expect(callArgs.system).toBe('Be helpful')
|
||||
expect(callArgs.tools[0].name).toBe('search')
|
||||
})
|
||||
|
||||
it('passes abortSignal to stream request options', async () => {
|
||||
const streamObj = makeStreamMock([], makeAnthropicResponse())
|
||||
mockStream.mockReturnValue(streamObj)
|
||||
const controller = new AbortController()
|
||||
|
||||
await collectEvents(
|
||||
adapter.stream(
|
||||
[textMsg('user', 'Hi')],
|
||||
chatOpts({ abortSignal: controller.signal }),
|
||||
),
|
||||
)
|
||||
|
||||
expect(mockStream.mock.calls[0][1]).toEqual({ signal: controller.signal })
|
||||
})
|
||||
|
||||
it('handles multiple tool calls in one stream', async () => {
|
||||
const streamObj = makeStreamMock(
|
||||
[
|
||||
{ type: 'content_block_start', index: 0, content_block: { type: 'tool_use', id: 'c1', name: 'search' } },
|
||||
{ type: 'content_block_delta', index: 0, delta: { type: 'input_json_delta', partial_json: '{"q":"a"}' } },
|
||||
{ type: 'content_block_stop', index: 0 },
|
||||
{ type: 'content_block_start', index: 1, content_block: { type: 'tool_use', id: 'c2', name: 'read' } },
|
||||
{ type: 'content_block_delta', index: 1, delta: { type: 'input_json_delta', partial_json: '{"path":"b"}' } },
|
||||
{ type: 'content_block_stop', index: 1 },
|
||||
],
|
||||
makeAnthropicResponse({
|
||||
content: [
|
||||
{ type: 'tool_use', id: 'c1', name: 'search', input: { q: 'a' } },
|
||||
{ type: 'tool_use', id: 'c2', name: 'read', input: { path: 'b' } },
|
||||
],
|
||||
}),
|
||||
)
|
||||
mockStream.mockReturnValue(streamObj)
|
||||
|
||||
const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))
|
||||
|
||||
const toolEvents = events.filter(e => e.type === 'tool_use')
|
||||
expect(toolEvents).toHaveLength(2)
|
||||
expect((toolEvents[0].data as ToolUseBlock).name).toBe('search')
|
||||
expect((toolEvents[1].data as ToolUseBlock).name).toBe('read')
|
||||
})
|
||||
})
|
||||
})
|
||||
|
|
@ -6,6 +6,7 @@ import { fileReadTool } from '../src/tool/built-in/file-read.js'
|
|||
import { fileWriteTool } from '../src/tool/built-in/file-write.js'
|
||||
import { fileEditTool } from '../src/tool/built-in/file-edit.js'
|
||||
import { bashTool } from '../src/tool/built-in/bash.js'
|
||||
import { globTool } from '../src/tool/built-in/glob.js'
|
||||
import { grepTool } from '../src/tool/built-in/grep.js'
|
||||
import { registerBuiltInTools, BUILT_IN_TOOLS } from '../src/tool/built-in/index.js'
|
||||
import { ToolRegistry } from '../src/tool/framework.js'
|
||||
|
|
@ -34,7 +35,7 @@ afterEach(async () => {
|
|||
// ===========================================================================
|
||||
|
||||
describe('registerBuiltInTools', () => {
|
||||
it('registers all 5 built-in tools', () => {
|
||||
it('registers all 6 built-in tools', () => {
|
||||
const registry = new ToolRegistry()
|
||||
registerBuiltInTools(registry)
|
||||
|
||||
|
|
@ -43,10 +44,11 @@ describe('registerBuiltInTools', () => {
|
|||
expect(registry.get('file_write')).toBeDefined()
|
||||
expect(registry.get('file_edit')).toBeDefined()
|
||||
expect(registry.get('grep')).toBeDefined()
|
||||
expect(registry.get('glob')).toBeDefined()
|
||||
})
|
||||
|
||||
it('BUILT_IN_TOOLS has correct length', () => {
|
||||
expect(BUILT_IN_TOOLS).toHaveLength(5)
|
||||
expect(BUILT_IN_TOOLS).toHaveLength(6)
|
||||
})
|
||||
})
|
||||
|
||||
|
|
@ -305,6 +307,102 @@ describe('bash', () => {
|
|||
})
|
||||
})
|
||||
|
||||
// ===========================================================================
|
||||
// glob
|
||||
// ===========================================================================
|
||||
|
||||
describe('glob', () => {
|
||||
it('lists files matching a pattern without reading contents', async () => {
|
||||
await writeFile(join(tmpDir, 'a.ts'), 'SECRET_CONTENT_SHOULD_NOT_APPEAR')
|
||||
await writeFile(join(tmpDir, 'b.md'), 'also secret')
|
||||
|
||||
const result = await globTool.execute(
|
||||
{ path: tmpDir, pattern: '*.ts' },
|
||||
defaultContext,
|
||||
)
|
||||
|
||||
expect(result.isError).toBe(false)
|
||||
expect(result.data).toContain('.ts')
|
||||
expect(result.data).not.toContain('SECRET')
|
||||
expect(result.data).not.toContain('b.md')
|
||||
})
|
||||
|
||||
it('lists all files when pattern is omitted', async () => {
|
||||
await writeFile(join(tmpDir, 'x.txt'), 'x')
|
||||
await writeFile(join(tmpDir, 'y.txt'), 'y')
|
||||
|
||||
const result = await globTool.execute({ path: tmpDir }, defaultContext)
|
||||
|
||||
expect(result.isError).toBe(false)
|
||||
expect(result.data).toContain('x.txt')
|
||||
expect(result.data).toContain('y.txt')
|
||||
})
|
||||
|
||||
it('lists a single file when path is a file', async () => {
|
||||
const filePath = join(tmpDir, 'only.ts')
|
||||
await writeFile(filePath, 'body')
|
||||
|
||||
const result = await globTool.execute({ path: filePath }, defaultContext)
|
||||
|
||||
expect(result.isError).toBe(false)
|
||||
expect(result.data).toContain('only.ts')
|
||||
})
|
||||
|
||||
it('returns no match when single file does not match pattern', async () => {
|
||||
const filePath = join(tmpDir, 'readme.md')
|
||||
await writeFile(filePath, '# doc')
|
||||
|
||||
const result = await globTool.execute(
|
||||
{ path: filePath, pattern: '*.ts' },
|
||||
defaultContext,
|
||||
)
|
||||
|
||||
expect(result.isError).toBe(false)
|
||||
expect(result.data).toContain('No files matched')
|
||||
})
|
||||
|
||||
it('recurses into subdirectories', async () => {
|
||||
const sub = join(tmpDir, 'nested')
|
||||
const { mkdir } = await import('fs/promises')
|
||||
await mkdir(sub, { recursive: true })
|
||||
await writeFile(join(sub, 'deep.ts'), '')
|
||||
|
||||
const result = await globTool.execute(
|
||||
{ path: tmpDir, pattern: '*.ts' },
|
||||
defaultContext,
|
||||
)
|
||||
|
||||
expect(result.isError).toBe(false)
|
||||
expect(result.data).toContain('deep.ts')
|
||||
})
|
||||
|
||||
it('errors on inaccessible path', async () => {
|
||||
const result = await globTool.execute(
|
||||
{ path: '/nonexistent/path/xyz' },
|
||||
defaultContext,
|
||||
)
|
||||
|
||||
expect(result.isError).toBe(true)
|
||||
expect(result.data).toContain('Cannot access path')
|
||||
})
|
||||
|
||||
it('notes truncation when maxFiles is exceeded', async () => {
|
||||
for (let i = 0; i < 5; i++) {
|
||||
await writeFile(join(tmpDir, `f${i}.txt`), '')
|
||||
}
|
||||
|
||||
const result = await globTool.execute(
|
||||
{ path: tmpDir, pattern: '*.txt', maxFiles: 3 },
|
||||
defaultContext,
|
||||
)
|
||||
|
||||
expect(result.isError).toBe(false)
|
||||
const lines = (result.data as string).split('\n').filter((l) => l.endsWith('.txt'))
|
||||
expect(lines).toHaveLength(3)
|
||||
expect(result.data).toContain('capped at 3')
|
||||
})
|
||||
})
|
||||
|
||||
// ===========================================================================
|
||||
// grep (Node.js fallback — tests do not depend on ripgrep availability)
|
||||
// ===========================================================================
|
||||
|
|
|
|||
|
|
@ -0,0 +1,69 @@
|
|||
import { describe, expect, it } from 'vitest'
|
||||
import {
|
||||
EXIT,
|
||||
parseArgs,
|
||||
serializeAgentResult,
|
||||
serializeTeamRunResult,
|
||||
} from '../src/cli/oma.js'
|
||||
import type { AgentRunResult, TeamRunResult } from '../src/types.js'
|
||||
|
||||
describe('parseArgs', () => {
|
||||
it('parses flags, key=value, and key value', () => {
|
||||
const a = parseArgs(['node', 'oma', 'run', '--goal', 'hello', '--team=x.json', '--pretty'])
|
||||
expect(a._[0]).toBe('run')
|
||||
expect(a.kv.get('goal')).toBe('hello')
|
||||
expect(a.kv.get('team')).toBe('x.json')
|
||||
expect(a.flags.has('pretty')).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe('serializeTeamRunResult', () => {
|
||||
it('maps agentResults to a plain object', () => {
|
||||
const ar: AgentRunResult = {
|
||||
success: true,
|
||||
output: 'ok',
|
||||
messages: [],
|
||||
tokenUsage: { input_tokens: 1, output_tokens: 2 },
|
||||
toolCalls: [],
|
||||
}
|
||||
const tr: TeamRunResult = {
|
||||
success: true,
|
||||
agentResults: new Map([['alice', ar]]),
|
||||
totalTokenUsage: { input_tokens: 1, output_tokens: 2 },
|
||||
}
|
||||
const json = serializeTeamRunResult(tr, { pretty: false, includeMessages: false })
|
||||
expect(json.success).toBe(true)
|
||||
expect((json.agentResults as Record<string, unknown>)['alice']).toMatchObject({
|
||||
success: true,
|
||||
output: 'ok',
|
||||
})
|
||||
expect((json.agentResults as Record<string, unknown>)['alice']).not.toHaveProperty('messages')
|
||||
})
|
||||
|
||||
it('includes messages when requested', () => {
|
||||
const ar: AgentRunResult = {
|
||||
success: true,
|
||||
output: 'x',
|
||||
messages: [{ role: 'user', content: [{ type: 'text', text: 'hi' }] }],
|
||||
tokenUsage: { input_tokens: 0, output_tokens: 0 },
|
||||
toolCalls: [],
|
||||
}
|
||||
const tr: TeamRunResult = {
|
||||
success: true,
|
||||
agentResults: new Map([['bob', ar]]),
|
||||
totalTokenUsage: { input_tokens: 0, output_tokens: 0 },
|
||||
}
|
||||
const json = serializeTeamRunResult(tr, { pretty: false, includeMessages: true })
|
||||
expect(serializeAgentResult(ar, true).messages).toHaveLength(1)
|
||||
expect((json.agentResults as Record<string, unknown>)['bob']).toHaveProperty('messages')
|
||||
})
|
||||
})
|
||||
|
||||
describe('EXIT', () => {
|
||||
it('uses stable numeric codes', () => {
|
||||
expect(EXIT.SUCCESS).toBe(0)
|
||||
expect(EXIT.RUN_FAILED).toBe(1)
|
||||
expect(EXIT.USAGE).toBe(2)
|
||||
expect(EXIT.INTERNAL).toBe(3)
|
||||
})
|
||||
})
|
||||
|
|
@ -0,0 +1,626 @@
|
|||
import { describe, it, expect, vi } from 'vitest'
|
||||
import { z } from 'zod'
|
||||
import { AgentRunner } from '../src/agent/runner.js'
|
||||
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
|
||||
import { ToolExecutor } from '../src/tool/executor.js'
|
||||
import type { LLMAdapter, LLMChatOptions, LLMMessage, LLMResponse, TraceEvent } from '../src/types.js'
|
||||
|
||||
function textResponse(text: string): LLMResponse {
|
||||
return {
|
||||
id: `resp-${Math.random().toString(36).slice(2)}`,
|
||||
content: [{ type: 'text', text }],
|
||||
model: 'mock-model',
|
||||
stop_reason: 'end_turn',
|
||||
usage: { input_tokens: 10, output_tokens: 20 },
|
||||
}
|
||||
}
|
||||
|
||||
function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse {
|
||||
return {
|
||||
id: `resp-${Math.random().toString(36).slice(2)}`,
|
||||
content: [{
|
||||
type: 'tool_use',
|
||||
id: `tu-${Math.random().toString(36).slice(2)}`,
|
||||
name: toolName,
|
||||
input,
|
||||
}],
|
||||
model: 'mock-model',
|
||||
stop_reason: 'tool_use',
|
||||
usage: { input_tokens: 15, output_tokens: 25 },
|
||||
}
|
||||
}
|
||||
|
||||
function buildRegistryAndExecutor(): { registry: ToolRegistry; executor: ToolExecutor } {
|
||||
const registry = new ToolRegistry()
|
||||
registry.register(
|
||||
defineTool({
|
||||
name: 'echo',
|
||||
description: 'Echo input',
|
||||
inputSchema: z.object({ message: z.string() }),
|
||||
async execute({ message }) {
|
||||
return { data: message }
|
||||
},
|
||||
}),
|
||||
)
|
||||
return { registry, executor: new ToolExecutor(registry) }
|
||||
}
|
||||
|
||||
describe('AgentRunner contextStrategy', () => {
|
||||
it('keeps baseline behavior when contextStrategy is not set', async () => {
|
||||
const calls: LLMMessage[][] = []
|
||||
const adapter: LLMAdapter = {
|
||||
name: 'mock',
|
||||
async chat(messages) {
|
||||
calls.push(messages.map(m => ({ role: m.role, content: m.content })))
|
||||
return calls.length === 1
|
||||
? toolUseResponse('echo', { message: 'hello' })
|
||||
: textResponse('done')
|
||||
},
|
||||
async *stream() {
|
||||
/* unused */
|
||||
},
|
||||
}
|
||||
const { registry, executor } = buildRegistryAndExecutor()
|
||||
const runner = new AgentRunner(adapter, registry, executor, {
|
||||
model: 'mock-model',
|
||||
allowedTools: ['echo'],
|
||||
maxTurns: 4,
|
||||
})
|
||||
|
||||
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||
expect(calls).toHaveLength(2)
|
||||
expect(calls[0]).toHaveLength(1)
|
||||
expect(calls[1]!.length).toBeGreaterThan(calls[0]!.length)
|
||||
})
|
||||
|
||||
it('sliding-window truncates old turns and preserves the first user message', async () => {
  const calls: LLMMessage[][] = []
  // Three tool-use turns followed by a final text response, so the window
  // (maxTurns: 1) has material to truncate by the last call.
  const responses = [
    toolUseResponse('echo', { message: 't1' }),
    toolUseResponse('echo', { message: 't2' }),
    toolUseResponse('echo', { message: 't3' }),
    textResponse('done'),
  ]
  let idx = 0
  const adapter: LLMAdapter = {
    name: 'mock',
    async chat(messages) {
      calls.push(messages.map(m => ({ role: m.role, content: m.content })))
      return responses[idx++]!
    },
    async *stream() {
      /* unused */
    },
  }
  const { registry, executor } = buildRegistryAndExecutor()
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: { type: 'sliding-window', maxTurns: 1 },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'original prompt' }] }])

  // The original user prompt must survive truncation as the first message...
  const laterCall = calls[calls.length - 1]!
  const firstUserText = laterCall[0]!.content[0]
  expect(firstUserText).toMatchObject({ type: 'text', text: 'original prompt' })
  // ...and some text block must carry the truncation marker.
  const flattenedText = laterCall.flatMap(m => m.content.filter(c => c.type === 'text'))
  expect(flattenedText.some(c => c.type === 'text' && c.text.includes('truncated'))).toBe(true)
})
|
||||
|
||||
it('summarize strategy replaces old context and emits summary trace call', async () => {
  // Record both the messages and the options of every adapter call, so we can
  // later identify the tool-less summarization call.
  const calls: Array<{ messages: LLMMessage[]; options: LLMChatOptions }> = []
  const traces: TraceEvent[] = []
  // Two bulky tool turns (to exceed maxTokens: 20), then the summary response,
  // then the final answer.
  const responses = [
    toolUseResponse('echo', { message: 'first turn payload '.repeat(20) }),
    toolUseResponse('echo', { message: 'second turn payload '.repeat(20) }),
    textResponse('This is a concise summary.'),
    textResponse('final answer'),
  ]
  let idx = 0
  const adapter: LLMAdapter = {
    name: 'mock',
    async chat(messages, options) {
      calls.push({ messages: messages.map(m => ({ role: m.role, content: m.content })), options })
      return responses[idx++]!
    },
    async *stream() {
      /* unused */
    },
  }
  const { registry, executor } = buildRegistryAndExecutor()
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: { type: 'summarize', maxTokens: 20 },
  })

  const result = await runner.run(
    [{ role: 'user', content: [{ type: 'text', text: 'start' }] }],
    { onTrace: (e) => { traces.push(e) }, runId: 'run-summary', traceAgent: 'context-agent' },
  )

  // The summarization call is the one with a single message and no tools.
  const summaryCall = calls.find(c => c.messages.length === 1 && c.options.tools === undefined)
  expect(summaryCall).toBeDefined()
  // The summarization call must be traced with phase 'summary'.
  const llmTraces = traces.filter(t => t.type === 'llm_call')
  expect(llmTraces.some(t => t.type === 'llm_call' && t.phase === 'summary')).toBe(true)

  // Summary adapter usage must count toward RunResult.tokenUsage (maxTokenBudget).
  // Mock usage: toolUseResponse = 15 in / 25 out, textResponse = 10 in / 20 out.
  expect(result.tokenUsage.input_tokens).toBe(15 + 15 + 10 + 10)
  expect(result.tokenUsage.output_tokens).toBe(25 + 25 + 20 + 20)

  // After compaction, summary text is folded into the next user turn (not a
  // standalone user message), preserving user/assistant alternation.
  const turnAfterSummary = calls.find(
    c => c.messages.some(
      m => m.role === 'user' && m.content.some(
        b => b.type === 'text' && b.text.includes('[Conversation summary]'),
      ),
    ),
  )
  expect(turnAfterSummary).toBeDefined()
  const rolesAfterFirstUser = turnAfterSummary!.messages.map(m => m.role).join(',')
  expect(rolesAfterFirstUser).not.toMatch(/^user,user/)
})
|
||||
|
||||
it('custom strategy calls compress callback and uses returned messages', async () => {
|
||||
const compress = vi.fn((messages: LLMMessage[]) => messages.slice(-1))
|
||||
const calls: LLMMessage[][] = []
|
||||
const responses = [
|
||||
toolUseResponse('echo', { message: 'hello' }),
|
||||
textResponse('done'),
|
||||
]
|
||||
let idx = 0
|
||||
const adapter: LLMAdapter = {
|
||||
name: 'mock',
|
||||
async chat(messages) {
|
||||
calls.push(messages.map(m => ({ role: m.role, content: m.content })))
|
||||
return responses[idx++]!
|
||||
},
|
||||
async *stream() {
|
||||
/* unused */
|
||||
},
|
||||
}
|
||||
const { registry, executor } = buildRegistryAndExecutor()
|
||||
const runner = new AgentRunner(adapter, registry, executor, {
|
||||
model: 'mock-model',
|
||||
allowedTools: ['echo'],
|
||||
maxTurns: 4,
|
||||
contextStrategy: {
|
||||
type: 'custom',
|
||||
compress,
|
||||
},
|
||||
})
|
||||
|
||||
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'custom prompt' }] }])
|
||||
|
||||
expect(compress).toHaveBeenCalledOnce()
|
||||
expect(calls[1]).toHaveLength(1)
|
||||
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// compact strategy
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('compact strategy', () => {
|
||||
const longText = 'x'.repeat(3000)
|
||||
const longToolResult = 'result-data '.repeat(100) // ~1200 chars
|
||||
|
||||
function buildMultiTurnAdapter(
|
||||
responseCount: number,
|
||||
calls: LLMMessage[][],
|
||||
): LLMAdapter {
|
||||
const responses: LLMResponse[] = []
|
||||
for (let i = 0; i < responseCount - 1; i++) {
|
||||
responses.push(toolUseResponse('echo', { message: `turn-${i}` }))
|
||||
}
|
||||
responses.push(textResponse('done'))
|
||||
let idx = 0
|
||||
return {
|
||||
name: 'mock',
|
||||
async chat(messages) {
|
||||
calls.push(messages.map(m => ({ role: m.role, content: m.content })))
|
||||
return responses[idx++]!
|
||||
},
|
||||
async *stream() { /* unused */ },
|
||||
}
|
||||
}
|
||||
|
||||
/** Build a registry with an echo tool that returns a fixed result string. */
|
||||
function buildEchoRegistry(result: string): { registry: ToolRegistry; executor: ToolExecutor } {
|
||||
const registry = new ToolRegistry()
|
||||
registry.register(
|
||||
defineTool({
|
||||
name: 'echo',
|
||||
description: 'Echo input',
|
||||
inputSchema: z.object({ message: z.string() }),
|
||||
async execute() {
|
||||
return { data: result }
|
||||
},
|
||||
}),
|
||||
)
|
||||
return { registry, executor: new ToolExecutor(registry) }
|
||||
}
|
||||
|
||||
it('does not activate below maxTokens threshold', async () => {
  const calls: LLMMessage[][] = []
  const adapter = buildMultiTurnAdapter(3, calls)
  // Short tool results keep estimated token usage far below the threshold.
  const { registry, executor } = buildEchoRegistry('short')
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: { type: 'compact', maxTokens: 999999 },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // On the 3rd call (turn 3), all previous messages should still be intact
  // because estimated tokens are way below the threshold.
  const lastCall = calls[calls.length - 1]!
  const allToolResults = lastCall.flatMap(m =>
    m.content.filter(b => b.type === 'tool_result'),
  )
  for (const tr of allToolResults) {
    if (tr.type === 'tool_result') {
      expect(tr.content).not.toContain('compacted')
    }
  }
})
|
||||
|
||||
it('compresses old tool_result blocks when tokens exceed threshold', async () => {
  const calls: LLMMessage[][] = []
  const adapter = buildMultiTurnAdapter(4, calls)
  // ~1200-char tool results guarantee the 20-token budget is exceeded.
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20, // very low to always trigger
      preserveRecentTurns: 1, // only protect the most recent turn
      minToolResultChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // On the last call, old tool results should have compact markers.
  const lastCall = calls[calls.length - 1]!
  const toolResults = lastCall.flatMap(m =>
    m.content.filter(b => b.type === 'tool_result'),
  )
  const compacted = toolResults.filter(
    b => b.type === 'tool_result' && b.content.includes('compacted'),
  )
  expect(compacted.length).toBeGreaterThan(0)
  // Marker should include tool name.
  for (const tr of compacted) {
    if (tr.type === 'tool_result') {
      expect(tr.content).toMatch(/\[Tool result: echo/)
    }
  }
})
|
||||
|
||||
it('preserves the first user message', async () => {
|
||||
const calls: LLMMessage[][] = []
|
||||
const adapter = buildMultiTurnAdapter(4, calls)
|
||||
const { registry, executor } = buildEchoRegistry(longToolResult)
|
||||
const runner = new AgentRunner(adapter, registry, executor, {
|
||||
model: 'mock-model',
|
||||
allowedTools: ['echo'],
|
||||
maxTurns: 8,
|
||||
contextStrategy: {
|
||||
type: 'compact',
|
||||
maxTokens: 20,
|
||||
preserveRecentTurns: 1,
|
||||
minToolResultChars: 100,
|
||||
},
|
||||
})
|
||||
|
||||
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'original prompt' }] }])
|
||||
|
||||
const lastCall = calls[calls.length - 1]!
|
||||
const firstUser = lastCall.find(m => m.role === 'user')!
|
||||
expect(firstUser.content[0]).toMatchObject({ type: 'text', text: 'original prompt' })
|
||||
})
|
||||
|
||||
it('preserves tool_use blocks in old turns', async () => {
  const calls: LLMMessage[][] = []
  const adapter = buildMultiTurnAdapter(4, calls)
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // Every assistant message should still have its tool_use block.
  // Compaction may shrink tool_result contents but must never drop tool_use.
  const lastCall = calls[calls.length - 1]!
  const assistantMsgs = lastCall.filter(m => m.role === 'assistant')
  for (const msg of assistantMsgs) {
    const toolUses = msg.content.filter(b => b.type === 'tool_use')
    // The last assistant message is "done" (text only), others have tool_use.
    if (msg.content.some(b => b.type === 'text' && b.text === 'done')) continue
    expect(toolUses.length).toBeGreaterThan(0)
  }
})
|
||||
|
||||
it('preserves error tool_result blocks', async () => {
  const calls: LLMMessage[][] = []
  const responses: LLMResponse[] = [
    toolUseResponse('echo', { message: 'will-fail' }),
    toolUseResponse('echo', { message: 'ok' }),
    textResponse('done'),
  ]
  let idx = 0
  const adapter: LLMAdapter = {
    name: 'mock',
    async chat(messages) {
      calls.push(messages.map(m => ({ role: m.role, content: m.content })))
      return responses[idx++]!
    },
    async *stream() { /* unused */ },
  }
  // Tool that fails on first call, succeeds on second.
  let callCount = 0
  const registry = new ToolRegistry()
  registry.register(
    defineTool({
      name: 'echo',
      description: 'Echo input',
      inputSchema: z.object({ message: z.string() }),
      async execute() {
        callCount++
        if (callCount === 1) {
          // Long error message: would exceed minToolResultChars if errors
          // were compactable — which they must not be.
          throw new Error('deliberate error '.repeat(40))
        }
        return { data: longToolResult }
      },
    }),
  )
  const executor = new ToolExecutor(registry)
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 50,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  const lastCall = calls[calls.length - 1]!
  const errorResults = lastCall.flatMap(m =>
    m.content.filter(b => b.type === 'tool_result' && b.is_error),
  )
  // Error results should still have their original content (not compacted).
  for (const er of errorResults) {
    if (er.type === 'tool_result') {
      expect(er.content).not.toContain('compacted')
      expect(er.content).toContain('deliberate error')
    }
  }
})
|
||||
|
||||
it('does not re-compress markers from compressToolResults', async () => {
  const calls: LLMMessage[][] = []
  const adapter = buildMultiTurnAdapter(4, calls)
  const { registry, executor } = buildEchoRegistry(longToolResult)
  // Both features are active: per-turn compressToolResults AND the compact
  // strategy; compact must recognize existing markers and leave them alone.
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    compressToolResults: { minChars: 100 },
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 10,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  const lastCall = calls[calls.length - 1]!
  const allToolResults = lastCall.flatMap(m =>
    m.content.filter(b => b.type === 'tool_result'),
  )
  // No result should contain nested markers.
  for (const tr of allToolResults) {
    if (tr.type === 'tool_result') {
      // Should not have a compact marker wrapping another marker.
      const markerCount = (tr.content.match(/\[Tool/g) || []).length
      expect(markerCount).toBeLessThanOrEqual(1)
    }
  }
})
|
||||
|
||||
it('truncates long assistant text blocks in old turns', async () => {
  const calls: LLMMessage[][] = []
  const responses: LLMResponse[] = [
    // First turn: assistant with long text + tool_use
    {
      id: 'r1',
      content: [
        { type: 'text', text: longText },
        { type: 'tool_use', id: 'tu-1', name: 'echo', input: { message: 'hi' } },
      ],
      model: 'mock-model',
      stop_reason: 'tool_use',
      usage: { input_tokens: 10, output_tokens: 20 },
    },
    toolUseResponse('echo', { message: 'turn2' }),
    textResponse('done'),
  ]
  let idx = 0
  const adapter: LLMAdapter = {
    name: 'mock',
    async chat(messages) {
      calls.push(messages.map(m => ({ role: m.role, content: m.content })))
      return responses[idx++]!
    },
    async *stream() { /* unused */ },
  }
  const { registry, executor } = buildEchoRegistry('short')
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      // longText (3000 chars) is well above the 500-char trigger; keep ~100 chars.
      minTextBlockChars: 500,
      textBlockExcerptChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  const lastCall = calls[calls.length - 1]!
  // The first assistant message (old zone) should have its text truncated.
  const firstAssistant = lastCall.find(m => m.role === 'assistant')!
  const textBlocks = firstAssistant.content.filter(b => b.type === 'text')
  const truncated = textBlocks.find(
    b => b.type === 'text' && b.text.includes('truncated'),
  )
  expect(truncated).toBeDefined()
  if (truncated && truncated.type === 'text') {
    // Truncated text is shorter and reports the original length in its marker.
    expect(truncated.text.length).toBeLessThan(longText.length)
    expect(truncated.text).toContain(`${longText.length} chars total`)
  }
})
|
||||
|
||||
it('keeps recent turns intact within preserveRecentTurns', async () => {
  const calls: LLMMessage[][] = []
  const adapter = buildMultiTurnAdapter(4, calls)
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // The most recent tool_result (last user message with tool_result) should
  // still contain the original long content.
  const lastCall = calls[calls.length - 1]!
  const userMsgs = lastCall.filter(m => m.role === 'user')
  // Walk backwards to find the newest user message carrying a tool_result.
  const lastUserWithToolResult = [...userMsgs]
    .reverse()
    .find(m => m.content.some(b => b.type === 'tool_result'))
  expect(lastUserWithToolResult).toBeDefined()
  const recentTr = lastUserWithToolResult!.content.find(b => b.type === 'tool_result')
  if (recentTr && recentTr.type === 'tool_result') {
    expect(recentTr.content).not.toContain('compacted')
    expect(recentTr.content).toContain('result-data')
  }
})
|
||||
|
||||
it('does not compact when all turns fit in preserveRecentTurns', async () => {
  const calls: LLMMessage[][] = []
  const adapter = buildMultiTurnAdapter(3, calls)
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 10, // way more than actual turns
      minToolResultChars: 100,
    },
  })

  await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])

  // All tool results should still have original content.
  // Even though maxTokens is exceeded, every turn sits inside the protected
  // recent window, so nothing is eligible for compaction.
  const lastCall = calls[calls.length - 1]!
  const toolResults = lastCall.flatMap(m =>
    m.content.filter(b => b.type === 'tool_result'),
  )
  for (const tr of toolResults) {
    if (tr.type === 'tool_result') {
      expect(tr.content).not.toContain('compacted')
    }
  }
})
|
||||
|
||||
it('maintains correct role alternation after compaction', async () => {
|
||||
const calls: LLMMessage[][] = []
|
||||
const adapter = buildMultiTurnAdapter(5, calls)
|
||||
const { registry, executor } = buildEchoRegistry(longToolResult)
|
||||
const runner = new AgentRunner(adapter, registry, executor, {
|
||||
model: 'mock-model',
|
||||
allowedTools: ['echo'],
|
||||
maxTurns: 10,
|
||||
contextStrategy: {
|
||||
type: 'compact',
|
||||
maxTokens: 20,
|
||||
preserveRecentTurns: 1,
|
||||
minToolResultChars: 100,
|
||||
},
|
||||
})
|
||||
|
||||
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||
|
||||
// Check all LLM calls for role alternation.
|
||||
for (const callMsgs of calls) {
|
||||
for (let i = 1; i < callMsgs.length; i++) {
|
||||
expect(callMsgs[i]!.role).not.toBe(callMsgs[i - 1]!.role)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
it('returns ZERO_USAGE (no LLM cost from compaction)', async () => {
  const calls: LLMMessage[][] = []
  const adapter = buildMultiTurnAdapter(4, calls)
  const { registry, executor } = buildEchoRegistry(longToolResult)
  const runner = new AgentRunner(adapter, registry, executor, {
    model: 'mock-model',
    allowedTools: ['echo'],
    maxTurns: 8,
    contextStrategy: {
      type: 'compact',
      maxTokens: 20,
      preserveRecentTurns: 1,
      minToolResultChars: 100,
    },
  })

  const result = await runner.run([
    { role: 'user', content: [{ type: 'text', text: 'start' }] },
  ])

  // Token usage should only reflect the 4 actual LLM calls (no extra from compaction).
  // Each toolUseResponse: input=15, output=25. textResponse: input=10, output=20.
  // 3 tool calls + 1 final = (15*3 + 10) input, (25*3 + 20) output.
  expect(result.tokenUsage.input_tokens).toBe(15 * 3 + 10)
  expect(result.tokenUsage.output_tokens).toBe(25 * 3 + 20)
})
|
||||
})
|
||||
})
|
||||
|
|
@ -0,0 +1,405 @@
|
|||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
|
||||
import { textMsg, chatOpts, toolDef, collectEvents } from './helpers/llm-fixtures.js'
|
||||
import type { LLMResponse, StreamEvent, ToolUseBlock } from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock OpenAI SDK (Copilot uses it under the hood)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// vi.hoisted ensures these spies exist before the (hoisted) vi.mock factory
// below runs — vi.mock calls are moved to the top of the module by vitest.
const mockCreate = vi.hoisted(() => vi.fn())
const OpenAIMock = vi.hoisted(() =>
  vi.fn(() => ({
    // Minimal surface the adapter touches: client.chat.completions.create(...)
    chat: { completions: { create: mockCreate } },
  })),
)

// Replace the real OpenAI SDK; cover both default and named export shapes.
vi.mock('openai', () => ({
  default: OpenAIMock,
  OpenAI: OpenAIMock,
}))
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock global fetch for token management
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const originalFetch = globalThis.fetch
|
||||
|
||||
function mockFetchForToken(sessionToken = 'cop_session_abc', expiresAt?: number) {
|
||||
const exp = expiresAt ?? Math.floor(Date.now() / 1000) + 3600
|
||||
return vi.fn().mockResolvedValue({
|
||||
ok: true,
|
||||
json: () => Promise.resolve({ token: sessionToken, expires_at: exp }),
|
||||
text: () => Promise.resolve(''),
|
||||
})
|
||||
}
|
||||
|
||||
import { CopilotAdapter, getCopilotMultiplier, formatCopilotMultiplier } from '../src/llm/copilot.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function makeCompletion(overrides: Record<string, unknown> = {}) {
|
||||
return {
|
||||
id: 'chatcmpl-cop',
|
||||
model: 'claude-sonnet-4',
|
||||
choices: [{
|
||||
index: 0,
|
||||
message: { role: 'assistant', content: 'Hello from Copilot', tool_calls: undefined },
|
||||
finish_reason: 'stop',
|
||||
}],
|
||||
usage: { prompt_tokens: 8, completion_tokens: 4 },
|
||||
...overrides,
|
||||
}
|
||||
}
|
||||
|
||||
async function* makeChunks(chunks: Array<Record<string, unknown>>) {
|
||||
for (const chunk of chunks) yield chunk
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('CopilotAdapter', () => {
|
||||
// Snapshot of the token env vars taken in beforeEach, restored in afterEach.
let savedEnv: Record<string, string | undefined>

beforeEach(() => {
  vi.clearAllMocks()
  // Save then clear both token vars so each test starts with a clean
  // environment and constructor fallbacks are deterministic.
  savedEnv = {
    GITHUB_COPILOT_TOKEN: process.env['GITHUB_COPILOT_TOKEN'],
    GITHUB_TOKEN: process.env['GITHUB_TOKEN'],
  }
  delete process.env['GITHUB_COPILOT_TOKEN']
  delete process.env['GITHUB_TOKEN']
})

afterEach(() => {
  // Undo any per-test fetch stubbing.
  globalThis.fetch = originalFetch
  // Restore the env exactly: re-delete vars that were originally unset.
  for (const [key, val] of Object.entries(savedEnv)) {
    if (val === undefined) delete process.env[key]
    else process.env[key] = val
  }
})
|
||||
|
||||
// =========================================================================
|
||||
// Constructor & token resolution
|
||||
// =========================================================================
|
||||
|
||||
describe('constructor', () => {
  // These tests only assert construction succeeds (adapter.name is set);
  // actual token use is covered in the 'token management' suite.
  it('accepts string apiKey as first argument', () => {
    const adapter = new CopilotAdapter('gh_token_123')
    expect(adapter.name).toBe('copilot')
  })

  it('accepts options object with apiKey', () => {
    const adapter = new CopilotAdapter({ apiKey: 'gh_token_456' })
    expect(adapter.name).toBe('copilot')
  })

  it('falls back to GITHUB_COPILOT_TOKEN env var', () => {
    process.env['GITHUB_COPILOT_TOKEN'] = 'env_copilot_token'
    const adapter = new CopilotAdapter()
    expect(adapter.name).toBe('copilot')
  })

  it('falls back to GITHUB_TOKEN env var', () => {
    process.env['GITHUB_TOKEN'] = 'env_gh_token'
    const adapter = new CopilotAdapter()
    expect(adapter.name).toBe('copilot')
  })
})
|
||||
|
||||
// =========================================================================
|
||||
// Token management
|
||||
// =========================================================================
|
||||
|
||||
describe('token management', () => {
  it('exchanges GitHub token for Copilot session token', async () => {
    const fetchMock = mockFetchForToken('session_xyz')
    globalThis.fetch = fetchMock
    const adapter = new CopilotAdapter('gh_token')
    mockCreate.mockResolvedValue(makeCompletion())

    await adapter.chat([textMsg('user', 'Hi')], chatOpts())

    // fetch was called to exchange token
    expect(fetchMock).toHaveBeenCalledWith(
      'https://api.github.com/copilot_internal/v2/token',
      expect.objectContaining({
        method: 'GET',
        headers: expect.objectContaining({
          Authorization: 'token gh_token',
        }),
      }),
    )

    // OpenAI client was created with session token
    expect(OpenAIMock).toHaveBeenCalledWith(
      expect.objectContaining({
        apiKey: 'session_xyz',
        baseURL: 'https://api.githubcopilot.com',
      }),
    )
  })

  it('caches session token and reuses on second call', async () => {
    const fetchMock = mockFetchForToken()
    globalThis.fetch = fetchMock
    const adapter = new CopilotAdapter('gh_token')
    mockCreate.mockResolvedValue(makeCompletion())

    await adapter.chat([textMsg('user', 'Hi')], chatOpts())
    await adapter.chat([textMsg('user', 'Hi again')], chatOpts())

    // fetch should only be called once (cached)
    expect(fetchMock).toHaveBeenCalledTimes(1)
  })

  it('refreshes token when near expiry (within 60s)', async () => {
    const nowSec = Math.floor(Date.now() / 1000)
    // First call: token expires in 30 seconds (within 60s grace)
    let callCount = 0
    globalThis.fetch = vi.fn().mockImplementation(() => {
      callCount++
      return Promise.resolve({
        ok: true,
        json: () => Promise.resolve({
          // Second and later exchanges return a long-lived token.
          token: `session_${callCount}`,
          expires_at: callCount === 1 ? nowSec + 30 : nowSec + 3600,
        }),
        text: () => Promise.resolve(''),
      })
    })

    const adapter = new CopilotAdapter('gh_token')
    mockCreate.mockResolvedValue(makeCompletion())

    await adapter.chat([textMsg('user', 'Hi')], chatOpts())
    // Token is within 60s of expiry, should refresh
    await adapter.chat([textMsg('user', 'Hi again')], chatOpts())

    expect(callCount).toBe(2)
  })

  it('concurrent requests share a single refresh promise', async () => {
    // Keep the token fetch pending until both chat() calls are in flight.
    let resolveToken: ((v: unknown) => void) | undefined
    const slowFetch = vi.fn().mockImplementation(() => {
      return new Promise((resolve) => {
        resolveToken = resolve
      })
    })
    globalThis.fetch = slowFetch

    const adapter = new CopilotAdapter('gh_token')
    mockCreate.mockResolvedValue(makeCompletion())

    // Fire two concurrent requests
    const p1 = adapter.chat([textMsg('user', 'A')], chatOpts())
    const p2 = adapter.chat([textMsg('user', 'B')], chatOpts())

    // Resolve the single in-flight fetch
    resolveToken!({
      ok: true,
      json: () => Promise.resolve({
        token: 'shared_session',
        expires_at: Math.floor(Date.now() / 1000) + 3600,
      }),
      text: () => Promise.resolve(''),
    })

    await Promise.all([p1, p2])

    // fetch was called only once (mutex prevented double refresh)
    expect(slowFetch).toHaveBeenCalledTimes(1)
  })

  it('throws on failed token exchange', async () => {
    globalThis.fetch = vi.fn().mockResolvedValue({
      ok: false,
      status: 401,
      text: () => Promise.resolve('Unauthorized'),
      statusText: 'Unauthorized',
    })

    const adapter = new CopilotAdapter('bad_token')
    mockCreate.mockResolvedValue(makeCompletion())

    await expect(
      adapter.chat([textMsg('user', 'Hi')], chatOpts()),
    ).rejects.toThrow('Copilot token exchange failed')
  })
})
|
||||
|
||||
// =========================================================================
|
||||
// chat()
|
||||
// =========================================================================
|
||||
|
||||
describe('chat()', () => {
  let adapter: CopilotAdapter

  beforeEach(() => {
    // Fresh adapter with a working token exchange for every test.
    globalThis.fetch = mockFetchForToken()
    adapter = new CopilotAdapter('gh_token')
  })

  it('creates OpenAI client with Copilot-specific headers and baseURL', async () => {
    mockCreate.mockResolvedValue(makeCompletion())

    await adapter.chat([textMsg('user', 'Hi')], chatOpts())

    expect(OpenAIMock).toHaveBeenCalledWith(
      expect.objectContaining({
        baseURL: 'https://api.githubcopilot.com',
        defaultHeaders: expect.objectContaining({
          'Copilot-Integration-Id': 'vscode-chat',
          'Editor-Version': 'vscode/1.100.0',
        }),
      }),
    )
  })

  it('returns LLMResponse from completion', async () => {
    mockCreate.mockResolvedValue(makeCompletion())

    const result = await adapter.chat([textMsg('user', 'Hi')], chatOpts())

    // OpenAI-shaped completion is normalized to the internal LLMResponse
    // shape ('stop' -> 'end_turn', prompt/completion -> input/output tokens).
    expect(result).toEqual({
      id: 'chatcmpl-cop',
      content: [{ type: 'text', text: 'Hello from Copilot' }],
      model: 'claude-sonnet-4',
      stop_reason: 'end_turn',
      usage: { input_tokens: 8, output_tokens: 4 },
    })
  })

  it('passes tools and temperature through', async () => {
    mockCreate.mockResolvedValue(makeCompletion())
    const tool = toolDef('search')

    await adapter.chat(
      [textMsg('user', 'Hi')],
      chatOpts({ tools: [tool], temperature: 0.5 }),
    )

    // Inspect the raw request object forwarded to the SDK.
    const callArgs = mockCreate.mock.calls[0][0]
    expect(callArgs.tools[0].function.name).toBe('search')
    expect(callArgs.temperature).toBe(0.5)
    expect(callArgs.stream).toBe(false)
  })
})
|
||||
|
||||
// =========================================================================
|
||||
// stream()
|
||||
// =========================================================================
|
||||
|
||||
describe('stream()', () => {
  let adapter: CopilotAdapter

  beforeEach(() => {
    globalThis.fetch = mockFetchForToken()
    adapter = new CopilotAdapter('gh_token')
  })

  it('yields text and done events', async () => {
    // Chunk sequence: content delta, stop, then a usage-only chunk.
    mockCreate.mockResolvedValue(makeChunks([
      { id: 'c1', model: 'gpt-4o', choices: [{ index: 0, delta: { content: 'Hi' }, finish_reason: null }], usage: null },
      { id: 'c1', model: 'gpt-4o', choices: [{ index: 0, delta: {}, finish_reason: 'stop' }], usage: null },
      { id: 'c1', model: 'gpt-4o', choices: [], usage: { prompt_tokens: 5, completion_tokens: 2 } },
    ]))

    const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))

    expect(events.filter(e => e.type === 'text')).toEqual([
      { type: 'text', data: 'Hi' },
    ])
    // The final done event carries the aggregated response with usage.
    const done = events.find(e => e.type === 'done')
    expect((done!.data as LLMResponse).usage).toEqual({ input_tokens: 5, output_tokens: 2 })
  })

  it('yields tool_use events from streamed tool calls', async () => {
    mockCreate.mockResolvedValue(makeChunks([
      {
        id: 'c1', model: 'gpt-4o',
        choices: [{ index: 0, delta: { tool_calls: [{ index: 0, id: 'call_1', function: { name: 'search', arguments: '{"q":"x"}' } }] }, finish_reason: null }],
        usage: null,
      },
      { id: 'c1', model: 'gpt-4o', choices: [{ index: 0, delta: {}, finish_reason: 'tool_calls' }], usage: null },
      { id: 'c1', model: 'gpt-4o', choices: [], usage: { prompt_tokens: 5, completion_tokens: 3 } },
    ]))

    const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))

    const toolEvents = events.filter(e => e.type === 'tool_use')
    expect(toolEvents).toHaveLength(1)
    expect((toolEvents[0].data as ToolUseBlock).name).toBe('search')
  })

  it('yields error event on failure', async () => {
    // An async generator that throws immediately simulates a broken stream.
    mockCreate.mockResolvedValue(
      (async function* () { throw new Error('Copilot down') })(),
    )

    const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))

    // The failure surfaces as an error event rather than a rejected iterator.
    expect(events.filter(e => e.type === 'error')).toHaveLength(1)
  })
})
|
||||
|
||||
// =========================================================================
|
||||
// getCopilotMultiplier()
|
||||
// =========================================================================
|
||||
|
||||
describe('getCopilotMultiplier()', () => {
|
||||
it('returns 0 for included models', () => {
|
||||
expect(getCopilotMultiplier('gpt-4.1')).toBe(0)
|
||||
expect(getCopilotMultiplier('gpt-4o')).toBe(0)
|
||||
expect(getCopilotMultiplier('gpt-5-mini')).toBe(0)
|
||||
})
|
||||
|
||||
it('returns 0.25 for grok models', () => {
|
||||
expect(getCopilotMultiplier('grok-code-fast-1')).toBe(0.25)
|
||||
})
|
||||
|
||||
it('returns 0.33 for haiku, gemini-3-flash, etc.', () => {
|
||||
expect(getCopilotMultiplier('claude-haiku-4.5')).toBe(0.33)
|
||||
expect(getCopilotMultiplier('gemini-3-flash')).toBe(0.33)
|
||||
})
|
||||
|
||||
it('returns 1 for sonnet, gemini-pro, gpt-5.x', () => {
|
||||
expect(getCopilotMultiplier('claude-sonnet-4')).toBe(1)
|
||||
expect(getCopilotMultiplier('gemini-2.5-pro')).toBe(1)
|
||||
expect(getCopilotMultiplier('gpt-5.1')).toBe(1)
|
||||
})
|
||||
|
||||
it('returns 3 for claude-opus (non-fast)', () => {
|
||||
expect(getCopilotMultiplier('claude-opus-4.5')).toBe(3)
|
||||
})
|
||||
|
||||
it('returns 30 for claude-opus fast', () => {
|
||||
expect(getCopilotMultiplier('claude-opus-4.6-fast')).toBe(30)
|
||||
})
|
||||
|
||||
it('returns 1 for unknown models', () => {
|
||||
expect(getCopilotMultiplier('some-new-model')).toBe(1)
|
||||
})
|
||||
})
|
||||
|
||||
// =========================================================================
|
||||
// formatCopilotMultiplier()
|
||||
// =========================================================================
|
||||
|
||||
describe('formatCopilotMultiplier()', () => {
|
||||
it('returns "included (0\u00d7)" for 0', () => {
|
||||
expect(formatCopilotMultiplier(0)).toBe('included (0\u00d7)')
|
||||
})
|
||||
|
||||
it('returns "1\u00d7 premium request" for 1', () => {
|
||||
expect(formatCopilotMultiplier(1)).toBe('1\u00d7 premium request')
|
||||
})
|
||||
|
||||
it('returns "0.33\u00d7 premium request" for 0.33', () => {
|
||||
expect(formatCopilotMultiplier(0.33)).toBe('0.33\u00d7 premium request')
|
||||
})
|
||||
})
|
||||
})
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock OpenAI constructor (must be hoisted for Vitest)
|
||||
// ---------------------------------------------------------------------------
|
||||
const OpenAIMock = vi.hoisted(() => vi.fn())
|
||||
|
||||
vi.mock('openai', () => ({
|
||||
default: OpenAIMock,
|
||||
}))
|
||||
|
||||
import { DeepSeekAdapter } from '../src/llm/deepseek.js'
|
||||
import { createAdapter } from '../src/llm/adapter.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DeepSeekAdapter tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('DeepSeekAdapter', () => {
|
||||
beforeEach(() => {
|
||||
OpenAIMock.mockClear()
|
||||
})
|
||||
|
||||
it('has name "deepseek"', () => {
|
||||
const adapter = new DeepSeekAdapter()
|
||||
expect(adapter.name).toBe('deepseek')
|
||||
})
|
||||
|
||||
it('uses DEEPSEEK_API_KEY by default', () => {
|
||||
const original = process.env['DEEPSEEK_API_KEY']
|
||||
process.env['DEEPSEEK_API_KEY'] = 'deepseek-test-key-123'
|
||||
|
||||
try {
|
||||
new DeepSeekAdapter()
|
||||
expect(OpenAIMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
apiKey: 'deepseek-test-key-123',
|
||||
baseURL: 'https://api.deepseek.com/v1',
|
||||
})
|
||||
)
|
||||
} finally {
|
||||
if (original === undefined) {
|
||||
delete process.env['DEEPSEEK_API_KEY']
|
||||
} else {
|
||||
process.env['DEEPSEEK_API_KEY'] = original
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
it('uses official DeepSeek baseURL by default', () => {
|
||||
new DeepSeekAdapter('some-key')
|
||||
expect(OpenAIMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
apiKey: 'some-key',
|
||||
baseURL: 'https://api.deepseek.com/v1',
|
||||
})
|
||||
)
|
||||
})
|
||||
|
||||
it('allows overriding apiKey and baseURL', () => {
|
||||
new DeepSeekAdapter('custom-key', 'https://custom.endpoint/v1')
|
||||
expect(OpenAIMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
apiKey: 'custom-key',
|
||||
baseURL: 'https://custom.endpoint/v1',
|
||||
})
|
||||
)
|
||||
})
|
||||
|
||||
it('createAdapter("deepseek") returns DeepSeekAdapter instance', async () => {
|
||||
const adapter = await createAdapter('deepseek')
|
||||
expect(adapter).toBeInstanceOf(DeepSeekAdapter)
|
||||
})
|
||||
})
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
/**
|
||||
* E2E tests for AnthropicAdapter against the real API.
|
||||
*
|
||||
* Skipped by default. Run with: npm run test:e2e
|
||||
* Requires: ANTHROPIC_API_KEY environment variable
|
||||
*/
|
||||
import { describe, it, expect } from 'vitest'
|
||||
import { AnthropicAdapter } from '../../src/llm/anthropic.js'
|
||||
import type { LLMResponse, StreamEvent, ToolUseBlock } from '../../src/types.js'
|
||||
|
||||
const describeE2E = process.env['RUN_E2E'] ? describe : describe.skip
|
||||
|
||||
describeE2E('AnthropicAdapter E2E', () => {
|
||||
const adapter = new AnthropicAdapter()
|
||||
const model = 'claude-haiku-4-5-20251001'
|
||||
|
||||
const weatherTool = {
|
||||
name: 'get_weather',
|
||||
description: 'Get the weather for a city',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: { city: { type: 'string' } },
|
||||
required: ['city'],
|
||||
},
|
||||
}
|
||||
|
||||
it('chat() returns a text response', async () => {
|
||||
const result = await adapter.chat(
|
||||
[{ role: 'user', content: [{ type: 'text', text: 'Say "hello" and nothing else.' }] }],
|
||||
{ model, maxTokens: 50, temperature: 0 },
|
||||
)
|
||||
|
||||
expect(result.id).toBeTruthy()
|
||||
expect(result.content.length).toBeGreaterThan(0)
|
||||
expect(result.content[0].type).toBe('text')
|
||||
expect(result.usage.input_tokens).toBeGreaterThan(0)
|
||||
expect(result.stop_reason).toBe('end_turn')
|
||||
}, 30_000)
|
||||
|
||||
it('chat() handles tool use', async () => {
|
||||
const result = await adapter.chat(
|
||||
[{ role: 'user', content: [{ type: 'text', text: 'What is the weather in Tokyo? Use the get_weather tool.' }] }],
|
||||
{ model, maxTokens: 100, temperature: 0, tools: [weatherTool] },
|
||||
)
|
||||
|
||||
const toolBlocks = result.content.filter(b => b.type === 'tool_use')
|
||||
expect(toolBlocks.length).toBeGreaterThan(0)
|
||||
expect((toolBlocks[0] as ToolUseBlock).name).toBe('get_weather')
|
||||
expect(result.stop_reason).toBe('tool_use')
|
||||
}, 30_000)
|
||||
|
||||
it('stream() yields text events and a done event', async () => {
|
||||
const events: StreamEvent[] = []
|
||||
for await (const event of adapter.stream(
|
||||
[{ role: 'user', content: [{ type: 'text', text: 'Say "hi".' }] }],
|
||||
{ model, maxTokens: 50, temperature: 0 },
|
||||
)) {
|
||||
events.push(event)
|
||||
}
|
||||
|
||||
const textEvents = events.filter(e => e.type === 'text')
|
||||
expect(textEvents.length).toBeGreaterThan(0)
|
||||
|
||||
const doneEvents = events.filter(e => e.type === 'done')
|
||||
expect(doneEvents).toHaveLength(1)
|
||||
const response = doneEvents[0].data as LLMResponse
|
||||
expect(response.usage.input_tokens).toBeGreaterThan(0)
|
||||
}, 30_000)
|
||||
|
||||
it('stream() handles tool use', async () => {
|
||||
const events: StreamEvent[] = []
|
||||
for await (const event of adapter.stream(
|
||||
[{ role: 'user', content: [{ type: 'text', text: 'Get weather in Paris. Use the tool.' }] }],
|
||||
{ model, maxTokens: 100, temperature: 0, tools: [weatherTool] },
|
||||
)) {
|
||||
events.push(event)
|
||||
}
|
||||
|
||||
const toolEvents = events.filter(e => e.type === 'tool_use')
|
||||
expect(toolEvents.length).toBeGreaterThan(0)
|
||||
expect((toolEvents[0].data as ToolUseBlock).name).toBe('get_weather')
|
||||
}, 30_000)
|
||||
})
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
/**
|
||||
* E2E tests for GeminiAdapter against the real API.
|
||||
*
|
||||
* Skipped by default. Run with: npm run test:e2e
|
||||
* Requires: GEMINI_API_KEY or GOOGLE_API_KEY environment variable
|
||||
*/
|
||||
import { describe, it, expect } from 'vitest'
|
||||
import { GeminiAdapter } from '../../src/llm/gemini.js'
|
||||
import type { LLMResponse, StreamEvent, ToolUseBlock } from '../../src/types.js'
|
||||
|
||||
const describeE2E = process.env['RUN_E2E'] ? describe : describe.skip
|
||||
|
||||
describeE2E('GeminiAdapter E2E', () => {
|
||||
const adapter = new GeminiAdapter()
|
||||
const model = 'gemini-2.0-flash'
|
||||
|
||||
const weatherTool = {
|
||||
name: 'get_weather',
|
||||
description: 'Get the weather for a city',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: { city: { type: 'string' } },
|
||||
required: ['city'],
|
||||
},
|
||||
}
|
||||
|
||||
it('chat() returns a text response', async () => {
|
||||
const result = await adapter.chat(
|
||||
[{ role: 'user', content: [{ type: 'text', text: 'Say "hello" and nothing else.' }] }],
|
||||
{ model, maxTokens: 50, temperature: 0 },
|
||||
)
|
||||
|
||||
expect(result.id).toBeTruthy()
|
||||
expect(result.content.length).toBeGreaterThan(0)
|
||||
expect(result.content[0].type).toBe('text')
|
||||
}, 30_000)
|
||||
|
||||
it('chat() handles tool use', async () => {
|
||||
const result = await adapter.chat(
|
||||
[{ role: 'user', content: [{ type: 'text', text: 'What is the weather in Tokyo? Use the get_weather tool.' }] }],
|
||||
{ model, maxTokens: 100, temperature: 0, tools: [weatherTool] },
|
||||
)
|
||||
|
||||
const toolBlocks = result.content.filter(b => b.type === 'tool_use')
|
||||
expect(toolBlocks.length).toBeGreaterThan(0)
|
||||
expect((toolBlocks[0] as ToolUseBlock).name).toBe('get_weather')
|
||||
expect(result.stop_reason).toBe('tool_use')
|
||||
}, 30_000)
|
||||
|
||||
it('stream() yields text events and a done event', async () => {
|
||||
const events: StreamEvent[] = []
|
||||
for await (const event of adapter.stream(
|
||||
[{ role: 'user', content: [{ type: 'text', text: 'Say "hi".' }] }],
|
||||
{ model, maxTokens: 50, temperature: 0 },
|
||||
)) {
|
||||
events.push(event)
|
||||
}
|
||||
|
||||
const textEvents = events.filter(e => e.type === 'text')
|
||||
expect(textEvents.length).toBeGreaterThan(0)
|
||||
|
||||
const doneEvents = events.filter(e => e.type === 'done')
|
||||
expect(doneEvents).toHaveLength(1)
|
||||
}, 30_000)
|
||||
})
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
/**
|
||||
* E2E tests for OpenAIAdapter against the real API.
|
||||
*
|
||||
* Skipped by default. Run with: npm run test:e2e
|
||||
* Requires: OPENAI_API_KEY environment variable
|
||||
*/
|
||||
import { describe, it, expect } from 'vitest'
|
||||
import { OpenAIAdapter } from '../../src/llm/openai.js'
|
||||
import type { LLMResponse, StreamEvent, ToolUseBlock } from '../../src/types.js'
|
||||
|
||||
const describeE2E = process.env['RUN_E2E'] ? describe : describe.skip
|
||||
|
||||
describeE2E('OpenAIAdapter E2E', () => {
|
||||
const adapter = new OpenAIAdapter()
|
||||
const model = 'gpt-4o-mini'
|
||||
|
||||
const weatherTool = {
|
||||
name: 'get_weather',
|
||||
description: 'Get the weather for a city',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: { city: { type: 'string' } },
|
||||
required: ['city'],
|
||||
},
|
||||
}
|
||||
|
||||
it('chat() returns a text response', async () => {
|
||||
const result = await adapter.chat(
|
||||
[{ role: 'user', content: [{ type: 'text', text: 'Say "hello" and nothing else.' }] }],
|
||||
{ model, maxTokens: 50, temperature: 0 },
|
||||
)
|
||||
|
||||
expect(result.id).toBeTruthy()
|
||||
expect(result.content.length).toBeGreaterThan(0)
|
||||
expect(result.content[0].type).toBe('text')
|
||||
expect(result.usage.input_tokens).toBeGreaterThan(0)
|
||||
}, 30_000)
|
||||
|
||||
it('chat() handles tool use', async () => {
|
||||
const result = await adapter.chat(
|
||||
[{ role: 'user', content: [{ type: 'text', text: 'What is the weather in Tokyo? Use the get_weather tool.' }] }],
|
||||
{ model, maxTokens: 100, temperature: 0, tools: [weatherTool] },
|
||||
)
|
||||
|
||||
const toolBlocks = result.content.filter(b => b.type === 'tool_use')
|
||||
expect(toolBlocks.length).toBeGreaterThan(0)
|
||||
expect((toolBlocks[0] as ToolUseBlock).name).toBe('get_weather')
|
||||
}, 30_000)
|
||||
|
||||
it('stream() yields text events and a done event', async () => {
|
||||
const events: StreamEvent[] = []
|
||||
for await (const event of adapter.stream(
|
||||
[{ role: 'user', content: [{ type: 'text', text: 'Say "hi".' }] }],
|
||||
{ model, maxTokens: 50, temperature: 0 },
|
||||
)) {
|
||||
events.push(event)
|
||||
}
|
||||
|
||||
const textEvents = events.filter(e => e.type === 'text')
|
||||
expect(textEvents.length).toBeGreaterThan(0)
|
||||
|
||||
const doneEvents = events.filter(e => e.type === 'done')
|
||||
expect(doneEvents).toHaveLength(1)
|
||||
const response = doneEvents[0].data as LLMResponse
|
||||
expect(response.usage.input_tokens).toBeGreaterThan(0)
|
||||
}, 30_000)
|
||||
|
||||
it('stream() handles tool use', async () => {
|
||||
const events: StreamEvent[] = []
|
||||
for await (const event of adapter.stream(
|
||||
[{ role: 'user', content: [{ type: 'text', text: 'Get weather in Paris. Use the tool.' }] }],
|
||||
{ model, maxTokens: 100, temperature: 0, tools: [weatherTool] },
|
||||
)) {
|
||||
events.push(event)
|
||||
}
|
||||
|
||||
const toolEvents = events.filter(e => e.type === 'tool_use')
|
||||
expect(toolEvents.length).toBeGreaterThan(0)
|
||||
expect((toolEvents[0].data as ToolUseBlock).name).toBe('get_weather')
|
||||
}, 30_000)
|
||||
})
|
||||
|
|
@ -0,0 +1,359 @@
|
|||
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
||||
import { textMsg, toolUseMsg, toolResultMsg, imageMsg, chatOpts, toolDef, collectEvents } from './helpers/llm-fixtures.js'
|
||||
import type { LLMResponse, StreamEvent, ToolUseBlock } from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock GoogleGenAI
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const mockGenerateContent = vi.hoisted(() => vi.fn())
|
||||
const mockGenerateContentStream = vi.hoisted(() => vi.fn())
|
||||
const GoogleGenAIMock = vi.hoisted(() =>
|
||||
vi.fn(() => ({
|
||||
models: {
|
||||
generateContent: mockGenerateContent,
|
||||
generateContentStream: mockGenerateContentStream,
|
||||
},
|
||||
})),
|
||||
)
|
||||
|
||||
vi.mock('@google/genai', () => ({
|
||||
GoogleGenAI: GoogleGenAIMock,
|
||||
FunctionCallingConfigMode: { AUTO: 'AUTO' },
|
||||
}))
|
||||
|
||||
import { GeminiAdapter } from '../src/llm/gemini.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function makeGeminiResponse(parts: Array<Record<string, unknown>>, overrides: Record<string, unknown> = {}) {
|
||||
return {
|
||||
candidates: [{
|
||||
content: { parts },
|
||||
finishReason: 'STOP',
|
||||
...overrides,
|
||||
}],
|
||||
usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 },
|
||||
}
|
||||
}
|
||||
|
||||
async function* asyncGen<T>(items: T[]): AsyncGenerator<T> {
|
||||
for (const item of items) yield item
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('GeminiAdapter (contract)', () => {
|
||||
let adapter: GeminiAdapter
|
||||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks()
|
||||
adapter = new GeminiAdapter('test-key')
|
||||
})
|
||||
|
||||
// =========================================================================
|
||||
// chat() — message conversion
|
||||
// =========================================================================
|
||||
|
||||
describe('chat() message conversion', () => {
|
||||
it('converts text messages with correct role mapping', async () => {
|
||||
mockGenerateContent.mockResolvedValue(makeGeminiResponse([{ text: 'Hi' }]))
|
||||
|
||||
await adapter.chat(
|
||||
[textMsg('user', 'Hello'), textMsg('assistant', 'Hi')],
|
||||
chatOpts(),
|
||||
)
|
||||
|
||||
const callArgs = mockGenerateContent.mock.calls[0][0]
|
||||
expect(callArgs.contents[0]).toMatchObject({ role: 'user', parts: [{ text: 'Hello' }] })
|
||||
expect(callArgs.contents[1]).toMatchObject({ role: 'model', parts: [{ text: 'Hi' }] })
|
||||
})
|
||||
|
||||
it('converts tool_use blocks to functionCall parts', async () => {
|
||||
mockGenerateContent.mockResolvedValue(makeGeminiResponse([{ text: 'ok' }]))
|
||||
|
||||
await adapter.chat(
|
||||
[toolUseMsg('call_1', 'search', { query: 'test' })],
|
||||
chatOpts(),
|
||||
)
|
||||
|
||||
const parts = mockGenerateContent.mock.calls[0][0].contents[0].parts
|
||||
expect(parts[0].functionCall).toEqual({
|
||||
id: 'call_1',
|
||||
name: 'search',
|
||||
args: { query: 'test' },
|
||||
})
|
||||
})
|
||||
|
||||
it('converts tool_result blocks to functionResponse parts with name lookup', async () => {
|
||||
mockGenerateContent.mockResolvedValue(makeGeminiResponse([{ text: 'ok' }]))
|
||||
|
||||
await adapter.chat(
|
||||
[
|
||||
toolUseMsg('call_1', 'search', { query: 'test' }),
|
||||
toolResultMsg('call_1', 'found it'),
|
||||
],
|
||||
chatOpts(),
|
||||
)
|
||||
|
||||
const resultParts = mockGenerateContent.mock.calls[0][0].contents[1].parts
|
||||
expect(resultParts[0].functionResponse).toMatchObject({
|
||||
id: 'call_1',
|
||||
name: 'search',
|
||||
response: { content: 'found it', isError: false },
|
||||
})
|
||||
})
|
||||
|
||||
it('falls back to tool_use_id as name when no matching tool_use found', async () => {
|
||||
mockGenerateContent.mockResolvedValue(makeGeminiResponse([{ text: 'ok' }]))
|
||||
|
||||
await adapter.chat(
|
||||
[toolResultMsg('unknown_id', 'data')],
|
||||
chatOpts(),
|
||||
)
|
||||
|
||||
const parts = mockGenerateContent.mock.calls[0][0].contents[0].parts
|
||||
expect(parts[0].functionResponse.name).toBe('unknown_id')
|
||||
})
|
||||
|
||||
it('converts image blocks to inlineData parts', async () => {
|
||||
mockGenerateContent.mockResolvedValue(makeGeminiResponse([{ text: 'ok' }]))
|
||||
|
||||
await adapter.chat([imageMsg('image/png', 'base64data')], chatOpts())
|
||||
|
||||
const parts = mockGenerateContent.mock.calls[0][0].contents[0].parts
|
||||
expect(parts[0].inlineData).toEqual({
|
||||
mimeType: 'image/png',
|
||||
data: 'base64data',
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
// =========================================================================
|
||||
// chat() — tools & config
|
||||
// =========================================================================
|
||||
|
||||
describe('chat() tools & config', () => {
|
||||
it('converts tools to Gemini format with parametersJsonSchema', async () => {
|
||||
mockGenerateContent.mockResolvedValue(makeGeminiResponse([{ text: 'ok' }]))
|
||||
const tool = toolDef('search', 'Search')
|
||||
|
||||
await adapter.chat([textMsg('user', 'Hi')], chatOpts({ tools: [tool] }))
|
||||
|
||||
const config = mockGenerateContent.mock.calls[0][0].config
|
||||
expect(config.tools[0].functionDeclarations[0]).toEqual({
|
||||
name: 'search',
|
||||
description: 'Search',
|
||||
parametersJsonSchema: tool.inputSchema,
|
||||
})
|
||||
expect(config.toolConfig).toEqual({
|
||||
functionCallingConfig: { mode: 'AUTO' },
|
||||
})
|
||||
})
|
||||
|
||||
it('passes systemInstruction, maxOutputTokens, temperature', async () => {
|
||||
mockGenerateContent.mockResolvedValue(makeGeminiResponse([{ text: 'ok' }]))
|
||||
|
||||
await adapter.chat(
|
||||
[textMsg('user', 'Hi')],
|
||||
chatOpts({ systemPrompt: 'Be helpful', temperature: 0.7, maxTokens: 2048 }),
|
||||
)
|
||||
|
||||
const config = mockGenerateContent.mock.calls[0][0].config
|
||||
expect(config.systemInstruction).toBe('Be helpful')
|
||||
expect(config.temperature).toBe(0.7)
|
||||
expect(config.maxOutputTokens).toBe(2048)
|
||||
})
|
||||
|
||||
it('omits tools/toolConfig when no tools provided', async () => {
|
||||
mockGenerateContent.mockResolvedValue(makeGeminiResponse([{ text: 'ok' }]))
|
||||
|
||||
await adapter.chat([textMsg('user', 'Hi')], chatOpts())
|
||||
|
||||
const config = mockGenerateContent.mock.calls[0][0].config
|
||||
expect(config.tools).toBeUndefined()
|
||||
expect(config.toolConfig).toBeUndefined()
|
||||
})
|
||||
})
|
||||
|
||||
// =========================================================================
|
||||
// chat() — response conversion
|
||||
// =========================================================================
|
||||
|
||||
describe('chat() response conversion', () => {
|
||||
it('converts text parts to TextBlock', async () => {
|
||||
mockGenerateContent.mockResolvedValue(makeGeminiResponse([{ text: 'Hello' }]))
|
||||
|
||||
const result = await adapter.chat([textMsg('user', 'Hi')], chatOpts())
|
||||
|
||||
expect(result.content[0]).toEqual({ type: 'text', text: 'Hello' })
|
||||
})
|
||||
|
||||
it('converts functionCall parts to ToolUseBlock with existing id', async () => {
|
||||
mockGenerateContent.mockResolvedValue(makeGeminiResponse([
|
||||
{ functionCall: { id: 'call_1', name: 'search', args: { q: 'test' } } },
|
||||
]))
|
||||
|
||||
const result = await adapter.chat([textMsg('user', 'Hi')], chatOpts())
|
||||
|
||||
expect(result.content[0]).toEqual({
|
||||
type: 'tool_use',
|
||||
id: 'call_1',
|
||||
name: 'search',
|
||||
input: { q: 'test' },
|
||||
})
|
||||
})
|
||||
|
||||
it('fabricates ID when functionCall has no id field', async () => {
|
||||
mockGenerateContent.mockResolvedValue(makeGeminiResponse([
|
||||
{ functionCall: { name: 'search', args: { q: 'test' } } },
|
||||
]))
|
||||
|
||||
const result = await adapter.chat([textMsg('user', 'Hi')], chatOpts())
|
||||
|
||||
const block = result.content[0] as ToolUseBlock
|
||||
expect(block.type).toBe('tool_use')
|
||||
expect(block.id).toMatch(/^gemini-\d+-[a-z0-9]+$/)
|
||||
expect(block.name).toBe('search')
|
||||
})
|
||||
|
||||
it('maps STOP finishReason to end_turn', async () => {
|
||||
mockGenerateContent.mockResolvedValue(makeGeminiResponse([{ text: 'ok' }], { finishReason: 'STOP' }))
|
||||
|
||||
const result = await adapter.chat([textMsg('user', 'Hi')], chatOpts())
|
||||
|
||||
expect(result.stop_reason).toBe('end_turn')
|
||||
})
|
||||
|
||||
it('maps MAX_TOKENS finishReason to max_tokens', async () => {
|
||||
mockGenerateContent.mockResolvedValue(makeGeminiResponse([{ text: 'trunc' }], { finishReason: 'MAX_TOKENS' }))
|
||||
|
||||
const result = await adapter.chat([textMsg('user', 'Hi')], chatOpts())
|
||||
|
||||
expect(result.stop_reason).toBe('max_tokens')
|
||||
})
|
||||
|
||||
it('maps to tool_use when response contains functionCall (even with STOP)', async () => {
|
||||
mockGenerateContent.mockResolvedValue(makeGeminiResponse(
|
||||
[{ functionCall: { id: 'c1', name: 'search', args: {} } }],
|
||||
{ finishReason: 'STOP' },
|
||||
))
|
||||
|
||||
const result = await adapter.chat([textMsg('user', 'Hi')], chatOpts())
|
||||
|
||||
expect(result.stop_reason).toBe('tool_use')
|
||||
})
|
||||
|
||||
it('handles missing usageMetadata (defaults to 0)', async () => {
|
||||
mockGenerateContent.mockResolvedValue({
|
||||
candidates: [{ content: { parts: [{ text: 'ok' }] }, finishReason: 'STOP' }],
|
||||
})
|
||||
|
||||
const result = await adapter.chat([textMsg('user', 'Hi')], chatOpts())
|
||||
|
||||
expect(result.usage).toEqual({ input_tokens: 0, output_tokens: 0 })
|
||||
})
|
||||
|
||||
it('handles empty candidates gracefully', async () => {
|
||||
mockGenerateContent.mockResolvedValue({ candidates: [{ content: {} }] })
|
||||
|
||||
const result = await adapter.chat([textMsg('user', 'Hi')], chatOpts())
|
||||
|
||||
expect(result.content).toEqual([])
|
||||
})
|
||||
})
|
||||
|
||||
// =========================================================================
|
||||
// stream()
|
||||
// =========================================================================
|
||||
|
||||
describe('stream()', () => {
|
||||
it('yields text events for text parts', async () => {
|
||||
mockGenerateContentStream.mockResolvedValue(
|
||||
asyncGen([
|
||||
makeGeminiResponse([{ text: 'Hello' }]),
|
||||
makeGeminiResponse([{ text: ' world' }]),
|
||||
]),
|
||||
)
|
||||
|
||||
const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))
|
||||
|
||||
const textEvents = events.filter(e => e.type === 'text')
|
||||
expect(textEvents).toEqual([
|
||||
{ type: 'text', data: 'Hello' },
|
||||
{ type: 'text', data: ' world' },
|
||||
])
|
||||
})
|
||||
|
||||
it('yields tool_use events for functionCall parts', async () => {
|
||||
mockGenerateContentStream.mockResolvedValue(
|
||||
asyncGen([
|
||||
makeGeminiResponse([{ functionCall: { id: 'c1', name: 'search', args: { q: 'test' } } }]),
|
||||
]),
|
||||
)
|
||||
|
||||
const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))
|
||||
|
||||
const toolEvents = events.filter(e => e.type === 'tool_use')
|
||||
expect(toolEvents).toHaveLength(1)
|
||||
expect((toolEvents[0].data as ToolUseBlock).name).toBe('search')
|
||||
})
|
||||
|
||||
it('accumulates token counts from usageMetadata', async () => {
|
||||
mockGenerateContentStream.mockResolvedValue(
|
||||
asyncGen([
|
||||
{ candidates: [{ content: { parts: [{ text: 'Hi' }] } }], usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 2 } },
|
||||
{ candidates: [{ content: { parts: [{ text: '!' }] }, finishReason: 'STOP' }], usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 } },
|
||||
]),
|
||||
)
|
||||
|
||||
const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))
|
||||
|
||||
const done = events.find(e => e.type === 'done')
|
||||
const response = done!.data as LLMResponse
|
||||
expect(response.usage).toEqual({ input_tokens: 10, output_tokens: 5 })
|
||||
})
|
||||
|
||||
it('yields done event with correct stop_reason', async () => {
|
||||
mockGenerateContentStream.mockResolvedValue(
|
||||
asyncGen([makeGeminiResponse([{ text: 'ok' }], { finishReason: 'MAX_TOKENS' })]),
|
||||
)
|
||||
|
||||
const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))
|
||||
|
||||
const done = events.find(e => e.type === 'done')
|
||||
expect((done!.data as LLMResponse).stop_reason).toBe('max_tokens')
|
||||
})
|
||||
|
||||
it('yields error event when stream throws', async () => {
|
||||
mockGenerateContentStream.mockResolvedValue(
|
||||
(async function* () { throw new Error('Gemini error') })(),
|
||||
)
|
||||
|
||||
const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))
|
||||
|
||||
const errorEvents = events.filter(e => e.type === 'error')
|
||||
expect(errorEvents).toHaveLength(1)
|
||||
expect((errorEvents[0].data as Error).message).toBe('Gemini error')
|
||||
})
|
||||
|
||||
it('handles chunks with no candidates', async () => {
|
||||
mockGenerateContentStream.mockResolvedValue(
|
||||
asyncGen([
|
||||
{ candidates: undefined, usageMetadata: { promptTokenCount: 5, candidatesTokenCount: 0 } },
|
||||
makeGeminiResponse([{ text: 'ok' }]),
|
||||
]),
|
||||
)
|
||||
|
||||
const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))
|
||||
|
||||
const textEvents = events.filter(e => e.type === 'text')
|
||||
expect(textEvents).toHaveLength(1)
|
||||
expect(textEvents[0].data).toBe('ok')
|
||||
})
|
||||
})
|
||||
})
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
/**
|
||||
* Shared fixture builders for LLM adapter contract tests.
|
||||
*/
|
||||
|
||||
import type {
|
||||
ContentBlock,
|
||||
LLMChatOptions,
|
||||
LLMMessage,
|
||||
LLMToolDef,
|
||||
ImageBlock,
|
||||
TextBlock,
|
||||
ToolResultBlock,
|
||||
ToolUseBlock,
|
||||
} from '../../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Message builders
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export function textMsg(role: 'user' | 'assistant', text: string): LLMMessage {
|
||||
return { role, content: [{ type: 'text', text }] }
|
||||
}
|
||||
|
||||
export function toolUseMsg(id: string, name: string, input: Record<string, unknown>): LLMMessage {
|
||||
return {
|
||||
role: 'assistant',
|
||||
content: [{ type: 'tool_use', id, name, input }],
|
||||
}
|
||||
}
|
||||
|
||||
export function toolResultMsg(toolUseId: string, content: string, isError = false): LLMMessage {
|
||||
return {
|
||||
role: 'user',
|
||||
content: [{ type: 'tool_result', tool_use_id: toolUseId, content, is_error: isError }],
|
||||
}
|
||||
}
|
||||
|
||||
export function imageMsg(mediaType: string, data: string): LLMMessage {
|
||||
return {
|
||||
role: 'user',
|
||||
content: [{ type: 'image', source: { type: 'base64', media_type: mediaType, data } }],
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Options & tool def builders
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export function chatOpts(overrides: Partial<LLMChatOptions> = {}): LLMChatOptions {
|
||||
return {
|
||||
model: 'test-model',
|
||||
maxTokens: 1024,
|
||||
...overrides,
|
||||
}
|
||||
}
|
||||
|
||||
export function toolDef(name: string, description = 'A test tool'): LLMToolDef {
|
||||
return {
|
||||
name,
|
||||
description,
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: { query: { type: 'string' } },
|
||||
required: ['query'],
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Collect all events from an async iterable. */
|
||||
export async function collectEvents<T>(iterable: AsyncIterable<T>): Promise<T[]> {
|
||||
const events: T[] = []
|
||||
for await (const event of iterable) {
|
||||
events.push(event)
|
||||
}
|
||||
return events
|
||||
}
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
import { describe, it, expect } from 'vitest'
|
||||
import { STOP_WORDS, extractKeywords, keywordScore } from '../src/utils/keywords.js'
|
||||
|
||||
// Regression coverage for the shared keyword helpers extracted from
|
||||
// orchestrator.ts and scheduler.ts (PR #70 review point 1).
|
||||
//
|
||||
// These tests pin behaviour so future drift between Scheduler and the
|
||||
// short-circuit selector is impossible — any edit must update the shared
|
||||
// module and these tests at once.
|
||||
|
||||
describe('utils/keywords', () => {
|
||||
describe('STOP_WORDS', () => {
|
||||
it('contains all 26 stop words', () => {
|
||||
// Sanity-check the canonical list — if anyone adds/removes a stop word
|
||||
// they should also update this assertion.
|
||||
expect(STOP_WORDS.size).toBe(26)
|
||||
})
|
||||
|
||||
it('includes "then" and "and" so they cannot dominate scoring', () => {
|
||||
expect(STOP_WORDS.has('then')).toBe(true)
|
||||
expect(STOP_WORDS.has('and')).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe('extractKeywords', () => {
|
||||
it('lowercases and dedupes', () => {
|
||||
const out = extractKeywords('TypeScript typescript TYPESCRIPT')
|
||||
expect(out).toEqual(['typescript'])
|
||||
})
|
||||
|
||||
it('drops words shorter than 4 characters', () => {
|
||||
const out = extractKeywords('a bb ccc dddd eeeee')
|
||||
expect(out).toEqual(['dddd', 'eeeee'])
|
||||
})
|
||||
|
||||
it('drops stop words', () => {
|
||||
const out = extractKeywords('the cat and the dog have meals')
|
||||
// 'cat', 'dog', 'have' filtered: 'cat'/'dog' too short, 'have' is a stop word
|
||||
expect(out).toEqual(['meals'])
|
||||
})
|
||||
|
||||
it('splits on non-word characters', () => {
|
||||
const out = extractKeywords('hello,world!writer-mode')
|
||||
expect(out.sort()).toEqual(['hello', 'mode', 'world', 'writer'])
|
||||
})
|
||||
|
||||
it('returns empty array for empty input', () => {
|
||||
expect(extractKeywords('')).toEqual([])
|
||||
})
|
||||
})
|
||||
|
||||
describe('keywordScore', () => {
|
||||
it('counts each keyword at most once', () => {
|
||||
// 'code' appears twice in the text but contributes 1
|
||||
expect(keywordScore('code review code style', ['code'])).toBe(1)
|
||||
})
|
||||
|
||||
it('is case-insensitive', () => {
|
||||
expect(keywordScore('TYPESCRIPT', ['typescript'])).toBe(1)
|
||||
expect(keywordScore('typescript', ['TYPESCRIPT'])).toBe(1)
|
||||
})
|
||||
|
||||
it('returns 0 when no keywords match', () => {
|
||||
expect(keywordScore('hello world', ['rust', 'go'])).toBe(0)
|
||||
})
|
||||
|
||||
it('sums distinct keyword hits', () => {
|
||||
expect(keywordScore('write typescript code for the api', ['typescript', 'code', 'rust'])).toBe(2)
|
||||
})
|
||||
|
||||
it('returns 0 for empty keywords array', () => {
|
||||
expect(keywordScore('any text', [])).toBe(0)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
|
@ -0,0 +1,211 @@
|
|||
import { describe, it, expect, beforeEach, vi } from 'vitest'
|
||||
import type { ToolUseContext } from '../src/types.js'
|
||||
import { ToolRegistry } from '../src/tool/framework.js'
|
||||
|
||||
// Module-level spies shared by the mock MCP SDK classes below.
// Individual tests script their return values; beforeEach clears them.
const listToolsMock = vi.fn()
const callToolMock = vi.fn()
const connectMock = vi.fn()
const clientCloseMock = vi.fn()
const transportCloseMock = vi.fn()
|
||||
|
||||
/**
 * Minimal stand-in for the MCP SDK `Client`.
 * Every method forwards to a module-level vi.fn spy so tests can script
 * responses and assert on the exact arguments received. Signatures mirror
 * the real SDK's (params, options) shapes.
 */
class MockClient {
  // Records the transport it was asked to connect with; options are ignored.
  async connect(
    transport: unknown,
    _options?: { timeout?: number },
  ): Promise<void> {
    connectMock(transport)
  }

  // Delegates to listToolsMock; tests script (optionally paginated) tool lists.
  async listTools(
    params?: { cursor?: string },
    options?: { timeout?: number },
  ): Promise<{
    tools: Array<{
      name: string
      description: string
      inputSchema?: Record<string, unknown>
    }>
    nextCursor?: string
  }> {
    return listToolsMock(params, options)
  }

  // Delegates to callToolMock, passing through the (request, schema, options)
  // triple so tests can verify how the adapter invokes tools.
  async callTool(
    request: { name: string; arguments: Record<string, unknown> },
    resultSchema?: unknown,
    options?: { timeout?: number },
  ): Promise<{
    content?: Array<Record<string, unknown>>
    structuredContent?: unknown
    isError?: boolean
    toolResult?: unknown
  }> {
    return callToolMock(request, resultSchema, options)
  }

  // Lets tests assert the client is shut down on disconnect().
  async close(): Promise<void> {
    clientCloseMock()
  }
}
|
||||
|
||||
/**
 * Stand-in for the SDK's StdioClientTransport.
 * Captures the config it was constructed with and records close() calls
 * via transportCloseMock.
 */
class MockStdioTransport {
  readonly config: unknown

  constructor(config: unknown) {
    this.config = config
  }

  async close(): Promise<void> {
    transportCloseMock()
  }
}
|
||||
|
||||
// Substitute the mocks above for the real MCP SDK. Vitest hoists vi.mock
// calls to the top of the module, so they take effect before the module
// under test is (dynamically) imported inside each test.
vi.mock('@modelcontextprotocol/sdk/client/index.js', () => ({
  Client: MockClient,
}))

vi.mock('@modelcontextprotocol/sdk/client/stdio.js', () => ({
  StdioClientTransport: MockStdioTransport,
}))

// Minimal execution context handed to every tool.execute() call.
const context: ToolUseContext = {
  agent: { name: 'test-agent', role: 'tester', model: 'test-model' },
}

beforeEach(() => {
  // Reset call history on all shared spies so tests stay independent.
  vi.clearAllMocks()
})
|
||||
|
||||
// End-to-end behaviour of connectMCPTools against the mocked MCP SDK.
// Each test imports ../src/tool/mcp.js dynamically inside the test body.
describe('connectMCPTools', () => {
  it('connects, discovers tools, and executes MCP calls', async () => {
    // Script one discoverable tool and a successful call result.
    listToolsMock.mockResolvedValue({
      tools: [
        {
          name: 'search_issues',
          description: 'Search repository issues.',
          inputSchema: {
            type: 'object',
            properties: { q: { type: 'string' } },
            required: ['q'],
          },
        },
      ],
    })
    callToolMock.mockResolvedValue({
      content: [{ type: 'text', text: 'found 2 issues' }],
      isError: false,
    })

    const { connectMCPTools } = await import('../src/tool/mcp.js')
    const connected = await connectMCPTools({
      command: 'npx',
      args: ['-y', 'mock-mcp-server'],
      env: { GITHUB_TOKEN: 'token' },
      namePrefix: 'github',
    })

    // namePrefix is prepended to the discovered tool name.
    expect(connectMock).toHaveBeenCalledTimes(1)
    expect(connected.tools).toHaveLength(1)
    expect(connected.tools[0].name).toBe('github_search_issues')

    // The MCP inputSchema must survive registration into the tool registry.
    const registry = new ToolRegistry()
    registry.register(connected.tools[0])
    const defs = registry.toToolDefs()
    expect(defs[0].inputSchema).toMatchObject({
      type: 'object',
      properties: { q: { type: 'string' } },
      required: ['q'],
    })

    // execute() forwards to callTool with the ORIGINAL (unprefixed) name
    // and a numeric timeout in the request options.
    const result = await connected.tools[0].execute({ q: 'bug' }, context)
    expect(callToolMock).toHaveBeenCalledWith(
      {
        name: 'search_issues',
        arguments: { q: 'bug' },
      },
      undefined,
      expect.objectContaining({ timeout: expect.any(Number) }),
    )
    expect(result.isError).toBe(false)
    expect(result.data).toContain('found 2 issues')

    // disconnect() closes the client; the transport itself is not closed
    // directly (the client owns it).
    await connected.disconnect()
    expect(clientCloseMock).toHaveBeenCalledTimes(1)
    expect(transportCloseMock).not.toHaveBeenCalled()
  })

  it('aggregates paginated listTools results', async () => {
    // First page returns tool 'a' with a nextCursor; the cursored follow-up
    // returns tool 'b' and no cursor, ending pagination.
    listToolsMock.mockImplementation(
      async (params?: { cursor?: string }) => {
        if (params?.cursor === 'c1') {
          return {
            tools: [
              { name: 'b', description: 'B', inputSchema: { type: 'object' } },
            ],
          }
        }
        return {
          tools: [
            { name: 'a', description: 'A', inputSchema: { type: 'object' } },
          ],
          nextCursor: 'c1',
        }
      },
    )

    callToolMock.mockResolvedValue({ content: [{ type: 'text', text: 'ok' }] })

    const { connectMCPTools } = await import('../src/tool/mcp.js')
    const connected = await connectMCPTools({
      command: 'npx',
      args: ['-y', 'mock-mcp-server'],
    })

    expect(listToolsMock).toHaveBeenCalledTimes(2)
    expect(listToolsMock.mock.calls[1][0]).toEqual({ cursor: 'c1' })
    expect(connected.tools).toHaveLength(2)
    expect(connected.tools.map((t) => t.name)).toEqual(['a', 'b'])
  })

  it('serializes non-text MCP content blocks', async () => {
    listToolsMock.mockResolvedValue({
      tools: [{ name: 'pic', description: 'Pic', inputSchema: { type: 'object' } }],
    })
    // Image blocks cannot be returned verbatim as tool-result text; the
    // adapter renders a textual summary instead (type + payload length).
    callToolMock.mockResolvedValue({
      content: [
        {
          type: 'image',
          data: 'AAA',
          mimeType: 'image/png',
        },
      ],
      isError: false,
    })

    const { connectMCPTools } = await import('../src/tool/mcp.js')
    const connected = await connectMCPTools({ command: 'npx', args: ['x'] })
    const result = await connected.tools[0].execute({}, context)
    expect(result.data).toContain('image')
    expect(result.data).toContain('base64 length=3')
  })

  it('marks tool result as error when MCP returns isError', async () => {
    listToolsMock.mockResolvedValue({
      tools: [{ name: 'danger', description: 'Dangerous op.', inputSchema: {} }],
    })
    callToolMock.mockResolvedValue({
      content: [{ type: 'text', text: 'permission denied' }],
      isError: true,
    })

    const { connectMCPTools } = await import('../src/tool/mcp.js')
    const connected = await connectMCPTools({
      command: 'npx',
      args: ['-y', 'mock-mcp-server'],
    })

    // The MCP-level error flag must propagate to the framework's result.
    const result = await connected.tools[0].execute({}, context)
    expect(result.isError).toBe(true)
    expect(result.data).toContain('permission denied')
  })
})
|
||||
|
|
@ -0,0 +1,95 @@
|
|||
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock OpenAI constructor (must be hoisted for Vitest)
|
||||
// ---------------------------------------------------------------------------
|
||||
// The mock constructor must be created via vi.hoisted: Vitest hoists the
// vi.mock factory above all imports, so any value it references has to be
// hoisted too.
const OpenAIMock = vi.hoisted(() => vi.fn())

vi.mock('openai', () => ({
  default: OpenAIMock,
}))
|
||||
|
||||
import { MiniMaxAdapter } from '../src/llm/minimax.js'
|
||||
import { createAdapter } from '../src/llm/adapter.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// MiniMaxAdapter tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('MiniMaxAdapter', () => {
|
||||
beforeEach(() => {
|
||||
OpenAIMock.mockClear()
|
||||
})
|
||||
|
||||
it('has name "minimax"', () => {
|
||||
const adapter = new MiniMaxAdapter()
|
||||
expect(adapter.name).toBe('minimax')
|
||||
})
|
||||
|
||||
it('uses MINIMAX_API_KEY by default', () => {
|
||||
const original = process.env['MINIMAX_API_KEY']
|
||||
process.env['MINIMAX_API_KEY'] = 'minimax-test-key-123'
|
||||
|
||||
try {
|
||||
new MiniMaxAdapter()
|
||||
expect(OpenAIMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
apiKey: 'minimax-test-key-123',
|
||||
baseURL: 'https://api.minimax.io/v1',
|
||||
})
|
||||
)
|
||||
} finally {
|
||||
if (original === undefined) {
|
||||
delete process.env['MINIMAX_API_KEY']
|
||||
} else {
|
||||
process.env['MINIMAX_API_KEY'] = original
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
it('uses official MiniMax global baseURL by default', () => {
|
||||
new MiniMaxAdapter('some-key')
|
||||
expect(OpenAIMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
apiKey: 'some-key',
|
||||
baseURL: 'https://api.minimax.io/v1',
|
||||
})
|
||||
)
|
||||
})
|
||||
|
||||
it('uses MINIMAX_BASE_URL env var when set', () => {
|
||||
const original = process.env['MINIMAX_BASE_URL']
|
||||
process.env['MINIMAX_BASE_URL'] = 'https://api.minimaxi.com/v1'
|
||||
|
||||
try {
|
||||
new MiniMaxAdapter('some-key')
|
||||
expect(OpenAIMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
apiKey: 'some-key',
|
||||
baseURL: 'https://api.minimaxi.com/v1',
|
||||
})
|
||||
)
|
||||
} finally {
|
||||
if (original === undefined) {
|
||||
delete process.env['MINIMAX_BASE_URL']
|
||||
} else {
|
||||
process.env['MINIMAX_BASE_URL'] = original
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
it('allows overriding apiKey and baseURL', () => {
|
||||
new MiniMaxAdapter('custom-key', 'https://custom.endpoint/v1')
|
||||
expect(OpenAIMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
apiKey: 'custom-key',
|
||||
baseURL: 'https://custom.endpoint/v1',
|
||||
})
|
||||
)
|
||||
})
|
||||
|
||||
it('createAdapter("minimax") returns MiniMaxAdapter instance', async () => {
|
||||
const adapter = await createAdapter('minimax')
|
||||
expect(adapter).toBeInstanceOf(MiniMaxAdapter)
|
||||
})
|
||||
})
|
||||
|
|
@ -0,0 +1,359 @@
|
|||
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
||||
import { textMsg, chatOpts, toolDef, collectEvents } from './helpers/llm-fixtures.js'
|
||||
import type { LLMResponse, StreamEvent, ToolUseBlock } from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock OpenAI SDK
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Hoisted so the vi.mock factory below (which Vitest lifts above all
// imports) can reference it.
const mockCreate = vi.hoisted(() => vi.fn())

// Replace the OpenAI SDK: every chat.completions.create call is routed to
// mockCreate, which tests script per-case. Both the default export and the
// named OpenAI export point at the same constructor.
vi.mock('openai', () => {
  const OpenAIMock = vi.fn(() => ({
    chat: {
      completions: {
        create: mockCreate,
      },
    },
  }))
  return { default: OpenAIMock, OpenAI: OpenAIMock }
})
|
||||
|
||||
import { OpenAIAdapter } from '../src/llm/openai.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function makeCompletion(overrides: Record<string, unknown> = {}) {
|
||||
return {
|
||||
id: 'chatcmpl-123',
|
||||
model: 'gpt-4o',
|
||||
choices: [{
|
||||
index: 0,
|
||||
message: {
|
||||
role: 'assistant',
|
||||
content: 'Hello',
|
||||
tool_calls: undefined,
|
||||
},
|
||||
finish_reason: 'stop',
|
||||
}],
|
||||
usage: { prompt_tokens: 10, completion_tokens: 5 },
|
||||
...overrides,
|
||||
}
|
||||
}
|
||||
|
||||
async function* makeChunks(chunks: Array<Record<string, unknown>>) {
|
||||
for (const chunk of chunks) yield chunk
|
||||
}
|
||||
|
||||
function textChunk(text: string, finish_reason: string | null = null, usage: Record<string, number> | null = null) {
|
||||
return {
|
||||
id: 'chatcmpl-123',
|
||||
model: 'gpt-4o',
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: { content: text },
|
||||
finish_reason,
|
||||
}],
|
||||
usage,
|
||||
}
|
||||
}
|
||||
|
||||
function toolCallChunk(index: number, id: string | undefined, name: string | undefined, args: string, finish_reason: string | null = null) {
|
||||
return {
|
||||
id: 'chatcmpl-123',
|
||||
model: 'gpt-4o',
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {
|
||||
tool_calls: [{
|
||||
index,
|
||||
id,
|
||||
function: {
|
||||
name,
|
||||
arguments: args,
|
||||
},
|
||||
}],
|
||||
},
|
||||
finish_reason,
|
||||
}],
|
||||
usage: null,
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('OpenAIAdapter', () => {
  let adapter: OpenAIAdapter

  beforeEach(() => {
    vi.clearAllMocks()
    adapter = new OpenAIAdapter('test-key')
  })

  // =========================================================================
  // chat()
  // =========================================================================

  describe('chat()', () => {
    it('calls SDK with correct parameters and returns LLMResponse', async () => {
      mockCreate.mockResolvedValue(makeCompletion())

      const result = await adapter.chat([textMsg('user', 'Hi')], chatOpts())

      // Request must be non-streaming and carry the options through verbatim.
      const callArgs = mockCreate.mock.calls[0][0]
      expect(callArgs.model).toBe('test-model')
      expect(callArgs.stream).toBe(false)
      expect(callArgs.max_tokens).toBe(1024)

      // finish_reason 'stop' is normalized to stop_reason 'end_turn'.
      expect(result).toEqual({
        id: 'chatcmpl-123',
        content: [{ type: 'text', text: 'Hello' }],
        model: 'gpt-4o',
        stop_reason: 'end_turn',
        usage: { input_tokens: 10, output_tokens: 5 },
      })
    })

    it('passes tools as OpenAI format', async () => {
      mockCreate.mockResolvedValue(makeCompletion())
      const tool = toolDef('search', 'Search')

      await adapter.chat([textMsg('user', 'Hi')], chatOpts({ tools: [tool] }))

      // Tool defs are converted to OpenAI's { type: 'function', function: … } shape.
      const sentTools = mockCreate.mock.calls[0][0].tools
      expect(sentTools[0]).toEqual({
        type: 'function',
        function: {
          name: 'search',
          description: 'Search',
          parameters: tool.inputSchema,
        },
      })
    })

    it('passes temperature through', async () => {
      mockCreate.mockResolvedValue(makeCompletion())

      await adapter.chat([textMsg('user', 'Hi')], chatOpts({ temperature: 0.3 }))

      expect(mockCreate.mock.calls[0][0].temperature).toBe(0.3)
    })

    it('passes abortSignal to request options', async () => {
      mockCreate.mockResolvedValue(makeCompletion())
      const controller = new AbortController()

      await adapter.chat(
        [textMsg('user', 'Hi')],
        chatOpts({ abortSignal: controller.signal }),
      )

      // The signal travels in the second (request-options) argument, not the body.
      expect(mockCreate.mock.calls[0][1]).toEqual({ signal: controller.signal })
    })

    it('handles tool_calls in response', async () => {
      mockCreate.mockResolvedValue(makeCompletion({
        choices: [{
          index: 0,
          message: {
            role: 'assistant',
            content: null,
            tool_calls: [{
              id: 'call_1',
              type: 'function',
              function: { name: 'search', arguments: '{"q":"test"}' },
            }],
          },
          finish_reason: 'tool_calls',
        }],
      }))

      const result = await adapter.chat(
        [textMsg('user', 'Hi')],
        chatOpts({ tools: [toolDef('search')] }),
      )

      // The JSON-string arguments must be parsed into a structured input object.
      expect(result.content[0]).toEqual({
        type: 'tool_use',
        id: 'call_1',
        name: 'search',
        input: { q: 'test' },
      })
      expect(result.stop_reason).toBe('tool_use')
    })

    it('passes tool names for fallback text extraction', async () => {
      // When native tool_calls is empty but text contains tool JSON, the adapter
      // should invoke extractToolCallsFromText with known tool names.
      // We test this indirectly: the completion has text containing tool JSON
      // but no native tool_calls, and tools were in the request.
      mockCreate.mockResolvedValue(makeCompletion({
        choices: [{
          index: 0,
          message: {
            role: 'assistant',
            content: '{"name":"search","input":{"q":"test"}}',
            tool_calls: undefined,
          },
          finish_reason: 'stop',
        }],
      }))

      const result = await adapter.chat(
        [textMsg('user', 'Hi')],
        chatOpts({ tools: [toolDef('search')] }),
      )

      // The fromOpenAICompletion + extractToolCallsFromText pipeline should find the tool
      // NOTE(review): `>= 0` is vacuously true, so this assertion can never
      // fail — the test only proves chat() does not throw on this input.
      // Consider pinning the expected extraction result so regressions in the
      // fallback path are actually caught.
      const toolBlocks = result.content.filter(b => b.type === 'tool_use')
      expect(toolBlocks.length).toBeGreaterThanOrEqual(0) // may or may not extract depending on format
    })

    it('propagates SDK errors', async () => {
      mockCreate.mockRejectedValue(new Error('Rate limited'))

      await expect(
        adapter.chat([textMsg('user', 'Hi')], chatOpts()),
      ).rejects.toThrow('Rate limited')
    })
  })

  // =========================================================================
  // stream()
  // =========================================================================

  describe('stream()', () => {
    it('calls SDK with stream: true and include_usage', async () => {
      mockCreate.mockResolvedValue(makeChunks([
        textChunk('Hi', 'stop', { prompt_tokens: 5, completion_tokens: 2 }),
      ]))

      await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))

      // include_usage makes OpenAI emit a final usage-only chunk.
      const callArgs = mockCreate.mock.calls[0][0]
      expect(callArgs.stream).toBe(true)
      expect(callArgs.stream_options).toEqual({ include_usage: true })
    })

    it('yields text events from content deltas', async () => {
      mockCreate.mockResolvedValue(makeChunks([
        textChunk('Hello'),
        textChunk(' world', 'stop', { prompt_tokens: 5, completion_tokens: 3 }),
      ]))

      const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))

      // One text event per content delta, in arrival order.
      const textEvents = events.filter(e => e.type === 'text')
      expect(textEvents).toEqual([
        { type: 'text', data: 'Hello' },
        { type: 'text', data: ' world' },
      ])
    })

    it('accumulates tool_calls across chunks and emits tool_use after stream', async () => {
      // The arguments JSON arrives split across two delta fragments sharing
      // tool-call index 0; the adapter must stitch them back together.
      mockCreate.mockResolvedValue(makeChunks([
        toolCallChunk(0, 'call_1', 'search', '{"q":'),
        toolCallChunk(0, undefined, undefined, '"test"}', 'tool_calls'),
        { id: 'chatcmpl-123', model: 'gpt-4o', choices: [], usage: { prompt_tokens: 10, completion_tokens: 5 } },
      ]))

      const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))

      const toolEvents = events.filter(e => e.type === 'tool_use')
      expect(toolEvents).toHaveLength(1)
      const block = toolEvents[0].data as ToolUseBlock
      expect(block).toEqual({
        type: 'tool_use',
        id: 'call_1',
        name: 'search',
        input: { q: 'test' },
      })
    })

    it('yields done event with usage from final chunk', async () => {
      mockCreate.mockResolvedValue(makeChunks([
        textChunk('Hi', 'stop'),
        // With include_usage, the last chunk has empty choices and only usage.
        { id: 'chatcmpl-123', model: 'gpt-4o', choices: [], usage: { prompt_tokens: 10, completion_tokens: 2 } },
      ]))

      const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))

      const done = events.find(e => e.type === 'done')
      const response = done!.data as LLMResponse
      expect(response.usage).toEqual({ input_tokens: 10, output_tokens: 2 })
      expect(response.id).toBe('chatcmpl-123')
      expect(response.model).toBe('gpt-4o')
    })

    it('resolves stop_reason to tool_use when tool blocks present but finish_reason is stop', async () => {
      mockCreate.mockResolvedValue(makeChunks([
        toolCallChunk(0, 'call_1', 'search', '{"q":"x"}', 'stop'),
        { id: 'chatcmpl-123', model: 'gpt-4o', choices: [], usage: { prompt_tokens: 5, completion_tokens: 3 } },
      ]))

      const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))

      // Accumulated tool blocks take precedence over the reported finish_reason.
      const done = events.find(e => e.type === 'done')
      expect((done!.data as LLMResponse).stop_reason).toBe('tool_use')
    })

    it('handles malformed tool arguments JSON', async () => {
      mockCreate.mockResolvedValue(makeChunks([
        toolCallChunk(0, 'call_1', 'search', '{broken', 'tool_calls'),
        { id: 'chatcmpl-123', model: 'gpt-4o', choices: [], usage: { prompt_tokens: 5, completion_tokens: 3 } },
      ]))

      const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))

      // Unparseable arguments degrade to an empty input rather than throwing.
      const toolEvents = events.filter(e => e.type === 'tool_use')
      expect((toolEvents[0].data as ToolUseBlock).input).toEqual({})
    })

    it('yields error event on stream failure', async () => {
      // An async iterator that throws on first pull simulates a mid-stream failure.
      mockCreate.mockResolvedValue(
        (async function* () { throw new Error('Stream exploded') })(),
      )

      const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))

      // The failure surfaces as an error event, not a rejected iterator.
      const errorEvents = events.filter(e => e.type === 'error')
      expect(errorEvents).toHaveLength(1)
      expect((errorEvents[0].data as Error).message).toBe('Stream exploded')
    })

    it('passes abortSignal to stream request options', async () => {
      mockCreate.mockResolvedValue(makeChunks([
        textChunk('Hi', 'stop', { prompt_tokens: 5, completion_tokens: 1 }),
      ]))
      const controller = new AbortController()

      await collectEvents(
        adapter.stream(
          [textMsg('user', 'Hi')],
          chatOpts({ abortSignal: controller.signal }),
        ),
      )

      expect(mockCreate.mock.calls[0][1]).toEqual({ signal: controller.signal })
    })

    it('handles multiple tool calls', async () => {
      // Distinct tool-call indices (0 and 1) must produce two separate blocks.
      mockCreate.mockResolvedValue(makeChunks([
        toolCallChunk(0, 'call_1', 'search', '{"q":"a"}'),
        toolCallChunk(1, 'call_2', 'read', '{"path":"b"}', 'tool_calls'),
        { id: 'chatcmpl-123', model: 'gpt-4o', choices: [], usage: { prompt_tokens: 5, completion_tokens: 3 } },
      ]))

      const events = await collectEvents(adapter.stream([textMsg('user', 'Hi')], chatOpts()))

      const toolEvents = events.filter(e => e.type === 'tool_use')
      expect(toolEvents).toHaveLength(2)
      expect((toolEvents[0].data as ToolUseBlock).name).toBe('search')
      expect((toolEvents[1].data as ToolUseBlock).name).toBe('read')
    })
  })
})
|
||||
|
|
@ -42,6 +42,8 @@ function createMockAdapter(responses: string[]): LLMAdapter {
|
|||
* We need to do this at the module level because Agent calls createAdapter internally.
|
||||
*/
|
||||
// Shared state read/written by the module-level adapter mock:
// responses to script, plus the chat options and prompts each call captured.
// Reset in the suite's beforeEach so tests stay independent.
let mockAdapterResponses: string[] = []
let capturedChatOptions: LLMChatOptions[] = []
let capturedPrompts: string[] = []
|
||||
|
||||
vi.mock('../src/llm/adapter.js', () => ({
|
||||
createAdapter: async () => {
|
||||
|
|
@ -49,6 +51,13 @@ vi.mock('../src/llm/adapter.js', () => ({
|
|||
return {
|
||||
name: 'mock',
|
||||
async chat(_msgs: LLMMessage[], options: LLMChatOptions): Promise<LLMResponse> {
|
||||
capturedChatOptions.push(options)
|
||||
const lastUser = [..._msgs].reverse().find((m) => m.role === 'user')
|
||||
const prompt = (lastUser?.content ?? [])
|
||||
.filter((b): b is { type: 'text'; text: string } => b.type === 'text')
|
||||
.map((b) => b.text)
|
||||
.join('\n')
|
||||
capturedPrompts.push(prompt)
|
||||
const text = mockAdapterResponses[callIndex] ?? 'default mock response'
|
||||
callIndex++
|
||||
return {
|
||||
|
|
@ -94,6 +103,8 @@ function teamCfg(agents?: AgentConfig[]): TeamConfig {
|
|||
describe('OpenMultiAgent', () => {
|
||||
// Clear the adapter-mock capture state so each test observes only its own calls.
beforeEach(() => {
  mockAdapterResponses = []
  capturedChatOptions = []
  capturedPrompts = []
})
|
||||
|
||||
describe('createTeam', () => {
|
||||
|
|
@ -144,6 +155,80 @@ describe('OpenMultiAgent', () => {
|
|||
expect(oma.getStatus().completedTasks).toBe(1)
|
||||
})
|
||||
|
||||
// These cases inspect capturedChatOptions (populated by the module-level
// adapter mock) to see exactly which tool definitions were offered to the LLM.
it('registers customTools so they are available to the LLM', async () => {
  mockAdapterResponses = ['used custom tool']

  const { z } = await import('zod')
  const { defineTool } = await import('../src/tool/framework.js')

  const myTool = defineTool({
    name: 'my_custom_tool',
    description: 'A custom tool for testing',
    inputSchema: z.object({ query: z.string() }),
    execute: async ({ query }) => ({ data: query }),
  })

  const oma = new OpenMultiAgent({ defaultModel: 'mock-model' })
  await oma.runAgent(
    { ...agentConfig('solo'), customTools: [myTool] },
    'Use the custom tool',
  )

  // The first captured chat call carries the tool list sent to the LLM.
  const toolNames = capturedChatOptions[0]?.tools?.map(t => t.name) ?? []
  expect(toolNames).toContain('my_custom_tool')
})

it('customTools bypass tools allowlist and toolPreset filtering', async () => {
  mockAdapterResponses = ['done']

  const { z } = await import('zod')
  const { defineTool } = await import('../src/tool/framework.js')

  const myTool = defineTool({
    name: 'my_custom_tool',
    description: 'A custom tool for testing',
    inputSchema: z.object({ query: z.string() }),
    execute: async ({ query }) => ({ data: query }),
  })

  const oma = new OpenMultiAgent({ defaultModel: 'mock-model' })

  // toolPreset 'readonly' only allows file_read, grep, glob — custom tool should still appear
  await oma.runAgent(
    { ...agentConfig('solo'), customTools: [myTool], toolPreset: 'readonly' },
    'test',
  )

  const toolNames = capturedChatOptions[0]?.tools?.map(t => t.name) ?? []
  expect(toolNames).toContain('my_custom_tool')
  // built-in tools outside the preset should be filtered
  expect(toolNames).not.toContain('bash')
})

it('customTools can be blocked by disallowedTools', async () => {
  mockAdapterResponses = ['done']

  const { z } = await import('zod')
  const { defineTool } = await import('../src/tool/framework.js')

  const myTool = defineTool({
    name: 'my_custom_tool',
    description: 'A custom tool for testing',
    inputSchema: z.object({ query: z.string() }),
    execute: async ({ query }) => ({ data: query }),
  })

  const oma = new OpenMultiAgent({ defaultModel: 'mock-model' })

  // disallowedTools is an explicit deny-list and wins over registration.
  await oma.runAgent(
    { ...agentConfig('solo'), customTools: [myTool], disallowedTools: ['my_custom_tool'] },
    'test',
  )

  const toolNames = capturedChatOptions[0]?.tools?.map(t => t.name) ?? []
  expect(toolNames).not.toContain('my_custom_tool')
})
|
||||
|
||||
it('fires onProgress events', async () => {
|
||||
mockAdapterResponses = ['done']
|
||||
|
||||
|
|
@ -195,6 +280,67 @@ describe('OpenMultiAgent', () => {
|
|||
|
||||
expect(result.success).toBe(true)
|
||||
})
|
||||
|
||||
// These cases pin what context each worker prompt receives, by inspecting
// the prompts captured by the module-level adapter mock.
it('uses a clean slate for tasks without dependencies', async () => {
  mockAdapterResponses = ['alpha done', 'beta done']

  const oma = new OpenMultiAgent({ defaultModel: 'mock-model' })
  const team = oma.createTeam('t', teamCfg())

  await oma.runTasks(team, [
    { title: 'Independent A', description: 'Do independent A', assignee: 'worker-a' },
    { title: 'Independent B', description: 'Do independent B', assignee: 'worker-b' },
  ])

  // Independent tasks must see neither shared memory nor prerequisite context.
  const workerPrompts = capturedPrompts.slice(0, 2)
  expect(workerPrompts[0]).toContain('# Task: Independent A')
  expect(workerPrompts[1]).toContain('# Task: Independent B')
  expect(workerPrompts[0]).not.toContain('## Shared Team Memory')
  expect(workerPrompts[1]).not.toContain('## Shared Team Memory')
  expect(workerPrompts[0]).not.toContain('## Context from prerequisite tasks')
  expect(workerPrompts[1]).not.toContain('## Context from prerequisite tasks')
})

it('injects only dependency results into dependent task prompts', async () => {
  mockAdapterResponses = ['first output', 'second output']

  const oma = new OpenMultiAgent({ defaultModel: 'mock-model' })
  const team = oma.createTeam('t', teamCfg())

  await oma.runTasks(team, [
    { title: 'First', description: 'Produce first', assignee: 'worker-a' },
    { title: 'Second', description: 'Use first', assignee: 'worker-b', dependsOn: ['First'] },
  ])

  // The dependent task gets the prerequisite's output — and ONLY that,
  // not the full shared team memory.
  const secondPrompt = capturedPrompts[1] ?? ''
  expect(secondPrompt).toContain('## Context from prerequisite tasks')
  expect(secondPrompt).toContain('### First (by worker-a)')
  expect(secondPrompt).toContain('first output')
  expect(secondPrompt).not.toContain('## Shared Team Memory')
})

it('supports memoryScope all opt-in for full shared memory visibility', async () => {
  mockAdapterResponses = ['writer output', 'reader output']

  const oma = new OpenMultiAgent({ defaultModel: 'mock-model' })
  const team = oma.createTeam('t', teamCfg())

  await oma.runTasks(team, [
    { title: 'Write', description: 'Write something', assignee: 'worker-a' },
    {
      title: 'Read all',
      description: 'Read everything',
      assignee: 'worker-b',
      memoryScope: 'all',
      dependsOn: ['Write'],
    },
  ])

  // memoryScope 'all' swaps the prerequisite-context section for the full
  // shared-memory dump.
  const secondPrompt = capturedPrompts[1] ?? ''
  expect(secondPrompt).toContain('## Shared Team Memory')
  expect(secondPrompt).toContain('task:')
  expect(secondPrompt).not.toContain('## Context from prerequisite tasks')
})
|
||||
})
|
||||
|
||||
describe('runTeam', () => {
|
||||
|
|
@ -215,7 +361,7 @@ describe('OpenMultiAgent', () => {
|
|||
})
|
||||
const team = oma.createTeam('t', teamCfg())
|
||||
|
||||
const result = await oma.runTeam(team, 'Research AI safety')
|
||||
const result = await oma.runTeam(team, 'First research AI safety best practices, then write a comprehensive implementation guide')
|
||||
|
||||
expect(result.success).toBe(true)
|
||||
// Should have coordinator result
|
||||
|
|
@ -233,10 +379,153 @@ describe('OpenMultiAgent', () => {
|
|||
const oma = new OpenMultiAgent({ defaultModel: 'mock-model' })
|
||||
const team = oma.createTeam('t', teamCfg())
|
||||
|
||||
const result = await oma.runTeam(team, 'Do something')
|
||||
const result = await oma.runTeam(team, 'First design the database schema, then implement the REST API endpoints')
|
||||
|
||||
expect(result.success).toBe(true)
|
||||
})
|
||||
|
||||
it('supports coordinator model override without affecting workers', async () => {
|
||||
mockAdapterResponses = [
|
||||
'```json\n[{"title": "Research", "description": "Research", "assignee": "worker-a"}]\n```',
|
||||
'worker output',
|
||||
'final synthesis',
|
||||
]
|
||||
|
||||
const oma = new OpenMultiAgent({
|
||||
defaultModel: 'expensive-model',
|
||||
defaultProvider: 'openai',
|
||||
})
|
||||
const team = oma.createTeam('t', teamCfg([
|
||||
{ ...agentConfig('worker-a'), model: 'worker-model' },
|
||||
]))
|
||||
|
||||
const result = await oma.runTeam(team, 'First research the topic, then synthesize findings', {
|
||||
coordinator: { model: 'cheap-model' },
|
||||
})
|
||||
|
||||
expect(result.success).toBe(true)
|
||||
expect(capturedChatOptions.length).toBe(3)
|
||||
expect(capturedChatOptions[0]?.model).toBe('cheap-model')
|
||||
expect(capturedChatOptions[1]?.model).toBe('worker-model')
|
||||
expect(capturedChatOptions[2]?.model).toBe('cheap-model')
|
||||
})
|
||||
|
||||
it('appends coordinator.instructions to the default system prompt', async () => {
|
||||
mockAdapterResponses = [
|
||||
'```json\n[{"title": "Plan", "description": "Plan", "assignee": "worker-a"}]\n```',
|
||||
'done',
|
||||
'final',
|
||||
]
|
||||
|
||||
const oma = new OpenMultiAgent({
|
||||
defaultModel: 'mock-model',
|
||||
defaultProvider: 'openai',
|
||||
})
|
||||
const team = oma.createTeam('t', teamCfg([
|
||||
{ ...agentConfig('worker-a'), model: 'worker-model' },
|
||||
]))
|
||||
|
||||
await oma.runTeam(team, 'First implement, then verify', {
|
||||
coordinator: {
|
||||
instructions: 'Always create a testing task after implementation tasks.',
|
||||
},
|
||||
})
|
||||
|
||||
const coordinatorPrompt = capturedChatOptions[0]?.systemPrompt ?? ''
|
||||
expect(coordinatorPrompt).toContain('You are a task coordinator responsible')
|
||||
expect(coordinatorPrompt).toContain('## Additional Instructions')
|
||||
expect(coordinatorPrompt).toContain('Always create a testing task after implementation tasks.')
|
||||
})
|
||||
|
||||
it('uses coordinator.systemPrompt override while still appending required sections', async () => {
|
||||
mockAdapterResponses = [
|
||||
'```json\n[{"title": "Plan", "description": "Plan", "assignee": "worker-a"}]\n```',
|
||||
'done',
|
||||
'final',
|
||||
]
|
||||
|
||||
const oma = new OpenMultiAgent({
|
||||
defaultModel: 'mock-model',
|
||||
defaultProvider: 'openai',
|
||||
})
|
||||
const team = oma.createTeam('t', teamCfg([
|
||||
{ ...agentConfig('worker-a'), model: 'worker-model' },
|
||||
]))
|
||||
|
||||
await oma.runTeam(team, 'First implement, then verify', {
|
||||
coordinator: {
|
||||
systemPrompt: 'You are a custom coordinator for monorepo planning.',
|
||||
},
|
||||
})
|
||||
|
||||
const coordinatorPrompt = capturedChatOptions[0]?.systemPrompt ?? ''
|
||||
expect(coordinatorPrompt).toContain('You are a custom coordinator for monorepo planning.')
|
||||
expect(coordinatorPrompt).toContain('## Team Roster')
|
||||
expect(coordinatorPrompt).toContain('## Output Format')
|
||||
expect(coordinatorPrompt).toContain('## When synthesising results')
|
||||
expect(coordinatorPrompt).not.toContain('You are a task coordinator responsible')
|
||||
})
|
||||
|
||||
it('applies advanced coordinator options (maxTokens, temperature, tools, disallowedTools)', async () => {
|
||||
mockAdapterResponses = [
|
||||
'```json\n[{"title": "Inspect", "description": "Inspect", "assignee": "worker-a"}]\n```',
|
||||
'worker output',
|
||||
'final synthesis',
|
||||
]
|
||||
|
||||
const oma = new OpenMultiAgent({
|
||||
defaultModel: 'mock-model',
|
||||
defaultProvider: 'openai',
|
||||
})
|
||||
const team = oma.createTeam('t', teamCfg([
|
||||
{ ...agentConfig('worker-a'), model: 'worker-model' },
|
||||
]))
|
||||
|
||||
await oma.runTeam(team, 'First inspect project, then produce output', {
|
||||
coordinator: {
|
||||
maxTurns: 5,
|
||||
maxTokens: 1234,
|
||||
temperature: 0,
|
||||
tools: ['file_read', 'grep'],
|
||||
disallowedTools: ['grep'],
|
||||
timeoutMs: 1500,
|
||||
loopDetection: { maxRepetitions: 2, loopDetectionWindow: 3 },
|
||||
},
|
||||
})
|
||||
|
||||
expect(capturedChatOptions[0]?.maxTokens).toBe(1234)
|
||||
expect(capturedChatOptions[0]?.temperature).toBe(0)
|
||||
expect(capturedChatOptions[0]?.tools).toBeDefined()
|
||||
expect(capturedChatOptions[0]?.tools?.map((t) => t.name)).toContain('file_read')
|
||||
expect(capturedChatOptions[0]?.tools?.map((t) => t.name)).not.toContain('grep')
|
||||
})
|
||||
|
||||
it('supports coordinator.toolPreset and intersects with tools allowlist', async () => {
|
||||
mockAdapterResponses = [
|
||||
'```json\n[{"title": "Inspect", "description": "Inspect", "assignee": "worker-a"}]\n```',
|
||||
'worker output',
|
||||
'final synthesis',
|
||||
]
|
||||
|
||||
const oma = new OpenMultiAgent({
|
||||
defaultModel: 'mock-model',
|
||||
defaultProvider: 'openai',
|
||||
})
|
||||
const team = oma.createTeam('t', teamCfg([
|
||||
{ ...agentConfig('worker-a'), model: 'worker-model' },
|
||||
]))
|
||||
|
||||
await oma.runTeam(team, 'First inspect project, then produce output', {
|
||||
coordinator: {
|
||||
toolPreset: 'readonly',
|
||||
tools: ['file_read', 'bash'],
|
||||
},
|
||||
})
|
||||
|
||||
const coordinatorToolNames = capturedChatOptions[0]?.tools?.map((t) => t.name) ?? []
|
||||
expect(coordinatorToolNames).toContain('file_read')
|
||||
expect(coordinatorToolNames).not.toContain('bash')
|
||||
})
|
||||
})
|
||||
|
||||
describe('config defaults', () => {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,85 @@
|
|||
import { describe, it, expect } from 'vitest'
|
||||
import { Agent } from '../src/agent/agent.js'
|
||||
import { AgentRunner } from '../src/agent/runner.js'
|
||||
import { ToolRegistry } from '../src/tool/framework.js'
|
||||
import { ToolExecutor } from '../src/tool/executor.js'
|
||||
import type { AgentConfig, LLMAdapter, LLMMessage } from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Adapter whose chat() always throws. */
|
||||
function errorAdapter(error: Error): LLMAdapter {
|
||||
return {
|
||||
name: 'error-mock',
|
||||
async chat(_messages: LLMMessage[]) {
|
||||
throw error
|
||||
},
|
||||
async *stream() {
|
||||
/* unused */
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
function buildAgentWithAdapter(config: AgentConfig, adapter: LLMAdapter) {
|
||||
const registry = new ToolRegistry()
|
||||
const executor = new ToolExecutor(registry)
|
||||
const agent = new Agent(config, registry, executor)
|
||||
|
||||
const runner = new AgentRunner(adapter, registry, executor, {
|
||||
model: config.model,
|
||||
systemPrompt: config.systemPrompt,
|
||||
maxTurns: config.maxTurns,
|
||||
agentName: config.name,
|
||||
})
|
||||
;(agent as any).runner = runner
|
||||
|
||||
return agent
|
||||
}
|
||||
|
||||
const baseConfig: AgentConfig = {
|
||||
name: 'test-agent',
|
||||
model: 'mock-model',
|
||||
systemPrompt: 'You are a test agent.',
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests — #98: AgentRunner.run() must propagate errors from stream()
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('AgentRunner.run() error propagation (#98)', () => {
|
||||
it('LLM adapter error surfaces as success:false in AgentRunResult', async () => {
|
||||
const apiError = new Error('API 500: internal server error')
|
||||
const agent = buildAgentWithAdapter(baseConfig, errorAdapter(apiError))
|
||||
|
||||
const result = await agent.run('hello')
|
||||
|
||||
expect(result.success).toBe(false)
|
||||
expect(result.output).toContain('API 500')
|
||||
})
|
||||
|
||||
it('AgentRunner.run() throws when adapter errors', async () => {
|
||||
const apiError = new Error('network timeout')
|
||||
const adapter = errorAdapter(apiError)
|
||||
const registry = new ToolRegistry()
|
||||
const executor = new ToolExecutor(registry)
|
||||
const runner = new AgentRunner(adapter, registry, executor, {
|
||||
model: 'mock-model',
|
||||
systemPrompt: 'test',
|
||||
agentName: 'test',
|
||||
})
|
||||
|
||||
await expect(
|
||||
runner.run([{ role: 'user', content: [{ type: 'text', text: 'hi' }] }]),
|
||||
).rejects.toThrow('network timeout')
|
||||
})
|
||||
|
||||
it('agent transitions to error state on LLM failure', async () => {
|
||||
const agent = buildAgentWithAdapter(baseConfig, errorAdapter(new Error('boom')))
|
||||
|
||||
await agent.run('hello')
|
||||
|
||||
expect(agent.getState().status).toBe('error')
|
||||
})
|
||||
})
|
||||
|
|
@ -107,6 +107,19 @@ describe('SharedMemory', () => {
|
|||
expect(summary).toContain('…')
|
||||
})
|
||||
|
||||
it('filters summary to only requested task IDs', async () => {
|
||||
const mem = new SharedMemory()
|
||||
await mem.write('alice', 'task:t1:result', 'output 1')
|
||||
await mem.write('bob', 'task:t2:result', 'output 2')
|
||||
await mem.write('alice', 'notes', 'not a task result')
|
||||
|
||||
const summary = await mem.getSummary({ taskIds: ['t2'] })
|
||||
expect(summary).toContain('### bob')
|
||||
expect(summary).toContain('task:t2:result: output 2')
|
||||
expect(summary).not.toContain('task:t1:result: output 1')
|
||||
expect(summary).not.toContain('notes: not a task result')
|
||||
})
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// listAll
|
||||
// -------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -0,0 +1,425 @@
|
|||
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
||||
import { isSimpleGoal, selectBestAgent } from '../src/orchestrator/orchestrator.js'
|
||||
import { OpenMultiAgent } from '../src/orchestrator/orchestrator.js'
|
||||
import type {
|
||||
AgentConfig,
|
||||
LLMChatOptions,
|
||||
LLMMessage,
|
||||
LLMResponse,
|
||||
OrchestratorEvent,
|
||||
TeamConfig,
|
||||
} from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// isSimpleGoal — pure function tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('isSimpleGoal', () => {
|
||||
describe('returns true for simple goals', () => {
|
||||
const simpleGoals = [
|
||||
'Say hello',
|
||||
'What is 2 + 2?',
|
||||
'Explain monads in one paragraph',
|
||||
'Translate this to French: Good morning',
|
||||
'List 3 blockchain security vulnerabilities',
|
||||
'Write a haiku about TypeScript',
|
||||
'Summarize this article',
|
||||
'你好,回一个字:哈',
|
||||
'Fix the typo in the README',
|
||||
]
|
||||
|
||||
for (const goal of simpleGoals) {
|
||||
it(`"${goal}"`, () => {
|
||||
expect(isSimpleGoal(goal)).toBe(true)
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
describe('returns false for complex goals', () => {
|
||||
it('goal with explicit sequencing (first…then)', () => {
|
||||
expect(isSimpleGoal('First design the API schema, then implement the endpoints')).toBe(false)
|
||||
})
|
||||
|
||||
it('goal with numbered steps', () => {
|
||||
expect(isSimpleGoal('1. Design the schema\n2. Implement the API\n3. Write tests')).toBe(false)
|
||||
})
|
||||
|
||||
it('goal with step N pattern', () => {
|
||||
expect(isSimpleGoal('Step 1: set up the project. Step 2: write the code.')).toBe(false)
|
||||
})
|
||||
|
||||
it('goal with collaboration language', () => {
|
||||
expect(isSimpleGoal('Collaborate on building a REST API with tests')).toBe(false)
|
||||
})
|
||||
|
||||
it('goal with coordination language', () => {
|
||||
expect(isSimpleGoal('Coordinate the team to build and deploy the service')).toBe(false)
|
||||
})
|
||||
|
||||
it('goal with parallel execution', () => {
|
||||
expect(isSimpleGoal('Run the linter and tests in parallel')).toBe(false)
|
||||
})
|
||||
|
||||
it('goal with multiple deliverables (build…and…test)', () => {
|
||||
expect(isSimpleGoal('Build the REST API endpoints and then write comprehensive integration tests for each one')).toBe(false)
|
||||
})
|
||||
|
||||
it('goal exceeding max length', () => {
|
||||
const longGoal = 'Explain the concept of ' + 'a'.repeat(200)
|
||||
expect(isSimpleGoal(longGoal)).toBe(false)
|
||||
})
|
||||
|
||||
it('goal with phase markers', () => {
|
||||
expect(isSimpleGoal('Phase 1 is planning, phase 2 is execution')).toBe(false)
|
||||
})
|
||||
|
||||
it('goal with "work together"', () => {
|
||||
expect(isSimpleGoal('Work together to build the frontend and backend')).toBe(false)
|
||||
})
|
||||
|
||||
it('goal with "review each other"', () => {
|
||||
expect(isSimpleGoal('Write code and review each other\'s pull requests')).toBe(false)
|
||||
})
|
||||
})
|
||||
|
||||
describe('edge cases', () => {
|
||||
it('empty string is simple', () => {
|
||||
expect(isSimpleGoal('')).toBe(true)
|
||||
})
|
||||
|
||||
it('"and" alone does not trigger complexity', () => {
|
||||
// Unlike the original turbo implementation, common words like "and"
|
||||
// should NOT flag a goal as complex.
|
||||
expect(isSimpleGoal('Pros and cons of TypeScript')).toBe(true)
|
||||
})
|
||||
|
||||
it('"then" alone does not trigger complexity', () => {
|
||||
expect(isSimpleGoal('What happened then?')).toBe(true)
|
||||
})
|
||||
|
||||
it('"summarize" alone does not trigger complexity', () => {
|
||||
expect(isSimpleGoal('Summarize the article about AI safety')).toBe(true)
|
||||
})
|
||||
|
||||
it('"analyze" alone does not trigger complexity', () => {
|
||||
expect(isSimpleGoal('Analyze this error log')).toBe(true)
|
||||
})
|
||||
|
||||
it('goal exactly at length boundary (200) is simple if no patterns', () => {
|
||||
const goal = 'x'.repeat(200)
|
||||
expect(isSimpleGoal(goal)).toBe(true)
|
||||
})
|
||||
|
||||
it('goal at 201 chars is complex', () => {
|
||||
const goal = 'x'.repeat(201)
|
||||
expect(isSimpleGoal(goal)).toBe(false)
|
||||
})
|
||||
})
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Regression: tightened coordinate/collaborate regex (PR #70 review point 5)
|
||||
//
|
||||
// Descriptive uses of "coordinate" / "collaborate" / "collaboration" must
|
||||
// NOT be flagged as complex — only imperative directives aimed at agents.
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
describe('tightened coordinate/collaborate patterns', () => {
|
||||
it('descriptive "how X coordinates" is simple', () => {
|
||||
expect(isSimpleGoal('Explain how Kubernetes pods coordinate state')).toBe(true)
|
||||
})
|
||||
|
||||
it('descriptive "collaboration" noun is simple', () => {
|
||||
expect(isSimpleGoal('What is microservice collaboration?')).toBe(true)
|
||||
})
|
||||
|
||||
it('descriptive "team that coordinates" is simple', () => {
|
||||
expect(isSimpleGoal('Describe a team that coordinates releases')).toBe(true)
|
||||
})
|
||||
|
||||
it('descriptive "without collaborating" is simple', () => {
|
||||
expect(isSimpleGoal('Show how to deploy without collaborating')).toBe(true)
|
||||
})
|
||||
|
||||
it('imperative "collaborate with X" is complex', () => {
|
||||
expect(isSimpleGoal('Collaborate with the writer to draft a post')).toBe(false)
|
||||
})
|
||||
|
||||
it('imperative "coordinate the team" is complex', () => {
|
||||
expect(isSimpleGoal('Coordinate the team for release')).toBe(false)
|
||||
})
|
||||
|
||||
it('imperative "coordinate across services" is complex', () => {
|
||||
expect(isSimpleGoal('Coordinate across services to roll out the change')).toBe(false)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// selectBestAgent — keyword affinity scoring
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('selectBestAgent', () => {
|
||||
it('selects agent whose systemPrompt best matches the goal', () => {
|
||||
const agents: AgentConfig[] = [
|
||||
{ name: 'researcher', model: 'test', systemPrompt: 'You are a research expert who analyzes data and writes reports' },
|
||||
{ name: 'coder', model: 'test', systemPrompt: 'You are a software engineer who writes TypeScript code' },
|
||||
]
|
||||
|
||||
expect(selectBestAgent('Write TypeScript code for the API', agents)).toBe(agents[1])
|
||||
expect(selectBestAgent('Research the latest AI papers', agents)).toBe(agents[0])
|
||||
})
|
||||
|
||||
it('falls back to first agent when no keywords match', () => {
|
||||
const agents: AgentConfig[] = [
|
||||
{ name: 'alpha', model: 'test' },
|
||||
{ name: 'beta', model: 'test' },
|
||||
]
|
||||
|
||||
expect(selectBestAgent('xyzzy', agents)).toBe(agents[0])
|
||||
})
|
||||
|
||||
it('returns the only agent when team has one member', () => {
|
||||
const agents: AgentConfig[] = [
|
||||
{ name: 'solo', model: 'test', systemPrompt: 'General purpose agent' },
|
||||
]
|
||||
|
||||
expect(selectBestAgent('anything', agents)).toBe(agents[0])
|
||||
})
|
||||
|
||||
it('considers agent name in scoring', () => {
|
||||
const agents: AgentConfig[] = [
|
||||
{ name: 'writer', model: 'test', systemPrompt: 'You help with tasks' },
|
||||
{ name: 'reviewer', model: 'test', systemPrompt: 'You help with tasks' },
|
||||
]
|
||||
|
||||
// "review" should match "reviewer" agent name
|
||||
expect(selectBestAgent('Review this pull request', agents)).toBe(agents[1])
|
||||
})
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Regression: model field asymmetry (PR #70 review point 2)
|
||||
//
|
||||
// selectBestAgent must mirror Scheduler.capability-match exactly:
|
||||
// - agentKeywords includes `model`
|
||||
// - agentText excludes `model`
|
||||
// This means a goal that mentions a model name should boost the agent
|
||||
// bound to that model (via scoreB), even if neither name nor system prompt
|
||||
// contains the keyword.
|
||||
// -------------------------------------------------------------------------
|
||||
it('matches scheduler asymmetry: model name in goal boosts the bound agent', () => {
|
||||
const agents: AgentConfig[] = [
|
||||
// Distinct, non-overlapping prompts so neither one wins on scoreA
|
||||
{ name: 'a1', model: 'haiku-fast-model', systemPrompt: 'You handle quick lookups' },
|
||||
{ name: 'a2', model: 'opus-deep-model', systemPrompt: 'You handle deep analysis' },
|
||||
]
|
||||
|
||||
// Mention "haiku" — this is only present in a1.model, so the bound
|
||||
// agent should win because agentKeywords (which includes model) matches.
|
||||
expect(selectBestAgent('Use the haiku model please', agents)).toBe(agents[0])
|
||||
})
|
||||
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// runTeam short-circuit integration test
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
let mockAdapterResponses: string[] = []
|
||||
|
||||
vi.mock('../src/llm/adapter.js', () => ({
|
||||
createAdapter: async () => {
|
||||
let callIndex = 0
|
||||
return {
|
||||
name: 'mock',
|
||||
async chat(_msgs: LLMMessage[], options: LLMChatOptions): Promise<LLMResponse> {
|
||||
const text = mockAdapterResponses[callIndex] ?? 'default mock response'
|
||||
callIndex++
|
||||
return {
|
||||
id: `resp-${callIndex}`,
|
||||
content: [{ type: 'text', text }],
|
||||
model: options.model ?? 'mock-model',
|
||||
stop_reason: 'end_turn',
|
||||
usage: { input_tokens: 10, output_tokens: 20 },
|
||||
}
|
||||
},
|
||||
async *stream() {
|
||||
yield { type: 'done' as const, data: {} }
|
||||
},
|
||||
}
|
||||
},
|
||||
}))
|
||||
|
||||
function agentConfig(name: string, systemPrompt?: string): AgentConfig {
|
||||
return {
|
||||
name,
|
||||
model: 'mock-model',
|
||||
provider: 'openai',
|
||||
systemPrompt: systemPrompt ?? `You are ${name}.`,
|
||||
}
|
||||
}
|
||||
|
||||
function teamCfg(agents?: AgentConfig[]): TeamConfig {
|
||||
return {
|
||||
name: 'test-team',
|
||||
agents: agents ?? [
|
||||
agentConfig('researcher', 'You research topics and analyze data'),
|
||||
agentConfig('coder', 'You write TypeScript code'),
|
||||
],
|
||||
sharedMemory: true,
|
||||
}
|
||||
}
|
||||
|
||||
describe('runTeam short-circuit', () => {
|
||||
beforeEach(() => {
|
||||
mockAdapterResponses = []
|
||||
})
|
||||
|
||||
it('short-circuits simple goals to a single agent (no coordinator)', async () => {
|
||||
// Only ONE response needed — no coordinator decomposition or synthesis
|
||||
mockAdapterResponses = ['Direct answer without coordination']
|
||||
|
||||
const events: OrchestratorEvent[] = []
|
||||
const oma = new OpenMultiAgent({
|
||||
defaultModel: 'mock-model',
|
||||
onProgress: (e) => events.push(e),
|
||||
})
|
||||
const team = oma.createTeam('t', teamCfg())
|
||||
|
||||
const result = await oma.runTeam(team, 'Say hello')
|
||||
|
||||
expect(result.success).toBe(true)
|
||||
expect(result.agentResults.size).toBe(1)
|
||||
// Should NOT have coordinator results — short-circuit bypasses it
|
||||
expect(result.agentResults.has('coordinator')).toBe(false)
|
||||
})
|
||||
|
||||
it('emits progress events for short-circuit path', async () => {
|
||||
mockAdapterResponses = ['done']
|
||||
|
||||
const events: OrchestratorEvent[] = []
|
||||
const oma = new OpenMultiAgent({
|
||||
defaultModel: 'mock-model',
|
||||
onProgress: (e) => events.push(e),
|
||||
})
|
||||
const team = oma.createTeam('t', teamCfg())
|
||||
|
||||
await oma.runTeam(team, 'Say hello')
|
||||
|
||||
const types = events.map(e => e.type)
|
||||
expect(types).toContain('agent_start')
|
||||
expect(types).toContain('agent_complete')
|
||||
})
|
||||
|
||||
it('uses coordinator for complex goals', async () => {
|
||||
// Complex goal — needs coordinator decomposition + execution + synthesis
|
||||
mockAdapterResponses = [
|
||||
'```json\n[{"title": "Research", "description": "Research the topic", "assignee": "researcher"}]\n```',
|
||||
'Research results',
|
||||
'Final synthesis',
|
||||
]
|
||||
|
||||
const oma = new OpenMultiAgent({ defaultModel: 'mock-model' })
|
||||
const team = oma.createTeam('t', teamCfg())
|
||||
|
||||
const result = await oma.runTeam(
|
||||
team,
|
||||
'First research AI safety best practices, then write a comprehensive guide with code examples',
|
||||
)
|
||||
|
||||
expect(result.success).toBe(true)
|
||||
// Complex goal should go through coordinator
|
||||
expect(result.agentResults.has('coordinator')).toBe(true)
|
||||
})
|
||||
|
||||
it('selects best-matching agent for simple goals', async () => {
|
||||
mockAdapterResponses = ['code result']
|
||||
|
||||
const events: OrchestratorEvent[] = []
|
||||
const oma = new OpenMultiAgent({
|
||||
defaultModel: 'mock-model',
|
||||
onProgress: (e) => events.push(e),
|
||||
})
|
||||
const team = oma.createTeam('t', teamCfg())
|
||||
|
||||
await oma.runTeam(team, 'Write TypeScript code')
|
||||
|
||||
// Should pick 'coder' agent based on keyword match
|
||||
const startEvent = events.find(e => e.type === 'agent_start')
|
||||
expect(startEvent?.agent).toBe('coder')
|
||||
})
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Regression: no duplicate progress events (#82)
|
||||
//
|
||||
// The short-circuit path must emit exactly one agent_start and one
|
||||
// agent_complete event. Before the fix, calling this.runAgent() added
|
||||
// a second pair of events on top of the ones emitted by the short-circuit
|
||||
// block itself, and buildTeamRunResult() double-counted completedTasks.
|
||||
// -------------------------------------------------------------------------
|
||||
it('emits exactly one agent_start and one agent_complete (no duplicates)', async () => {
|
||||
mockAdapterResponses = ['done']
|
||||
|
||||
const events: OrchestratorEvent[] = []
|
||||
const oma = new OpenMultiAgent({
|
||||
defaultModel: 'mock-model',
|
||||
onProgress: (e) => events.push(e),
|
||||
})
|
||||
const team = oma.createTeam('t', teamCfg())
|
||||
|
||||
await oma.runTeam(team, 'Say hello')
|
||||
|
||||
const starts = events.filter(e => e.type === 'agent_start')
|
||||
const completes = events.filter(e => e.type === 'agent_complete')
|
||||
expect(starts).toHaveLength(1)
|
||||
expect(completes).toHaveLength(1)
|
||||
})
|
||||
|
||||
it('completedTaskCount is exactly 1 after a successful short-circuit run', async () => {
|
||||
mockAdapterResponses = ['done']
|
||||
const oma = new OpenMultiAgent({ defaultModel: 'mock-model' })
|
||||
const team = oma.createTeam('t', teamCfg())
|
||||
|
||||
await oma.runTeam(team, 'Say hello')
|
||||
|
||||
expect(oma.getStatus().completedTasks).toBe(1)
|
||||
})
|
||||
|
||||
it('aborted signal causes the underlying agent loop to skip the LLM call', async () => {
|
||||
// Pre-aborted controller — runner should break before any chat() call
|
||||
const controller = new AbortController()
|
||||
controller.abort()
|
||||
|
||||
mockAdapterResponses = ['should never be returned']
|
||||
|
||||
const oma = new OpenMultiAgent({ defaultModel: 'mock-model' })
|
||||
const team = oma.createTeam('t', teamCfg())
|
||||
|
||||
const result = await oma.runTeam(team, 'Say hello', { abortSignal: controller.signal })
|
||||
|
||||
// Short-circuit ran one agent, but its loop bailed before any LLM call,
|
||||
// so the agent's output is the empty string and token usage is zero.
|
||||
const agentResult = result.agentResults.values().next().value
|
||||
expect(agentResult?.output).toBe('')
|
||||
expect(agentResult?.tokenUsage.input_tokens).toBe(0)
|
||||
expect(agentResult?.tokenUsage.output_tokens).toBe(0)
|
||||
})
|
||||
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public API surface — internal helpers must stay out of the barrel export
|
||||
// (PR #70 review point 3)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('public API barrel', () => {
|
||||
it('does not re-export isSimpleGoal or selectBestAgent', async () => {
|
||||
const indexExports = await import('../src/index.js')
|
||||
expect((indexExports as Record<string, unknown>).isSimpleGoal).toBeUndefined()
|
||||
expect((indexExports as Record<string, unknown>).selectBestAgent).toBeUndefined()
|
||||
})
|
||||
|
||||
it('still re-exports the documented public symbols', async () => {
|
||||
const indexExports = await import('../src/index.js')
|
||||
expect(indexExports.OpenMultiAgent).toBeDefined()
|
||||
expect(indexExports.executeWithRetry).toBeDefined()
|
||||
expect(indexExports.computeRetryDelay).toBeDefined()
|
||||
})
|
||||
})
|
||||
|
|
@ -27,6 +27,7 @@ describe('TaskQueue', () => {
|
|||
q.add(task('a'))
|
||||
expect(q.list()).toHaveLength(1)
|
||||
expect(q.list()[0].id).toBe('a')
|
||||
expect(q.get('a')?.title).toBe('a')
|
||||
})
|
||||
|
||||
it('fires task:ready for a task with no dependencies', () => {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,248 @@
|
|||
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
||||
import { OpenMultiAgent } from '../src/orchestrator/orchestrator.js'
|
||||
import { Agent } from '../src/agent/agent.js'
|
||||
import { ToolRegistry } from '../src/tool/framework.js'
|
||||
import { ToolExecutor } from '../src/tool/executor.js'
|
||||
import type { AgentConfig, LLMChatOptions, LLMMessage, LLMResponse, OrchestratorEvent } from '../src/types.js'
|
||||
|
||||
let mockAdapterResponses: string[] = []
|
||||
let mockAdapterUsage: Array<{ input_tokens: number; output_tokens: number }> = []
|
||||
|
||||
vi.mock('../src/llm/adapter.js', () => ({
|
||||
createAdapter: async () => {
|
||||
let callIndex = 0
|
||||
return {
|
||||
name: 'mock',
|
||||
async chat(_msgs: LLMMessage[], options: LLMChatOptions): Promise<LLMResponse> {
|
||||
const text = mockAdapterResponses[callIndex] ?? 'default mock response'
|
||||
const usage = mockAdapterUsage[callIndex] ?? { input_tokens: 10, output_tokens: 20 }
|
||||
callIndex++
|
||||
return {
|
||||
id: `resp-${callIndex}`,
|
||||
content: [{ type: 'text', text }],
|
||||
model: options.model ?? 'mock-model',
|
||||
stop_reason: 'end_turn',
|
||||
usage,
|
||||
}
|
||||
},
|
||||
async *stream() {
|
||||
yield { type: 'done' as const, data: {} }
|
||||
},
|
||||
}
|
||||
},
|
||||
}))
|
||||
|
||||
function agentConfig(name: string, maxTokenBudget?: number): AgentConfig {
|
||||
return {
|
||||
name,
|
||||
model: 'mock-model',
|
||||
provider: 'openai',
|
||||
systemPrompt: `You are ${name}.`,
|
||||
maxTokenBudget,
|
||||
}
|
||||
}
|
||||
|
||||
// Covers maxTokenBudget enforcement at three levels — per-agent (runAgent),
// per-team (runTasks) and orchestrator-wide (runTeam). The mocked adapter
// replays mockAdapterResponses / mockAdapterUsage in call order, so each
// test scripts exact token totals and asserts which tasks run, skip or fail.
describe('token budget enforcement', () => {
  // Reset the shared mock scripts so responses cannot leak between tests.
  beforeEach(() => {
    mockAdapterResponses = []
    mockAdapterUsage = []
  })

  it('enforces agent-level maxTokenBudget in runAgent', async () => {
    // A single call costing 35 tokens against a 30-token agent budget.
    mockAdapterResponses = ['over budget']
    mockAdapterUsage = [{ input_tokens: 20, output_tokens: 15 }]

    const events: OrchestratorEvent[] = []
    const oma = new OpenMultiAgent({
      defaultModel: 'mock-model',
      onProgress: e => events.push(e),
    })

    const result = await oma.runAgent(agentConfig('solo', 30), 'test')

    // The run fails with budgetExceeded, but the assistant message produced
    // before the budget check is still preserved in the transcript.
    expect(result.success).toBe(false)
    expect(result.budgetExceeded).toBe(true)
    expect(result.messages).toHaveLength(1)
    expect(result.messages[0]?.role).toBe('assistant')
    expect(result.messages[0]?.content[0]).toMatchObject({ type: 'text', text: 'over budget' })
    expect(events.some(e => e.type === 'budget_exceeded')).toBe(true)
  })

  it('emits budget_exceeded stream event without error transition', async () => {
    mockAdapterResponses = ['over budget']
    mockAdapterUsage = [{ input_tokens: 20, output_tokens: 15 }]

    const agent = new Agent(
      agentConfig('streamer', 30),
      new ToolRegistry(),
      new ToolExecutor(new ToolRegistry()),
    )

    const eventTypes: string[] = []
    for await (const event of agent.stream('test')) {
      eventTypes.push(event.type)
    }

    // Budget exhaustion is a normal completion path for the stream: it must
    // surface budget_exceeded and done, never an error transition.
    expect(eventTypes).toContain('budget_exceeded')
    expect(eventTypes).toContain('done')
    expect(eventTypes).not.toContain('error')
    expect(agent.getState().status).toBe('completed')
  })

  it('does not skip in-progress sibling tasks when team budget is exceeded mid-batch', async () => {
    mockAdapterResponses = ['done-a', 'done-b', 'done-c']
    mockAdapterUsage = [
      { input_tokens: 15, output_tokens: 10 }, // A => 25
      { input_tokens: 15, output_tokens: 10 }, // B => 50 total (exceeds 40)
      { input_tokens: 15, output_tokens: 10 }, // C should never run
    ]

    const events: OrchestratorEvent[] = []
    const oma = new OpenMultiAgent({
      defaultModel: 'mock-model',
      maxTokenBudget: 40,
      onProgress: e => events.push(e),
    })
    const team = oma.createTeam('team-siblings', {
      name: 'team-siblings',
      agents: [agentConfig('worker-a'), agentConfig('worker-b')],
      sharedMemory: false,
    })

    // A and B are independent siblings; C depends on A. The team budget
    // trips while A and B may both be in flight.
    await oma.runTasks(team, [
      { title: 'Task A', description: 'A', assignee: 'worker-a' },
      { title: 'Task B', description: 'B', assignee: 'worker-b' },
      { title: 'Task C', description: 'C', assignee: 'worker-a', dependsOn: ['Task A'] },
    ])

    const completedTaskIds = new Set(
      events.filter(e => e.type === 'task_complete').map(e => e.task).filter(Boolean) as string[],
    )
    const skippedTaskIds = new Set(
      events.filter(e => e.type === 'task_skipped').map(e => e.task).filter(Boolean) as string[],
    )

    // A task must never be reported both completed and skipped.
    const overlap = [...completedTaskIds].filter(id => skippedTaskIds.has(id))
    expect(overlap).toHaveLength(0)
  })

  it('does not trigger budget events when budget is not exceeded', async () => {
    // Two calls totalling 40 tokens, comfortably under the 100-token budget.
    mockAdapterResponses = ['done-a', 'done-b']
    mockAdapterUsage = [
      { input_tokens: 10, output_tokens: 10 },
      { input_tokens: 10, output_tokens: 10 },
    ]
    const events: OrchestratorEvent[] = []
    const oma = new OpenMultiAgent({
      defaultModel: 'mock-model',
      maxTokenBudget: 100,
      onProgress: e => events.push(e),
    })
    const team = oma.createTeam('team-a', {
      name: 'team-a',
      agents: [agentConfig('worker-a'), agentConfig('worker-b')],
      sharedMemory: false,
    })

    const result = await oma.runTasks(team, [
      { title: 'A', description: 'Do A', assignee: 'worker-a' },
      { title: 'B', description: 'Do B', assignee: 'worker-b', dependsOn: ['A'] },
    ])

    expect(result.success).toBe(true)
    expect(events.some(e => e.type === 'budget_exceeded')).toBe(false)
  })

  it('enforces team budget in runTasks and skips remaining tasks', async () => {
    mockAdapterResponses = ['done-a', 'done-b', 'done-c']
    mockAdapterUsage = [
      { input_tokens: 20, output_tokens: 15 }, // A => 35
      { input_tokens: 20, output_tokens: 15 }, // B => 70 total (exceeds 60)
      { input_tokens: 20, output_tokens: 15 }, // C should not run
    ]

    const events: OrchestratorEvent[] = []
    const oma = new OpenMultiAgent({
      defaultModel: 'mock-model',
      maxTokenBudget: 60,
      onProgress: e => events.push(e),
    })
    const team = oma.createTeam('team-b', {
      name: 'team-b',
      agents: [agentConfig('worker')],
      sharedMemory: false,
    })

    const result = await oma.runTasks(team, [
      { title: 'A', description: 'A', assignee: 'worker' },
      { title: 'B', description: 'B', assignee: 'worker', dependsOn: ['A'] },
      { title: 'C', description: 'C', assignee: 'worker', dependsOn: ['B'] },
    ])

    // Only A and B ran (35 + 35 = 70 tokens); C was skipped once the budget tripped.
    expect(result.totalTokenUsage.input_tokens + result.totalTokenUsage.output_tokens).toBe(70)
    expect(events.some(e => e.type === 'budget_exceeded')).toBe(true)
    expect(events.some(e => e.type === 'task_skipped')).toBe(true)
  })

  it('counts retry token usage before enforcing team budget', async () => {
    mockAdapterResponses = ['attempt-1', 'attempt-2', 'should-skip']
    mockAdapterUsage = [
      { input_tokens: 20, output_tokens: 15 }, // attempt 1
      { input_tokens: 20, output_tokens: 15 }, // attempt 2
      { input_tokens: 20, output_tokens: 15 }, // next task (should skip)
    ]

    const events: OrchestratorEvent[] = []
    const oma = new OpenMultiAgent({
      defaultModel: 'mock-model',
      maxTokenBudget: 50,
      onProgress: e => events.push(e),
    })
    // The worker's own 1-token budget forces a retry on the first task.
    const team = oma.createTeam('team-c', {
      name: 'team-c',
      agents: [agentConfig('retry-worker', 1)],
      sharedMemory: false,
    })

    const result = await oma.runTasks(team, [
      { title: 'Retrying task', description: 'Will exceed internal budget', assignee: 'retry-worker', maxRetries: 1 },
      { title: 'Later task', description: 'Should be skipped', assignee: 'retry-worker', dependsOn: ['Retrying task'] },
    ])

    // Both attempts (70 tokens total) must count toward the 50-token team budget.
    expect(result.totalTokenUsage.input_tokens + result.totalTokenUsage.output_tokens).toBe(70)
    expect(events.some(e => e.type === 'budget_exceeded')).toBe(true)
    expect(events.some(e => e.type === 'error')).toBe(true)
  })

  it('enforces orchestrator budget in runTeam', async () => {
    mockAdapterResponses = [
      '```json\n[{"title":"Task A","description":"Do A","assignee":"worker"}]\n```',
      'worker result',
      'synthesis should not run when budget exceeded',
    ]
    mockAdapterUsage = [
      { input_tokens: 20, output_tokens: 15 }, // decomposition => 35
      { input_tokens: 20, output_tokens: 15 }, // task => 70 total (exceeds 60)
      { input_tokens: 20, output_tokens: 15 }, // synthesis should not execute
    ]

    const events: OrchestratorEvent[] = []
    const oma = new OpenMultiAgent({
      defaultModel: 'mock-model',
      maxTokenBudget: 60,
      onProgress: e => events.push(e),
    })
    const team = oma.createTeam('team-d', {
      name: 'team-d',
      agents: [agentConfig('worker')],
      sharedMemory: false,
    })

    // Use a goal that explicitly mentions sequencing so the short-circuit
    // path is skipped and the coordinator decomposition + execution flow
    // (which this test is exercising) actually runs.
    const result = await oma.runTeam(team, 'First plan the work, then execute it')
    expect(result.totalTokenUsage.input_tokens + result.totalTokenUsage.output_tokens).toBe(70)
    expect(events.some(e => e.type === 'budget_exceeded')).toBe(true)
  })
})
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
import { describe, it, expect, vi } from 'vitest'
|
||||
import { z } from 'zod'
|
||||
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
|
||||
import { ToolExecutor } from '../src/tool/executor.js'
|
||||
import { ToolExecutor, truncateToolOutput } from '../src/tool/executor.js'
|
||||
import type { ToolUseContext } from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -191,3 +191,222 @@ describe('ToolRegistry', () => {
|
|||
expect(defs[0].inputSchema).toHaveProperty('properties')
|
||||
})
|
||||
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// truncateToolOutput
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Unit tests for the pure truncateToolOutput helper: output at or under the
// limit passes through untouched; longer output is reduced to a head segment,
// a '[...truncated' marker, and a tail segment, never exceeding maxChars.
describe('truncateToolOutput', () => {
  it('returns data unchanged when under the limit', () => {
    const data = 'short output'
    expect(truncateToolOutput(data, 100)).toBe(data)
  })

  it('returns data unchanged when exactly at the limit', () => {
    // Boundary case: the maxChars limit is inclusive.
    const data = 'x'.repeat(100)
    expect(truncateToolOutput(data, 100)).toBe(data)
  })

  it('truncates data exceeding the limit with head/tail and marker', () => {
    // Distinct head (A) and tail (B) characters let us verify both ends survive.
    const data = 'A'.repeat(300) + 'B'.repeat(700)
    const result = truncateToolOutput(data, 500)
    expect(result).toContain('[...truncated')
    expect(result.length).toBeLessThanOrEqual(500)
    // Head portion starts with As
    expect(result.startsWith('A')).toBe(true)
    // Tail portion ends with Bs
    expect(result.endsWith('B')).toBe(true)
  })

  it('result never exceeds maxChars', () => {
    const data = 'x'.repeat(10000)
    const result = truncateToolOutput(data, 1000)
    expect(result.length).toBeLessThanOrEqual(1000)
    expect(result).toContain('[...truncated')
  })

  it('handles empty string', () => {
    expect(truncateToolOutput('', 100)).toBe('')
  })

  it('handles very small maxChars gracefully', () => {
    const data = 'x'.repeat(100)
    // With maxChars=1, the marker alone exceeds the budget — falls back to hard slice
    const result = truncateToolOutput(data, 1)
    expect(result.length).toBeLessThanOrEqual(1)
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tool output truncation (integration)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Integration tests: ToolExecutor applies output truncation to tool results,
// honouring the agent-level maxToolOutputChars option and the per-tool
// maxOutputChars override. A tool-level limit always wins over the agent
// level, whether it is smaller or larger; zero/negative limits disable
// truncation entirely. `dummyContext` is the shared ToolUseContext fixture
// defined earlier in this file.
describe('ToolExecutor output truncation', () => {
  it('truncates output when agent-level maxToolOutputChars is set', async () => {
    const bigTool = defineTool({
      name: 'big',
      description: 'Returns large output.',
      inputSchema: z.object({}),
      execute: async () => ({ data: 'x'.repeat(5000) }),
    })
    const registry = new ToolRegistry()
    registry.register(bigTool)
    const executor = new ToolExecutor(registry, { maxToolOutputChars: 200 })

    const result = await executor.execute('big', {}, dummyContext)
    expect(result.data.length).toBeLessThan(5000)
    expect(result.data).toContain('[...truncated')
  })

  it('does not truncate when output is under the limit', async () => {
    const smallTool = defineTool({
      name: 'small',
      description: 'Returns small output.',
      inputSchema: z.object({}),
      execute: async () => ({ data: 'hello' }),
    })
    const registry = new ToolRegistry()
    registry.register(smallTool)
    const executor = new ToolExecutor(registry, { maxToolOutputChars: 200 })

    const result = await executor.execute('small', {}, dummyContext)
    expect(result.data).toBe('hello')
  })

  it('per-tool maxOutputChars overrides agent-level setting (smaller)', async () => {
    const toolWithLimit = defineTool({
      name: 'limited',
      description: 'Has its own limit.',
      inputSchema: z.object({}),
      maxOutputChars: 200,
      execute: async () => ({ data: 'y'.repeat(5000) }),
    })
    const registry = new ToolRegistry()
    registry.register(toolWithLimit)
    // Agent-level is 1000 but tool-level is 200 -- tool wins
    const executor = new ToolExecutor(registry, { maxToolOutputChars: 1000 })

    const result = await executor.execute('limited', {}, dummyContext)
    expect(result.data).toContain('[...truncated')
    expect(result.data.length).toBeLessThanOrEqual(200)
  })

  it('per-tool maxOutputChars overrides agent-level setting (larger)', async () => {
    const toolWithLimit = defineTool({
      name: 'limited',
      description: 'Has its own limit.',
      inputSchema: z.object({}),
      maxOutputChars: 2000,
      execute: async () => ({ data: 'y'.repeat(5000) }),
    })
    const registry = new ToolRegistry()
    registry.register(toolWithLimit)
    // Agent-level is 500 but tool-level is 2000 -- tool wins
    const executor = new ToolExecutor(registry, { maxToolOutputChars: 500 })

    const result = await executor.execute('limited', {}, dummyContext)
    expect(result.data).toContain('[...truncated')
    expect(result.data.length).toBeLessThanOrEqual(2000)
    // Longer than the agent-level limit proves the tool-level limit was used.
    expect(result.data.length).toBeGreaterThan(500)
  })

  it('per-tool maxOutputChars works without agent-level setting', async () => {
    const toolWithLimit = defineTool({
      name: 'limited',
      description: 'Has its own limit.',
      inputSchema: z.object({}),
      maxOutputChars: 300,
      execute: async () => ({ data: 'z'.repeat(5000) }),
    })
    const registry = new ToolRegistry()
    registry.register(toolWithLimit)
    const executor = new ToolExecutor(registry)

    const result = await executor.execute('limited', {}, dummyContext)
    expect(result.data).toContain('[...truncated')
    expect(result.data.length).toBeLessThanOrEqual(300)
  })

  it('truncates error results too', async () => {
    // A thrown error's message is also subject to truncation.
    const errorTool = defineTool({
      name: 'errorbig',
      description: 'Throws a huge error.',
      inputSchema: z.object({}),
      execute: async () => { throw new Error('E'.repeat(5000)) },
    })
    const registry = new ToolRegistry()
    registry.register(errorTool)
    const executor = new ToolExecutor(registry, { maxToolOutputChars: 200 })

    const result = await executor.execute('errorbig', {}, dummyContext)
    expect(result.isError).toBe(true)
    expect(result.data).toContain('[...truncated')
    expect(result.data.length).toBeLessThan(5000)
  })

  it('no truncation when maxToolOutputChars is 0', async () => {
    const bigTool = defineTool({
      name: 'big',
      description: 'Returns large output.',
      inputSchema: z.object({}),
      execute: async () => ({ data: 'x'.repeat(5000) }),
    })
    const registry = new ToolRegistry()
    registry.register(bigTool)
    const executor = new ToolExecutor(registry, { maxToolOutputChars: 0 })

    const result = await executor.execute('big', {}, dummyContext)
    expect(result.data.length).toBe(5000)
  })

  it('no truncation when maxToolOutputChars is negative', async () => {
    const bigTool = defineTool({
      name: 'big',
      description: 'Returns large output.',
      inputSchema: z.object({}),
      execute: async () => ({ data: 'x'.repeat(5000) }),
    })
    const registry = new ToolRegistry()
    registry.register(bigTool)
    const executor = new ToolExecutor(registry, { maxToolOutputChars: -100 })

    const result = await executor.execute('big', {}, dummyContext)
    expect(result.data.length).toBe(5000)
  })

  it('defineTool passes maxOutputChars to the ToolDefinition', () => {
    const tool = defineTool({
      name: 'test',
      description: 'test',
      inputSchema: z.object({}),
      maxOutputChars: 500,
      execute: async () => ({ data: 'ok' }),
    })
    expect(tool.maxOutputChars).toBe(500)
  })

  it('defineTool omits maxOutputChars when not specified', () => {
    const tool = defineTool({
      name: 'test',
      description: 'test',
      inputSchema: z.object({}),
      execute: async () => ({ data: 'ok' }),
    })
    expect(tool.maxOutputChars).toBeUndefined()
  })

  it('no truncation when neither limit is set', async () => {
    const bigTool = defineTool({
      name: 'big',
      description: 'Returns large output.',
      inputSchema: z.object({}),
      execute: async () => ({ data: 'x'.repeat(50000) }),
    })
    const registry = new ToolRegistry()
    registry.register(bigTool)
    const executor = new ToolExecutor(registry)

    const result = await executor.execute('big', {}, dummyContext)
    expect(result.data.length).toBe(50000)
  })
})
|
||||
|
|
|
|||
|
|
@ -0,0 +1,365 @@
|
|||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
|
||||
import { AgentRunner, TOOL_PRESETS } from '../src/agent/runner.js'
|
||||
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
|
||||
import { ToolExecutor } from '../src/tool/executor.js'
|
||||
import { z } from 'zod'
|
||||
import type { LLMAdapter, LLMResponse, LLMToolDef } from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock adapter
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Minimal LLMAdapter stub: chat() always returns one fixed text response;
// stream() is a deliberately empty generator because these tests never stream.
const mockAdapter: LLMAdapter = {
  name: 'mock',
  async chat() {
    return {
      id: 'mock-1',
      content: [{ type: 'text', text: 'response' }],
      model: 'mock-model',
      stop_reason: 'end_turn',
      usage: { input_tokens: 10, output_tokens: 20 },
    } satisfies LLMResponse
  },
  async *stream() {
    // Not used in these tests
  },
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test tools
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function createTestTools() {
|
||||
const registry = new ToolRegistry()
|
||||
|
||||
// Register test tools that match our presets
|
||||
registry.register(defineTool({
|
||||
name: 'file_read',
|
||||
description: 'Read file',
|
||||
inputSchema: z.object({ path: z.string() }),
|
||||
execute: async () => ({ data: 'content', isError: false }),
|
||||
}))
|
||||
|
||||
registry.register(defineTool({
|
||||
name: 'file_write',
|
||||
description: 'Write file',
|
||||
inputSchema: z.object({ path: z.string(), content: z.string() }),
|
||||
execute: async () => ({ data: 'ok', isError: false }),
|
||||
}))
|
||||
|
||||
registry.register(defineTool({
|
||||
name: 'file_edit',
|
||||
description: 'Edit file',
|
||||
inputSchema: z.object({ path: z.string(), oldString: z.string(), newString: z.string() }),
|
||||
execute: async () => ({ data: 'ok', isError: false }),
|
||||
}))
|
||||
|
||||
registry.register(defineTool({
|
||||
name: 'grep',
|
||||
description: 'Search text',
|
||||
inputSchema: z.object({ pattern: z.string(), path: z.string() }),
|
||||
execute: async () => ({ data: 'matches', isError: false }),
|
||||
}))
|
||||
|
||||
registry.register(defineTool({
|
||||
name: 'glob',
|
||||
description: 'List paths',
|
||||
inputSchema: z.object({ path: z.string().optional() }),
|
||||
execute: async () => ({ data: 'paths', isError: false }),
|
||||
}))
|
||||
|
||||
registry.register(defineTool({
|
||||
name: 'bash',
|
||||
description: 'Run shell command',
|
||||
inputSchema: z.object({ command: z.string() }),
|
||||
execute: async () => ({ data: 'output', isError: false }),
|
||||
}))
|
||||
|
||||
// Extra tool not in any preset
|
||||
registry.register(defineTool({
|
||||
name: 'custom_tool',
|
||||
description: 'Custom tool',
|
||||
inputSchema: z.object({ input: z.string() }),
|
||||
execute: async () => ({ data: 'custom', isError: false }),
|
||||
}), { runtimeAdded: true })
|
||||
|
||||
return registry
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Verifies AgentRunner tool filtering. Filters compose in a fixed order —
// preset restricts first, allowedTools intersects, disallowedTools subtracts.
// Runtime-added tools bypass the preset/allowlist stages but are still
// blocked by an explicit denylist, and conflicting configurations are
// surfaced via console.warn.
describe('Tool filtering', () => {
  const registry = createTestTools()
  const executor = new ToolExecutor(registry)

  describe('TOOL_PRESETS', () => {
    it('readonly preset has correct tools', () => {
      expect(TOOL_PRESETS.readonly).toEqual(['file_read', 'grep', 'glob'])
    })

    it('readwrite preset has correct tools', () => {
      expect(TOOL_PRESETS.readwrite).toEqual(['file_read', 'file_write', 'file_edit', 'grep', 'glob'])
    })

    it('full preset has correct tools', () => {
      expect(TOOL_PRESETS.full).toEqual(['file_read', 'file_write', 'file_edit', 'grep', 'glob', 'bash'])
    })
  })

  describe('resolveTools - no filtering', () => {
    it('returns all tools when no filters are set', () => {
      const runner = new AgentRunner(mockAdapter, registry, executor, {
        model: 'test-model',
      })

      // resolveTools is private; tests reach it via an any-cast on purpose.
      const tools = (runner as any).resolveTools() as LLMToolDef[]
      const toolNames = tools.map((t: LLMToolDef) => t.name).sort()

      expect(toolNames).toEqual([
        'bash',
        'custom_tool',
        'file_edit',
        'file_read',
        'file_write',
        'glob',
        'grep',
      ])
    })
  })

  describe('resolveTools - preset filtering', () => {
    it('readonly preset filters correctly', () => {
      const runner = new AgentRunner(mockAdapter, registry, executor, {
        model: 'test-model',
        toolPreset: 'readonly',
      })

      const tools = (runner as any).resolveTools() as LLMToolDef[]
      const toolNames = tools.map((t: LLMToolDef) => t.name).sort()

      // custom_tool is runtime-added, so the preset does not exclude it.
      expect(toolNames).toEqual(['custom_tool', 'file_read', 'glob', 'grep'])
    })

    it('readwrite preset filters correctly', () => {
      const runner = new AgentRunner(mockAdapter, registry, executor, {
        model: 'test-model',
        toolPreset: 'readwrite',
      })

      const tools = (runner as any).resolveTools() as LLMToolDef[]
      const toolNames = tools.map((t: LLMToolDef) => t.name).sort()

      expect(toolNames).toEqual([
        'custom_tool',
        'file_edit',
        'file_read',
        'file_write',
        'glob',
        'grep',
      ])
    })

    it('full preset filters correctly', () => {
      const runner = new AgentRunner(mockAdapter, registry, executor, {
        model: 'test-model',
        toolPreset: 'full',
      })

      const tools = (runner as any).resolveTools() as LLMToolDef[]
      const toolNames = tools.map((t: LLMToolDef) => t.name).sort()

      expect(toolNames).toEqual([
        'bash',
        'custom_tool',
        'file_edit',
        'file_read',
        'file_write',
        'glob',
        'grep',
      ])
    })
  })

  describe('resolveTools - allowlist filtering', () => {
    it('allowlist filters correctly', () => {
      const runner = new AgentRunner(mockAdapter, registry, executor, {
        model: 'test-model',
        allowedTools: ['file_read', 'bash'],
      })

      const tools = (runner as any).resolveTools() as LLMToolDef[]
      const toolNames = tools.map((t: LLMToolDef) => t.name).sort()

      expect(toolNames).toEqual(['bash', 'custom_tool', 'file_read'])
    })

    it('empty allowlist returns no tools', () => {
      const runner = new AgentRunner(mockAdapter, registry, executor, {
        model: 'test-model',
        allowedTools: [],
      })

      // Only the runtime-added custom_tool survives an empty allowlist.
      const tools = (runner as any).resolveTools()
      expect((tools as LLMToolDef[]).map(t => t.name)).toEqual(['custom_tool'])
    })
  })

  describe('resolveTools - denylist filtering', () => {
    it('denylist filters correctly', () => {
      const runner = new AgentRunner(mockAdapter, registry, executor, {
        model: 'test-model',
        disallowedTools: ['bash', 'custom_tool'],
      })

      const tools = (runner as any).resolveTools() as LLMToolDef[]
      const toolNames = tools.map((t: LLMToolDef) => t.name).sort()

      // custom_tool is runtime-added but disallowedTools still blocks it
      expect(toolNames).toEqual([
        'file_edit',
        'file_read',
        'file_write',
        'glob',
        'grep',
      ])
    })

    it('empty denylist returns all tools', () => {
      const runner = new AgentRunner(mockAdapter, registry, executor, {
        model: 'test-model',
        disallowedTools: [],
      })

      const tools = (runner as any).resolveTools()
      expect(tools).toHaveLength(7) // All registered tools
    })
  })

  describe('resolveTools - combined filtering (preset + allowlist + denylist)', () => {
    it('preset + allowlist + denylist work together', () => {
      // Start with readwrite preset: ['file_read', 'file_write', 'file_edit', 'grep', 'glob']
      // Then allowlist: intersect with ['file_read', 'file_write', 'grep'] = ['file_read', 'file_write', 'grep']
      // Then denylist: subtract ['file_write'] = ['file_read', 'grep']
      const runner = new AgentRunner(mockAdapter, registry, executor, {
        model: 'test-model',
        toolPreset: 'readwrite',
        allowedTools: ['file_read', 'file_write', 'grep'],
        disallowedTools: ['file_write'],
      })

      const tools = (runner as any).resolveTools() as LLMToolDef[]
      const toolNames = tools.map((t: LLMToolDef) => t.name).sort()

      expect(toolNames).toEqual(['custom_tool', 'file_read', 'grep'])
    })

    it('preset filters first, then allowlist intersects, then denylist subtracts', () => {
      // Start with readonly preset: ['file_read', 'grep', 'glob']
      // Allowlist intersect with ['file_read', 'bash']: ['file_read']
      // Denylist subtract ['file_read']: []
      const runner = new AgentRunner(mockAdapter, registry, executor, {
        model: 'test-model',
        toolPreset: 'readonly',
        allowedTools: ['file_read', 'bash'],
        disallowedTools: ['file_read'],
      })

      const tools = (runner as any).resolveTools()
      expect((tools as LLMToolDef[]).map(t => t.name)).toEqual(['custom_tool'])
    })
  })

  describe('resolveTools - custom tool behavior', () => {
    it('always includes custom tools regardless of filtering', () => {
      // Every built-in tool is filtered out, but the runtime-added
      // custom_tool (not named in disallowedTools) still comes through.
      const runner = new AgentRunner(mockAdapter, registry, executor, {
        model: 'test-model',
        toolPreset: 'readonly',
        allowedTools: ['file_read'],
        disallowedTools: ['file_read', 'bash', 'grep'],
      })

      const tools = (runner as any).resolveTools() as LLMToolDef[]
      const toolNames = tools.map((t: LLMToolDef) => t.name).sort()

      expect(toolNames).toEqual(['custom_tool'])
    })

    it('runtime-added tools are blocked by disallowedTools', () => {
      // A runtime-added tool that reuses a built-in name must still be
      // denied when that name appears in disallowedTools.
      const runtimeBuiltinNamedRegistry = new ToolRegistry()
      runtimeBuiltinNamedRegistry.register(defineTool({
        name: 'file_read',
        description: 'Runtime override',
        inputSchema: z.object({ path: z.string() }),
        execute: async () => ({ data: 'runtime', isError: false }),
      }), { runtimeAdded: true })

      const runtimeBuiltinNamedRunner = new AgentRunner(
        mockAdapter,
        runtimeBuiltinNamedRegistry,
        new ToolExecutor(runtimeBuiltinNamedRegistry),
        {
          model: 'test-model',
          disallowedTools: ['file_read'],
        },
      )

      const tools = (runtimeBuiltinNamedRunner as any).resolveTools() as LLMToolDef[]
      expect(tools.map(t => t.name)).toEqual([])
    })
  })

  describe('resolveTools - validation warnings', () => {
    let consoleWarnSpy: any

    // Silence console.warn and capture calls for assertion.
    beforeEach(() => {
      consoleWarnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {})
    })

    afterEach(() => {
      consoleWarnSpy.mockRestore()
    })

    it('warns when tool appears in both allowedTools and disallowedTools', () => {
      const runner = new AgentRunner(mockAdapter, registry, executor, {
        model: 'test-model',
        allowedTools: ['file_read', 'bash'],
        disallowedTools: ['bash', 'grep'],
      })

      ;(runner as any).resolveTools()

      expect(consoleWarnSpy).toHaveBeenCalledWith(
        expect.stringContaining('tools ["bash"] appear in both allowedTools and disallowedTools')
      )
    })

    it('warns when both toolPreset and allowedTools are set', () => {
      const runner = new AgentRunner(mockAdapter, registry, executor, {
        model: 'test-model',
        toolPreset: 'readonly',
        allowedTools: ['file_read', 'bash'],
      })

      ;(runner as any).resolveTools()

      expect(consoleWarnSpy).toHaveBeenCalledWith(
        expect.stringContaining('both toolPreset and allowedTools are set')
      )
    })

    it('does not warn when no overlap between allowedTools and disallowedTools', () => {
      const runner = new AgentRunner(mockAdapter, registry, executor, {
        model: 'test-model',
        allowedTools: ['file_read'],
        disallowedTools: ['bash'],
      })

      ;(runner as any).resolveTools()

      expect(consoleWarnSpy).not.toHaveBeenCalled()
    })
  })
})
|
||||
|
||||
|
|
@ -0,0 +1,498 @@
|
|||
import { describe, it, expect } from 'vitest'
|
||||
import { z } from 'zod'
|
||||
import { AgentRunner } from '../src/agent/runner.js'
|
||||
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
|
||||
import { ToolExecutor } from '../src/tool/executor.js'
|
||||
import type { LLMAdapter, LLMMessage, LLMResponse } from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function textResponse(text: string): LLMResponse {
|
||||
return {
|
||||
id: `resp-${Math.random().toString(36).slice(2)}`,
|
||||
content: [{ type: 'text', text }],
|
||||
model: 'mock-model',
|
||||
stop_reason: 'end_turn',
|
||||
usage: { input_tokens: 10, output_tokens: 20 },
|
||||
}
|
||||
}
|
||||
|
||||
function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse {
|
||||
return {
|
||||
id: `resp-${Math.random().toString(36).slice(2)}`,
|
||||
content: [{
|
||||
type: 'tool_use',
|
||||
id: `tu-${Math.random().toString(36).slice(2)}`,
|
||||
name: toolName,
|
||||
input,
|
||||
}],
|
||||
model: 'mock-model',
|
||||
stop_reason: 'tool_use',
|
||||
usage: { input_tokens: 15, output_tokens: 25 },
|
||||
}
|
||||
}
|
||||
|
||||
function buildRegistryAndExecutor(
|
||||
toolOutput: string = 'x'.repeat(600),
|
||||
): { registry: ToolRegistry; executor: ToolExecutor } {
|
||||
const registry = new ToolRegistry()
|
||||
registry.register(
|
||||
defineTool({
|
||||
name: 'echo',
|
||||
description: 'Echo input',
|
||||
inputSchema: z.object({ message: z.string() }),
|
||||
async execute() {
|
||||
return { data: toolOutput }
|
||||
},
|
||||
}),
|
||||
)
|
||||
return { registry, executor: new ToolExecutor(registry) }
|
||||
}
|
||||
|
||||
function buildErrorRegistryAndExecutor(): { registry: ToolRegistry; executor: ToolExecutor } {
|
||||
const registry = new ToolRegistry()
|
||||
registry.register(
|
||||
defineTool({
|
||||
name: 'fail',
|
||||
description: 'Always fails',
|
||||
inputSchema: z.object({ message: z.string() }),
|
||||
async execute() {
|
||||
return { data: 'E'.repeat(600), isError: true }
|
||||
},
|
||||
}),
|
||||
)
|
||||
return { registry, executor: new ToolExecutor(registry) }
|
||||
}
|
||||
|
||||
/** Extract all tool_result content strings from messages sent to the LLM. */
|
||||
function extractToolResultContents(messages: LLMMessage[]): string[] {
|
||||
return messages.flatMap(m =>
|
||||
m.content
|
||||
.filter((b): b is { type: 'tool_result'; tool_use_id: string; content: string; is_error?: boolean } =>
|
||||
b.type === 'tool_result')
|
||||
.map(b => b.content),
|
||||
)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('AgentRunner compressToolResults', () => {
|
||||
it('does NOT compress when compressToolResults is not set (default)', async () => {
|
||||
const calls: LLMMessage[][] = []
|
||||
const longOutput = 'x'.repeat(600)
|
||||
const responses = [
|
||||
toolUseResponse('echo', { message: 't1' }),
|
||||
toolUseResponse('echo', { message: 't2' }),
|
||||
textResponse('done'),
|
||||
]
|
||||
let idx = 0
|
||||
const adapter: LLMAdapter = {
|
||||
name: 'mock',
|
||||
async chat(messages) {
|
||||
calls.push(messages.map(m => ({ role: m.role, content: [...m.content] })))
|
||||
return responses[idx++]!
|
||||
},
|
||||
async *stream() { /* unused */ },
|
||||
}
|
||||
const { registry, executor } = buildRegistryAndExecutor(longOutput)
|
||||
const runner = new AgentRunner(adapter, registry, executor, {
|
||||
model: 'mock-model',
|
||||
allowedTools: ['echo'],
|
||||
maxTurns: 5,
|
||||
// compressToolResults not set
|
||||
})
|
||||
|
||||
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||
|
||||
// Turn 3 should still see full tool results from turn 1
|
||||
const turn3Messages = calls[2]!
|
||||
const allToolResults = extractToolResultContents(turn3Messages)
|
||||
expect(allToolResults.every(c => c === longOutput)).toBe(true)
|
||||
})
|
||||
|
||||
it('compresses consumed tool results on turn 3+', async () => {
|
||||
const calls: LLMMessage[][] = []
|
||||
const longOutput = 'x'.repeat(600)
|
||||
const responses = [
|
||||
toolUseResponse('echo', { message: 't1' }),
|
||||
toolUseResponse('echo', { message: 't2' }),
|
||||
textResponse('done'),
|
||||
]
|
||||
let idx = 0
|
||||
const adapter: LLMAdapter = {
|
||||
name: 'mock',
|
||||
async chat(messages) {
|
||||
calls.push(messages.map(m => ({ role: m.role, content: [...m.content] })))
|
||||
return responses[idx++]!
|
||||
},
|
||||
async *stream() { /* unused */ },
|
||||
}
|
||||
const { registry, executor } = buildRegistryAndExecutor(longOutput)
|
||||
const runner = new AgentRunner(adapter, registry, executor, {
|
||||
model: 'mock-model',
|
||||
allowedTools: ['echo'],
|
||||
maxTurns: 5,
|
||||
compressToolResults: true,
|
||||
})
|
||||
|
||||
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||
|
||||
// Turn 3: the LLM should see a compressed marker for turn 1 results
|
||||
// but the full output for turn 2 results (most recent, not yet consumed).
|
||||
const turn3Messages = calls[2]!
|
||||
const allToolResults = extractToolResultContents(turn3Messages)
|
||||
expect(allToolResults).toHaveLength(2)
|
||||
|
||||
// First result (turn 1) should be compressed
|
||||
expect(allToolResults[0]).toContain('compressed')
|
||||
expect(allToolResults[0]).toContain('600 chars')
|
||||
|
||||
// Second result (turn 2, most recent) should be preserved in full
|
||||
expect(allToolResults[1]).toBe(longOutput)
|
||||
})
|
||||
|
||||
it('preserves tool_use_id on compressed results', async () => {
|
||||
const calls: LLMMessage[][] = []
|
||||
const longOutput = 'x'.repeat(600)
|
||||
const responses = [
|
||||
toolUseResponse('echo', { message: 't1' }),
|
||||
toolUseResponse('echo', { message: 't2' }),
|
||||
textResponse('done'),
|
||||
]
|
||||
let idx = 0
|
||||
const adapter: LLMAdapter = {
|
||||
name: 'mock',
|
||||
async chat(messages) {
|
||||
calls.push(messages.map(m => ({ role: m.role, content: [...m.content] })))
|
||||
return responses[idx++]!
|
||||
},
|
||||
async *stream() { /* unused */ },
|
||||
}
|
||||
const { registry, executor } = buildRegistryAndExecutor(longOutput)
|
||||
const runner = new AgentRunner(adapter, registry, executor, {
|
||||
model: 'mock-model',
|
||||
allowedTools: ['echo'],
|
||||
maxTurns: 5,
|
||||
compressToolResults: true,
|
||||
})
|
||||
|
||||
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||
|
||||
// Turn 3: verify compressed result still has tool_use_id
|
||||
const turn3Messages = calls[2]!
|
||||
const toolResultBlocks = turn3Messages.flatMap(m =>
|
||||
m.content.filter(b => b.type === 'tool_result'),
|
||||
)
|
||||
for (const block of toolResultBlocks) {
|
||||
expect(block).toHaveProperty('tool_use_id')
|
||||
expect((block as { tool_use_id: string }).tool_use_id).toBeTruthy()
|
||||
}
|
||||
})
|
||||
|
||||
it('skips short tool results below minChars threshold', async () => {
|
||||
const calls: LLMMessage[][] = []
|
||||
const shortOutput = 'short' // 5 chars, well below 500 default
|
||||
const responses = [
|
||||
toolUseResponse('echo', { message: 't1' }),
|
||||
toolUseResponse('echo', { message: 't2' }),
|
||||
textResponse('done'),
|
||||
]
|
||||
let idx = 0
|
||||
const adapter: LLMAdapter = {
|
||||
name: 'mock',
|
||||
async chat(messages) {
|
||||
calls.push(messages.map(m => ({ role: m.role, content: [...m.content] })))
|
||||
return responses[idx++]!
|
||||
},
|
||||
async *stream() { /* unused */ },
|
||||
}
|
||||
const { registry, executor } = buildRegistryAndExecutor(shortOutput)
|
||||
const runner = new AgentRunner(adapter, registry, executor, {
|
||||
model: 'mock-model',
|
||||
allowedTools: ['echo'],
|
||||
maxTurns: 5,
|
||||
compressToolResults: true,
|
||||
})
|
||||
|
||||
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||
|
||||
// Turn 3: short results should NOT be compressed
|
||||
const turn3Messages = calls[2]!
|
||||
const allToolResults = extractToolResultContents(turn3Messages)
|
||||
expect(allToolResults.every(c => c === shortOutput)).toBe(true)
|
||||
})
|
||||
|
||||
it('respects custom minChars threshold', async () => {
|
||||
const calls: LLMMessage[][] = []
|
||||
const output = 'x'.repeat(200)
|
||||
const responses = [
|
||||
toolUseResponse('echo', { message: 't1' }),
|
||||
toolUseResponse('echo', { message: 't2' }),
|
||||
textResponse('done'),
|
||||
]
|
||||
let idx = 0
|
||||
const adapter: LLMAdapter = {
|
||||
name: 'mock',
|
||||
async chat(messages) {
|
||||
calls.push(messages.map(m => ({ role: m.role, content: [...m.content] })))
|
||||
return responses[idx++]!
|
||||
},
|
||||
async *stream() { /* unused */ },
|
||||
}
|
||||
const { registry, executor } = buildRegistryAndExecutor(output)
|
||||
const runner = new AgentRunner(adapter, registry, executor, {
|
||||
model: 'mock-model',
|
||||
allowedTools: ['echo'],
|
||||
maxTurns: 5,
|
||||
compressToolResults: { minChars: 100 },
|
||||
})
|
||||
|
||||
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||
|
||||
// With minChars=100, the 200-char output should be compressed
|
||||
const turn3Messages = calls[2]!
|
||||
const allToolResults = extractToolResultContents(turn3Messages)
|
||||
expect(allToolResults[0]).toContain('compressed')
|
||||
expect(allToolResults[0]).toContain('200 chars')
|
||||
})
|
||||
|
||||
it('never compresses error tool results', async () => {
|
||||
const calls: LLMMessage[][] = []
|
||||
const responses = [
|
||||
toolUseResponse('fail', { message: 't1' }),
|
||||
toolUseResponse('fail', { message: 't2' }),
|
||||
textResponse('done'),
|
||||
]
|
||||
let idx = 0
|
||||
const adapter: LLMAdapter = {
|
||||
name: 'mock',
|
||||
async chat(messages) {
|
||||
calls.push(messages.map(m => ({ role: m.role, content: [...m.content] })))
|
||||
return responses[idx++]!
|
||||
},
|
||||
async *stream() { /* unused */ },
|
||||
}
|
||||
const { registry, executor } = buildErrorRegistryAndExecutor()
|
||||
const runner = new AgentRunner(adapter, registry, executor, {
|
||||
model: 'mock-model',
|
||||
allowedTools: ['fail'],
|
||||
maxTurns: 5,
|
||||
compressToolResults: true,
|
||||
})
|
||||
|
||||
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||
|
||||
// Error results should never be compressed even if long
|
||||
const turn3Messages = calls[2]!
|
||||
const allToolResults = extractToolResultContents(turn3Messages)
|
||||
expect(allToolResults.every(c => c === 'E'.repeat(600))).toBe(true)
|
||||
})
|
||||
|
||||
it('compresses selectively in multi-block tool_result messages (parallel tool calls)', async () => {
|
||||
const calls: LLMMessage[][] = []
|
||||
// Two tools: one returns long output, one returns short output
|
||||
const registry = new ToolRegistry()
|
||||
registry.register(
|
||||
defineTool({
|
||||
name: 'long_tool',
|
||||
description: 'Returns long output',
|
||||
inputSchema: z.object({ msg: z.string() }),
|
||||
async execute() { return { data: 'L'.repeat(600) } },
|
||||
}),
|
||||
)
|
||||
registry.register(
|
||||
defineTool({
|
||||
name: 'short_tool',
|
||||
description: 'Returns short output',
|
||||
inputSchema: z.object({ msg: z.string() }),
|
||||
async execute() { return { data: 'S'.repeat(50) } },
|
||||
}),
|
||||
)
|
||||
const executor = new ToolExecutor(registry)
|
||||
|
||||
// Turn 1: model calls both tools in parallel
|
||||
const parallelResponse: LLMResponse = {
|
||||
id: 'resp-parallel',
|
||||
content: [
|
||||
{ type: 'tool_use', id: 'tu-long', name: 'long_tool', input: { msg: 'a' } },
|
||||
{ type: 'tool_use', id: 'tu-short', name: 'short_tool', input: { msg: 'b' } },
|
||||
],
|
||||
model: 'mock-model',
|
||||
stop_reason: 'tool_use',
|
||||
usage: { input_tokens: 15, output_tokens: 25 },
|
||||
}
|
||||
const responses = [
|
||||
parallelResponse,
|
||||
toolUseResponse('long_tool', { msg: 't2' }),
|
||||
textResponse('done'),
|
||||
]
|
||||
let idx = 0
|
||||
const adapter: LLMAdapter = {
|
||||
name: 'mock',
|
||||
async chat(messages) {
|
||||
calls.push(messages.map(m => ({ role: m.role, content: [...m.content] })))
|
||||
return responses[idx++]!
|
||||
},
|
||||
async *stream() { /* unused */ },
|
||||
}
|
||||
|
||||
const runner = new AgentRunner(adapter, registry, executor, {
|
||||
model: 'mock-model',
|
||||
allowedTools: ['long_tool', 'short_tool'],
|
||||
maxTurns: 5,
|
||||
compressToolResults: true,
|
||||
})
|
||||
|
||||
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||
|
||||
// Turn 3: the parallel results from turn 1 should be selectively compressed.
|
||||
// The long_tool result (600 chars) → compressed. The short_tool result (50 chars) → kept.
|
||||
const turn3Messages = calls[2]!
|
||||
const turn1ToolResults = turn3Messages.flatMap(m =>
|
||||
m.content.filter((b): b is { type: 'tool_result'; tool_use_id: string; content: string } =>
|
||||
b.type === 'tool_result'),
|
||||
)
|
||||
// Find the results from turn 1 (first user message with tool_results)
|
||||
const firstToolResultMsg = turn3Messages.find(
|
||||
m => m.role === 'user' && m.content.some(b => b.type === 'tool_result'),
|
||||
)!
|
||||
const blocks = firstToolResultMsg.content.filter(
|
||||
(b): b is { type: 'tool_result'; tool_use_id: string; content: string } =>
|
||||
b.type === 'tool_result',
|
||||
)
|
||||
|
||||
// One should be compressed (long), one should be intact (short)
|
||||
const compressedBlocks = blocks.filter(b => b.content.includes('compressed'))
|
||||
const intactBlocks = blocks.filter(b => !b.content.includes('compressed'))
|
||||
expect(compressedBlocks).toHaveLength(1)
|
||||
expect(compressedBlocks[0]!.content).toContain('600 chars')
|
||||
expect(intactBlocks).toHaveLength(1)
|
||||
expect(intactBlocks[0]!.content).toBe('S'.repeat(50))
|
||||
})
|
||||
|
||||
it('compounds compression across 4+ turns', async () => {
|
||||
const calls: LLMMessage[][] = []
|
||||
const longOutput = 'x'.repeat(600)
|
||||
const responses = [
|
||||
toolUseResponse('echo', { message: 't1' }),
|
||||
toolUseResponse('echo', { message: 't2' }),
|
||||
toolUseResponse('echo', { message: 't3' }),
|
||||
textResponse('done'),
|
||||
]
|
||||
let idx = 0
|
||||
const adapter: LLMAdapter = {
|
||||
name: 'mock',
|
||||
async chat(messages) {
|
||||
calls.push(messages.map(m => ({ role: m.role, content: [...m.content] })))
|
||||
return responses[idx++]!
|
||||
},
|
||||
async *stream() { /* unused */ },
|
||||
}
|
||||
const { registry, executor } = buildRegistryAndExecutor(longOutput)
|
||||
const runner = new AgentRunner(adapter, registry, executor, {
|
||||
model: 'mock-model',
|
||||
allowedTools: ['echo'],
|
||||
maxTurns: 6,
|
||||
compressToolResults: true,
|
||||
})
|
||||
|
||||
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||
|
||||
// Turn 4: turns 1 and 2 should both be compressed, turn 3 should be intact
|
||||
const turn4Messages = calls[3]!
|
||||
const allToolResults = extractToolResultContents(turn4Messages)
|
||||
expect(allToolResults).toHaveLength(3)
|
||||
|
||||
// First two are compressed (turns 1 & 2)
|
||||
expect(allToolResults[0]).toContain('compressed')
|
||||
expect(allToolResults[1]).toContain('compressed')
|
||||
|
||||
// Last one (turn 3, most recent) preserved
|
||||
expect(allToolResults[2]).toBe(longOutput)
|
||||
})
|
||||
|
||||
it('does not re-compress already compressed markers with low minChars', async () => {
|
||||
const calls: LLMMessage[][] = []
|
||||
const longOutput = 'x'.repeat(600)
|
||||
const responses = [
|
||||
toolUseResponse('echo', { message: 't1' }),
|
||||
toolUseResponse('echo', { message: 't2' }),
|
||||
toolUseResponse('echo', { message: 't3' }),
|
||||
textResponse('done'),
|
||||
]
|
||||
let idx = 0
|
||||
const adapter: LLMAdapter = {
|
||||
name: 'mock',
|
||||
async chat(messages) {
|
||||
calls.push(messages.map(m => ({ role: m.role, content: [...m.content] })))
|
||||
return responses[idx++]!
|
||||
},
|
||||
async *stream() { /* unused */ },
|
||||
}
|
||||
const { registry, executor } = buildRegistryAndExecutor(longOutput)
|
||||
const runner = new AgentRunner(adapter, registry, executor, {
|
||||
model: 'mock-model',
|
||||
allowedTools: ['echo'],
|
||||
maxTurns: 6,
|
||||
compressToolResults: { minChars: 10 }, // very low threshold
|
||||
})
|
||||
|
||||
await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
|
||||
|
||||
// Turn 4: turn 1 was compressed in turn 3. With minChars=10 the marker
|
||||
// itself (55 chars) exceeds the threshold. Without the guard it would be
|
||||
// re-compressed with a wrong char count (55 instead of 600).
|
||||
const turn4Messages = calls[3]!
|
||||
const allToolResults = extractToolResultContents(turn4Messages)
|
||||
|
||||
// Turn 1 result: should still show original 600 chars, not re-compressed
|
||||
expect(allToolResults[0]).toContain('600 chars')
|
||||
// Turn 2 result: compressed for the first time this turn
|
||||
expect(allToolResults[1]).toContain('600 chars')
|
||||
// Turn 3 result: most recent, preserved in full
|
||||
expect(allToolResults[2]).toBe(longOutput)
|
||||
})
|
||||
|
||||
it('works together with contextStrategy', async () => {
|
||||
const calls: LLMMessage[][] = []
|
||||
const longOutput = 'x'.repeat(600)
|
||||
const responses = [
|
||||
toolUseResponse('echo', { message: 't1' }),
|
||||
toolUseResponse('echo', { message: 't2' }),
|
||||
textResponse('done'),
|
||||
]
|
||||
let idx = 0
|
||||
const adapter: LLMAdapter = {
|
||||
name: 'mock',
|
||||
async chat(messages) {
|
||||
calls.push(messages.map(m => ({ role: m.role, content: [...m.content] })))
|
||||
return responses[idx++]!
|
||||
},
|
||||
async *stream() { /* unused */ },
|
||||
}
|
||||
const { registry, executor } = buildRegistryAndExecutor(longOutput)
|
||||
const runner = new AgentRunner(adapter, registry, executor, {
|
||||
model: 'mock-model',
|
||||
allowedTools: ['echo'],
|
||||
maxTurns: 5,
|
||||
compressToolResults: true,
|
||||
contextStrategy: { type: 'sliding-window', maxTurns: 10 },
|
||||
})
|
||||
|
||||
const result = await runner.run([
|
||||
{ role: 'user', content: [{ type: 'text', text: 'start' }] },
|
||||
])
|
||||
|
||||
// Should complete without error; both features coexist
|
||||
expect(result.output).toBe('done')
|
||||
|
||||
// Turn 3 should have compressed turn 1 results
|
||||
const turn3Messages = calls[2]!
|
||||
const allToolResults = extractToolResultContents(turn3Messages)
|
||||
expect(allToolResults[0]).toContain('compressed')
|
||||
})
|
||||
})
|
||||
|
|
@ -186,7 +186,7 @@ describe('AgentRunner trace events', () => {
|
|||
})
|
||||
|
||||
const runOptions: RunOptions = {
|
||||
onTrace: (e) => traces.push(e),
|
||||
onTrace: (e) => { traces.push(e) },
|
||||
runId: 'run-1',
|
||||
traceAgent: 'test-agent',
|
||||
}
|
||||
|
|
@ -234,7 +234,7 @@ describe('AgentRunner trace events', () => {
|
|||
|
||||
await runner.run(
|
||||
[{ role: 'user', content: [{ type: 'text', text: 'test' }] }],
|
||||
{ onTrace: (e) => traces.push(e), runId: 'run-2', traceAgent: 'tooler' },
|
||||
{ onTrace: (e) => { traces.push(e) }, runId: 'run-2', traceAgent: 'tooler' },
|
||||
)
|
||||
|
||||
const toolTraces = traces.filter(t => t.type === 'tool_call')
|
||||
|
|
@ -273,7 +273,7 @@ describe('AgentRunner trace events', () => {
|
|||
|
||||
await runner.run(
|
||||
[{ role: 'user', content: [{ type: 'text', text: 'test' }] }],
|
||||
{ onTrace: (e) => traces.push(e), runId: 'run-3', traceAgent: 'err-agent' },
|
||||
{ onTrace: (e) => { traces.push(e) }, runId: 'run-3', traceAgent: 'err-agent' },
|
||||
)
|
||||
|
||||
const toolTraces = traces.filter(t => t.type === 'tool_call')
|
||||
|
|
@ -316,7 +316,7 @@ describe('Agent trace events', () => {
|
|||
const agent = buildMockAgent(config, [textResponse('Hello world')])
|
||||
|
||||
const runOptions: Partial<RunOptions> = {
|
||||
onTrace: (e) => traces.push(e),
|
||||
onTrace: (e) => { traces.push(e) },
|
||||
runId: 'run-agent-1',
|
||||
traceAgent: 'my-agent',
|
||||
}
|
||||
|
|
@ -367,7 +367,7 @@ describe('Agent trace events', () => {
|
|||
|
||||
const runId = 'shared-run-id'
|
||||
await agent.run('test', {
|
||||
onTrace: (e) => traces.push(e),
|
||||
onTrace: (e) => { traces.push(e) },
|
||||
runId,
|
||||
traceAgent: 'multi-trace-agent',
|
||||
})
|
||||
|
|
@ -436,7 +436,7 @@ describe('Agent trace events', () => {
|
|||
|
||||
await runner.run(
|
||||
[{ role: 'user', content: [{ type: 'text', text: 'go' }] }],
|
||||
{ onTrace: (e) => traces.push(e), runId: 'run-tok', traceAgent: 'token-agent' },
|
||||
{ onTrace: (e) => { traces.push(e) }, runId: 'run-tok', traceAgent: 'token-agent' },
|
||||
)
|
||||
|
||||
const llmTraces = traces.filter(t => t.type === 'llm_call')
|
||||
|
|
@ -451,3 +451,4 @@ describe('Agent trace events', () => {
|
|||
expect(llmTraces[1]!.turn).toBe(2)
|
||||
})
|
||||
})
|
||||
|
||||
|
|
|
|||
|
|
@ -5,5 +5,11 @@ export default defineConfig({
|
|||
coverage: {
|
||||
include: ['src/**'],
|
||||
},
|
||||
exclude: [
|
||||
'**/node_modules/**',
|
||||
'**/dist/**',
|
||||
// E2E tests require API keys — run with: npm run test:e2e
|
||||
...(process.env['RUN_E2E'] ? [] : ['tests/e2e/**']),
|
||||
],
|
||||
},
|
||||
})
|
||||
|
|
|
|||
Loading…
Reference in New Issue