From a772312a689bc572f2ff59dd939aec40eaa58c2e Mon Sep 17 00:00:00 2001 From: JackChen Date: Thu, 2 Apr 2026 23:43:54 +0800 Subject: [PATCH] chore: add tests, CI, contributing guide, and PR template - 5 test files, 61 test cases covering TaskQueue, SharedMemory, ToolExecutor, ToolRegistry, and Semaphore - GitHub Actions CI running lint + test on Node 18/20/22 - CONTRIBUTING.md with setup, commands, and PR workflow - Pull request template with checklist --- .github/pull_request_template.md | 14 ++ .github/workflows/ci.yml | 23 +++ CONTRIBUTING.md | 72 +++++++++ tests/semaphore.test.ts | 57 ++++++++ tests/shared-memory.test.ts | 122 ++++++++++++++++ tests/task-queue.test.ts | 244 +++++++++++++++++++++++++++++++ tests/task-utils.test.ts | 155 ++++++++++++++++++++ tests/tool-executor.test.ts | 193 ++++++++++++++++++++++++ 8 files changed, 880 insertions(+) create mode 100644 .github/pull_request_template.md create mode 100644 .github/workflows/ci.yml create mode 100644 CONTRIBUTING.md create mode 100644 tests/semaphore.test.ts create mode 100644 tests/shared-memory.test.ts create mode 100644 tests/task-queue.test.ts create mode 100644 tests/task-utils.test.ts create mode 100644 tests/tool-executor.test.ts diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..739d91d --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,14 @@ +## What + + + +## Why + + + +## Checklist + +- [ ] `npm run lint` passes +- [ ] `npm test` passes +- [ ] Added/updated tests for changed behavior +- [ ] No new runtime dependencies (or justified in the PR description) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..6f5b577 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,23 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + node-version: [18, 20, 22] + steps: + - uses: 
actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.node-version }} + cache: npm + - run: npm ci + - run: npm run lint + - run: npm test diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..e17dd36 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,72 @@ +# Contributing + +Thanks for your interest in contributing to Open Multi-Agent! This guide covers the basics to get you started. + +## Setup + +```bash +git clone https://github.com/JackChen-me/open-multi-agent.git +cd open-multi-agent +npm install +``` + +Requires Node.js >= 18. + +## Development Commands + +```bash +npm run build # Compile TypeScript (src/ → dist/) +npm run dev # Watch mode compilation +npm run lint # Type-check (tsc --noEmit) +npm test # Run all tests (vitest) +npm run test:watch # Vitest watch mode +``` + +## Running Tests + +All tests live in `tests/`. They test core modules (TaskQueue, task utilities, SharedMemory, ToolExecutor, ToolRegistry, Semaphore) without requiring API keys or network access. + +```bash +npm test +``` + +Every PR must pass `npm run lint && npm test`. CI runs both automatically on Node 18, 20, and 22. + +## Making a Pull Request + +1. Fork the repo and create a branch from `main` +2. Make your changes +3. Add or update tests if you changed behavior +4. Run `npm run lint && npm test` locally +5. Open a PR against `main` + +### PR Checklist + +- [ ] `npm run lint` passes +- [ ] `npm test` passes +- [ ] New behavior has test coverage +- [ ] Linked to a relevant issue (if one exists) + +## Code Style + +- TypeScript strict mode, ES modules (`.js` extensions in imports) +- No additional linter/formatter configured — follow existing patterns +- Keep dependencies minimal (currently 3 runtime deps: `@anthropic-ai/sdk`, `openai`, `zod`) + +## Architecture Overview + +See the [README](./README.md#architecture) for an architecture diagram. 
Key entry points: + +- **Orchestrator**: `src/orchestrator/orchestrator.ts` — top-level API +- **Task system**: `src/task/queue.ts`, `src/task/task.ts` — dependency DAG +- **Agent**: `src/agent/runner.ts` — conversation loop +- **Tools**: `src/tool/framework.ts`, `src/tool/executor.ts` — tool registry and execution +- **LLM adapters**: `src/llm/` — Anthropic, OpenAI, Copilot + +## Where to Contribute + +Check the [issues](https://github.com/JackChen-me/open-multi-agent/issues) page. Issues labeled `good first issue` are scoped and approachable. Issues labeled `help wanted` are larger but well-defined. + +## License + +By contributing, you agree that your contributions will be licensed under the MIT License. diff --git a/tests/semaphore.test.ts b/tests/semaphore.test.ts new file mode 100644 index 0000000..ddc1b34 --- /dev/null +++ b/tests/semaphore.test.ts @@ -0,0 +1,57 @@ +import { describe, it, expect } from 'vitest' +import { Semaphore } from '../src/utils/semaphore.js' + +describe('Semaphore', () => { + it('throws on max < 1', () => { + expect(() => new Semaphore(0)).toThrow() + }) + + it('allows up to max concurrent holders', async () => { + const sem = new Semaphore(2) + let running = 0 + let peak = 0 + + const work = async () => { + await sem.acquire() + running++ + peak = Math.max(peak, running) + await new Promise((r) => setTimeout(r, 30)) + running-- + sem.release() + } + + await Promise.all([work(), work(), work(), work()]) + expect(peak).toBeLessThanOrEqual(2) + }) + + it('run() auto-releases on success', async () => { + const sem = new Semaphore(1) + const result = await sem.run(async () => 42) + expect(result).toBe(42) + expect(sem.active).toBe(0) + }) + + it('run() auto-releases on error', async () => { + const sem = new Semaphore(1) + await expect(sem.run(async () => { throw new Error('oops') })).rejects.toThrow('oops') + expect(sem.active).toBe(0) + }) + + it('tracks active and pending counts', async () => { + const sem = new Semaphore(1) + await 
sem.acquire() + expect(sem.active).toBe(1) + + // This will queue + const p = sem.acquire() + expect(sem.pending).toBe(1) + + sem.release() + await p + expect(sem.active).toBe(1) + expect(sem.pending).toBe(0) + + sem.release() + expect(sem.active).toBe(0) + }) +}) diff --git a/tests/shared-memory.test.ts b/tests/shared-memory.test.ts new file mode 100644 index 0000000..1467c95 --- /dev/null +++ b/tests/shared-memory.test.ts @@ -0,0 +1,122 @@ +import { describe, it, expect } from 'vitest' +import { SharedMemory } from '../src/memory/shared.js' + +describe('SharedMemory', () => { + // ------------------------------------------------------------------------- + // Write & read + // ------------------------------------------------------------------------- + + it('writes and reads a value under a namespaced key', async () => { + const mem = new SharedMemory() + await mem.write('researcher', 'findings', 'TS 5.5 ships const type params') + + const entry = await mem.read('researcher/findings') + expect(entry).not.toBeNull() + expect(entry!.value).toBe('TS 5.5 ships const type params') + }) + + it('returns null for a non-existent key', async () => { + const mem = new SharedMemory() + expect(await mem.read('nope/nothing')).toBeNull() + }) + + // ------------------------------------------------------------------------- + // Namespace isolation + // ------------------------------------------------------------------------- + + it('isolates writes between agents', async () => { + const mem = new SharedMemory() + await mem.write('alice', 'plan', 'plan A') + await mem.write('bob', 'plan', 'plan B') + + const alice = await mem.read('alice/plan') + const bob = await mem.read('bob/plan') + expect(alice!.value).toBe('plan A') + expect(bob!.value).toBe('plan B') + }) + + it('listByAgent returns only that agent\'s entries', async () => { + const mem = new SharedMemory() + await mem.write('alice', 'a1', 'v1') + await mem.write('alice', 'a2', 'v2') + await mem.write('bob', 'b1', 'v3') + + 
const aliceEntries = await mem.listByAgent('alice') + expect(aliceEntries).toHaveLength(2) + expect(aliceEntries.every((e) => e.key.startsWith('alice/'))).toBe(true) + }) + + // ------------------------------------------------------------------------- + // Overwrite + // ------------------------------------------------------------------------- + + it('overwrites a value and preserves createdAt', async () => { + const mem = new SharedMemory() + await mem.write('agent', 'key', 'first') + const first = await mem.read('agent/key') + + await mem.write('agent', 'key', 'second') + const second = await mem.read('agent/key') + + expect(second!.value).toBe('second') + expect(second!.createdAt.getTime()).toBe(first!.createdAt.getTime()) + }) + + // ------------------------------------------------------------------------- + // Metadata + // ------------------------------------------------------------------------- + + it('stores metadata alongside the value', async () => { + const mem = new SharedMemory() + await mem.write('agent', 'key', 'val', { priority: 'high' }) + + const entry = await mem.read('agent/key') + expect(entry!.metadata).toMatchObject({ priority: 'high', agent: 'agent' }) + }) + + // ------------------------------------------------------------------------- + // Summary + // ------------------------------------------------------------------------- + + it('returns empty string for an empty store', async () => { + const mem = new SharedMemory() + expect(await mem.getSummary()).toBe('') + }) + + it('produces a markdown summary grouped by agent', async () => { + const mem = new SharedMemory() + await mem.write('researcher', 'findings', 'result A') + await mem.write('coder', 'plan', 'implement X') + + const summary = await mem.getSummary() + expect(summary).toContain('## Shared Team Memory') + expect(summary).toContain('### researcher') + expect(summary).toContain('### coder') + expect(summary).toContain('findings: result A') + expect(summary).toContain('plan: 
implement X') + }) + + it('truncates long values in the summary', async () => { + const mem = new SharedMemory() + const longValue = 'x'.repeat(300) + await mem.write('agent', 'big', longValue) + + const summary = await mem.getSummary() + // Summary truncates at 200 chars → 197 + '…' + expect(summary.length).toBeLessThan(longValue.length) + expect(summary).toContain('…') + }) + + // ------------------------------------------------------------------------- + // listAll + // ------------------------------------------------------------------------- + + it('listAll returns entries from all agents', async () => { + const mem = new SharedMemory() + await mem.write('a', 'k1', 'v1') + await mem.write('b', 'k2', 'v2') + + const all = await mem.listAll() + expect(all).toHaveLength(2) + }) +}) diff --git a/tests/task-queue.test.ts b/tests/task-queue.test.ts new file mode 100644 index 0000000..87a2500 --- /dev/null +++ b/tests/task-queue.test.ts @@ -0,0 +1,244 @@ +import { describe, it, expect, vi } from 'vitest' +import { TaskQueue } from '../src/task/queue.js' +import { createTask } from '../src/task/task.js' + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Create a simple task with a predictable id. */ +function task(id: string, opts: { dependsOn?: string[]; assignee?: string } = {}) { + const t = createTask({ title: id, description: `task ${id}`, assignee: opts.assignee }) + // Override the random UUID so tests can reference tasks by name. 
+ return { ...t, id, dependsOn: opts.dependsOn } as ReturnType<typeof createTask> +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('TaskQueue', () => { + // ------------------------------------------------------------------------- + // Basic add & query + // ------------------------------------------------------------------------- + + it('adds a task and lists it', () => { + const q = new TaskQueue() + q.add(task('a')) + expect(q.list()).toHaveLength(1) + expect(q.list()[0].id).toBe('a') + }) + + it('fires task:ready for a task with no dependencies', () => { + const q = new TaskQueue() + const handler = vi.fn() + q.on('task:ready', handler) + + q.add(task('a')) + expect(handler).toHaveBeenCalledTimes(1) + expect(handler.mock.calls[0][0].id).toBe('a') + }) + + it('blocks a task whose dependency is not yet completed', () => { + const q = new TaskQueue() + q.add(task('a')) + q.add(task('b', { dependsOn: ['a'] })) + + const b = q.list().find((t) => t.id === 'b')! 
+ expect(b.status).toBe('blocked') + }) + + // ------------------------------------------------------------------------- + // Dependency resolution + // ------------------------------------------------------------------------- + + it('unblocks a dependent task when its dependency completes', () => { + const q = new TaskQueue() + const readyHandler = vi.fn() + q.on('task:ready', readyHandler) + + q.add(task('a')) + q.add(task('b', { dependsOn: ['a'] })) + + // 'a' fires task:ready, 'b' is blocked + expect(readyHandler).toHaveBeenCalledTimes(1) + + q.complete('a', 'done') + + // 'b' should now be unblocked → fires task:ready + expect(readyHandler).toHaveBeenCalledTimes(2) + expect(readyHandler.mock.calls[1][0].id).toBe('b') + expect(q.list().find((t) => t.id === 'b')!.status).toBe('pending') + }) + + it('keeps a task blocked until ALL dependencies complete', () => { + const q = new TaskQueue() + q.add(task('a')) + q.add(task('b')) + q.add(task('c', { dependsOn: ['a', 'b'] })) + + q.complete('a') + + const cAfterA = q.list().find((t) => t.id === 'c')! + expect(cAfterA.status).toBe('blocked') + + q.complete('b') + + const cAfterB = q.list().find((t) => t.id === 'c')! 
+ expect(cAfterB.status).toBe('pending') + }) + + // ------------------------------------------------------------------------- + // Cascade failure + // ------------------------------------------------------------------------- + + it('cascades failure to direct dependents', () => { + const q = new TaskQueue() + const failHandler = vi.fn() + q.on('task:failed', failHandler) + + q.add(task('a')) + q.add(task('b', { dependsOn: ['a'] })) + + q.fail('a', 'boom') + + expect(failHandler).toHaveBeenCalledTimes(2) // a + b + expect(q.list().find((t) => t.id === 'b')!.status).toBe('failed') + expect(q.list().find((t) => t.id === 'b')!.result).toContain('dependency') + }) + + it('cascades failure transitively (a → b → c)', () => { + const q = new TaskQueue() + q.add(task('a')) + q.add(task('b', { dependsOn: ['a'] })) + q.add(task('c', { dependsOn: ['b'] })) + + q.fail('a', 'boom') + + expect(q.list().every((t) => t.status === 'failed')).toBe(true) + }) + + it('does not cascade failure to independent tasks', () => { + const q = new TaskQueue() + q.add(task('a')) + q.add(task('b')) + q.add(task('c', { dependsOn: ['a'] })) + + q.fail('a', 'boom') + + expect(q.list().find((t) => t.id === 'b')!.status).toBe('pending') + expect(q.list().find((t) => t.id === 'c')!.status).toBe('failed') + }) + + // ------------------------------------------------------------------------- + // Completion + // ------------------------------------------------------------------------- + + it('fires all:complete when every task reaches a terminal state', () => { + const q = new TaskQueue() + const allComplete = vi.fn() + q.on('all:complete', allComplete) + + q.add(task('a')) + q.add(task('b')) + + q.complete('a') + expect(allComplete).not.toHaveBeenCalled() + + q.complete('b') + expect(allComplete).toHaveBeenCalledTimes(1) + }) + + it('fires all:complete when mix of completed and failed', () => { + const q = new TaskQueue() + const allComplete = vi.fn() + q.on('all:complete', allComplete) + + 
q.add(task('a')) + q.add(task('b', { dependsOn: ['a'] })) + + q.fail('a', 'err') // cascades to b + expect(allComplete).toHaveBeenCalledTimes(1) + }) + + it('isComplete returns true for an empty queue', () => { + const q = new TaskQueue() + expect(q.isComplete()).toBe(true) + }) + + // ------------------------------------------------------------------------- + // Query: next / nextAvailable + // ------------------------------------------------------------------------- + + it('next() returns a pending task for the given assignee', () => { + const q = new TaskQueue() + q.add(task('a', { assignee: 'alice' })) + q.add(task('b', { assignee: 'bob' })) + + expect(q.next('bob')?.id).toBe('b') + }) + + it('next() returns undefined when no pending task matches', () => { + const q = new TaskQueue() + q.add(task('a', { assignee: 'alice' })) + expect(q.next('bob')).toBeUndefined() + }) + + it('nextAvailable() prefers unassigned tasks', () => { + const q = new TaskQueue() + q.add(task('assigned', { assignee: 'alice' })) + q.add(task('unassigned')) + + expect(q.nextAvailable()?.id).toBe('unassigned') + }) + + // ------------------------------------------------------------------------- + // Progress + // ------------------------------------------------------------------------- + + it('getProgress() returns correct counts', () => { + const q = new TaskQueue() + q.add(task('a')) + q.add(task('b')) + q.add(task('c', { dependsOn: ['a'] })) + + q.complete('a') + + const p = q.getProgress() + expect(p.total).toBe(3) + expect(p.completed).toBe(1) + expect(p.pending).toBe(2) // b + c (unblocked) + expect(p.blocked).toBe(0) + }) + + // ------------------------------------------------------------------------- + // Event unsubscribe + // ------------------------------------------------------------------------- + + it('unsubscribe stops receiving events', () => { + const q = new TaskQueue() + const handler = vi.fn() + const off = q.on('task:ready', handler) + + q.add(task('a')) + 
expect(handler).toHaveBeenCalledTimes(1) + + off() + q.add(task('b')) + expect(handler).toHaveBeenCalledTimes(1) // no new call + }) + + // ------------------------------------------------------------------------- + // Error cases + // ------------------------------------------------------------------------- + + it('throws when completing a non-existent task', () => { + const q = new TaskQueue() + expect(() => q.complete('ghost')).toThrow('not found') + }) + + it('throws when failing a non-existent task', () => { + const q = new TaskQueue() + expect(() => q.fail('ghost', 'err')).toThrow('not found') + }) +}) diff --git a/tests/task-utils.test.ts b/tests/task-utils.test.ts new file mode 100644 index 0000000..7c3a8f5 --- /dev/null +++ b/tests/task-utils.test.ts @@ -0,0 +1,155 @@ +import { describe, it, expect } from 'vitest' +import { + createTask, + isTaskReady, + getTaskDependencyOrder, + validateTaskDependencies, +} from '../src/task/task.js' +import type { Task } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function task(id: string, opts: { dependsOn?: string[]; status?: Task['status'] } = {}): Task { + const t = createTask({ title: id, description: `task ${id}` }) + return { ...t, id, dependsOn: opts.dependsOn, status: opts.status ?? 
'pending' } +} + +// --------------------------------------------------------------------------- +// createTask +// --------------------------------------------------------------------------- + +describe('createTask', () => { + it('creates a task with pending status and timestamps', () => { + const t = createTask({ title: 'Test', description: 'A test task' }) + expect(t.id).toBeDefined() + expect(t.status).toBe('pending') + expect(t.createdAt).toBeInstanceOf(Date) + expect(t.updatedAt).toBeInstanceOf(Date) + }) + + it('copies dependsOn array (no shared reference)', () => { + const deps = ['a'] + const t = createTask({ title: 'T', description: 'D', dependsOn: deps }) + deps.push('b') + expect(t.dependsOn).toEqual(['a']) + }) +}) + +// --------------------------------------------------------------------------- +// isTaskReady +// --------------------------------------------------------------------------- + +describe('isTaskReady', () => { + it('returns true for a pending task with no dependencies', () => { + const t = task('a') + expect(isTaskReady(t, [t])).toBe(true) + }) + + it('returns false for a non-pending task', () => { + const t = task('a', { status: 'blocked' }) + expect(isTaskReady(t, [t])).toBe(false) + }) + + it('returns true when all dependencies are completed', () => { + const dep = task('dep', { status: 'completed' }) + const t = task('a', { dependsOn: ['dep'] }) + expect(isTaskReady(t, [dep, t])).toBe(true) + }) + + it('returns false when a dependency is not yet completed', () => { + const dep = task('dep', { status: 'in_progress' }) + const t = task('a', { dependsOn: ['dep'] }) + expect(isTaskReady(t, [dep, t])).toBe(false) + }) + + it('returns false when a dependency is missing from the task set', () => { + const t = task('a', { dependsOn: ['ghost'] }) + expect(isTaskReady(t, [t])).toBe(false) + }) +}) + +// --------------------------------------------------------------------------- +// getTaskDependencyOrder +// 
--------------------------------------------------------------------------- + +describe('getTaskDependencyOrder', () => { + it('returns empty array for empty input', () => { + expect(getTaskDependencyOrder([])).toEqual([]) + }) + + it('returns tasks with no deps first', () => { + const a = task('a') + const b = task('b', { dependsOn: ['a'] }) + const ordered = getTaskDependencyOrder([b, a]) + expect(ordered[0].id).toBe('a') + expect(ordered[1].id).toBe('b') + }) + + it('handles a diamond dependency (a → b,c → d)', () => { + const a = task('a') + const b = task('b', { dependsOn: ['a'] }) + const c = task('c', { dependsOn: ['a'] }) + const d = task('d', { dependsOn: ['b', 'c'] }) + + const ordered = getTaskDependencyOrder([d, c, b, a]) + const ids = ordered.map((t) => t.id) + + // a must come before b and c; b and c must come before d + expect(ids.indexOf('a')).toBeLessThan(ids.indexOf('b')) + expect(ids.indexOf('a')).toBeLessThan(ids.indexOf('c')) + expect(ids.indexOf('b')).toBeLessThan(ids.indexOf('d')) + expect(ids.indexOf('c')).toBeLessThan(ids.indexOf('d')) + }) + + it('returns partial result when a cycle exists', () => { + const a = task('a', { dependsOn: ['b'] }) + const b = task('b', { dependsOn: ['a'] }) + const ordered = getTaskDependencyOrder([a, b]) + // Neither can be ordered — result should be empty (or partial) + expect(ordered.length).toBeLessThan(2) + }) +}) + +// --------------------------------------------------------------------------- +// validateTaskDependencies +// --------------------------------------------------------------------------- + +describe('validateTaskDependencies', () => { + it('returns valid for tasks with no deps', () => { + const result = validateTaskDependencies([task('a'), task('b')]) + expect(result.valid).toBe(true) + expect(result.errors).toHaveLength(0) + }) + + it('detects self-dependency', () => { + const t = task('a', { dependsOn: ['a'] }) + const result = validateTaskDependencies([t]) + 
expect(result.valid).toBe(false) + expect(result.errors[0]).toContain('depends on itself') + }) + + it('detects unknown dependency', () => { + const t = task('a', { dependsOn: ['ghost'] }) + const result = validateTaskDependencies([t]) + expect(result.valid).toBe(false) + expect(result.errors[0]).toContain('unknown dependency') + }) + + it('detects a cycle (a → b → a)', () => { + const a = task('a', { dependsOn: ['b'] }) + const b = task('b', { dependsOn: ['a'] }) + const result = validateTaskDependencies([a, b]) + expect(result.valid).toBe(false) + expect(result.errors.some((e) => e.toLowerCase().includes('cyclic'))).toBe(true) + }) + + it('detects a longer cycle (a → b → c → a)', () => { + const a = task('a', { dependsOn: ['c'] }) + const b = task('b', { dependsOn: ['a'] }) + const c = task('c', { dependsOn: ['b'] }) + const result = validateTaskDependencies([a, b, c]) + expect(result.valid).toBe(false) + }) +}) diff --git a/tests/tool-executor.test.ts b/tests/tool-executor.test.ts new file mode 100644 index 0000000..afa7cb6 --- /dev/null +++ b/tests/tool-executor.test.ts @@ -0,0 +1,193 @@ +import { describe, it, expect, vi } from 'vitest' +import { z } from 'zod' +import { ToolRegistry, defineTool } from '../src/tool/framework.js' +import { ToolExecutor } from '../src/tool/executor.js' +import type { ToolUseContext } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +const dummyContext: ToolUseContext = { + agent: { name: 'test-agent', role: 'tester', model: 'test-model' }, +} + +function echoTool() { + return defineTool({ + name: 'echo', + description: 'Echoes the message.', + inputSchema: z.object({ message: z.string() }), + execute: async ({ message }) => ({ data: message, isError: false }), + }) +} + +function failTool() { + return defineTool({ + name: 'fail', + description: 'Always throws.', + inputSchema: 
z.object({}), + execute: async () => { + throw new Error('intentional failure') + }, + }) +} + +function makeExecutor(...tools: ReturnType<typeof defineTool>[]) { + const registry = new ToolRegistry() + for (const t of tools) registry.register(t) + return { executor: new ToolExecutor(registry), registry } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('ToolExecutor', () => { + // ------------------------------------------------------------------------- + // Single execution + // ------------------------------------------------------------------------- + + it('executes a tool and returns its result', async () => { + const { executor } = makeExecutor(echoTool()) + const result = await executor.execute('echo', { message: 'hello' }, dummyContext) + expect(result.data).toBe('hello') + expect(result.isError).toBeFalsy() + }) + + it('returns an error result for an unknown tool', async () => { + const { executor } = makeExecutor() + const result = await executor.execute('ghost', {}, dummyContext) + expect(result.isError).toBe(true) + expect(result.data).toContain('not registered') + }) + + it('returns an error result when Zod validation fails', async () => { + const { executor } = makeExecutor(echoTool()) + // 'message' is required but missing + const result = await executor.execute('echo', {}, dummyContext) + expect(result.isError).toBe(true) + expect(result.data).toContain('Invalid input') + }) + + it('catches tool execution errors and returns them as error results', async () => { + const { executor } = makeExecutor(failTool()) + const result = await executor.execute('fail', {}, dummyContext) + expect(result.isError).toBe(true) + expect(result.data).toContain('intentional failure') + }) + + it('returns an error result when aborted before execution', async () => { + const { executor } = makeExecutor(echoTool()) + const controller = new AbortController() 
+ controller.abort() + + const result = await executor.execute( + 'echo', + { message: 'hi' }, + { ...dummyContext, abortSignal: controller.signal }, + ) + expect(result.isError).toBe(true) + expect(result.data).toContain('aborted') + }) + + // ------------------------------------------------------------------------- + // Batch execution + // ------------------------------------------------------------------------- + + it('executeBatch runs multiple tools and returns a map of results', async () => { + const { executor } = makeExecutor(echoTool()) + const results = await executor.executeBatch( + [ + { id: 'c1', name: 'echo', input: { message: 'a' } }, + { id: 'c2', name: 'echo', input: { message: 'b' } }, + ], + dummyContext, + ) + + expect(results.size).toBe(2) + expect(results.get('c1')!.data).toBe('a') + expect(results.get('c2')!.data).toBe('b') + }) + + it('executeBatch isolates errors — one failure does not affect others', async () => { + const { executor } = makeExecutor(echoTool(), failTool()) + const results = await executor.executeBatch( + [ + { id: 'ok', name: 'echo', input: { message: 'fine' } }, + { id: 'bad', name: 'fail', input: {} }, + ], + dummyContext, + ) + + expect(results.get('ok')!.isError).toBeFalsy() + expect(results.get('bad')!.isError).toBe(true) + }) + + // ------------------------------------------------------------------------- + // Concurrency control + // ------------------------------------------------------------------------- + + it('respects maxConcurrency limit', async () => { + let peak = 0 + let running = 0 + + const trackTool = defineTool({ + name: 'track', + description: 'Tracks concurrency.', + inputSchema: z.object({}), + execute: async () => { + running++ + peak = Math.max(peak, running) + await new Promise((r) => setTimeout(r, 50)) + running-- + return { data: 'ok', isError: false } + }, + }) + + const registry = new ToolRegistry() + registry.register(trackTool) + const executor = new ToolExecutor(registry, { maxConcurrency: 
2 }) + + await executor.executeBatch( + Array.from({ length: 5 }, (_, i) => ({ id: `t${i}`, name: 'track', input: {} })), + dummyContext, + ) + + expect(peak).toBeLessThanOrEqual(2) + }) +}) + +// --------------------------------------------------------------------------- +// ToolRegistry +// --------------------------------------------------------------------------- + +describe('ToolRegistry', () => { + it('registers and retrieves a tool', () => { + const registry = new ToolRegistry() + registry.register(echoTool()) + expect(registry.get('echo')).toBeDefined() + expect(registry.has('echo')).toBe(true) + }) + + it('throws on duplicate registration', () => { + const registry = new ToolRegistry() + registry.register(echoTool()) + expect(() => registry.register(echoTool())).toThrow('already registered') + }) + + it('unregister removes the tool', () => { + const registry = new ToolRegistry() + registry.register(echoTool()) + registry.unregister('echo') + expect(registry.has('echo')).toBe(false) + }) + + it('toToolDefs produces JSON schema representations', () => { + const registry = new ToolRegistry() + registry.register(echoTool()) + const defs = registry.toToolDefs() + expect(defs).toHaveLength(1) + expect(defs[0].name).toBe('echo') + expect(defs[0].inputSchema).toHaveProperty('properties') + }) +})