diff --git a/src/index.ts b/src/index.ts index a6b27f5..20d8d1a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -54,7 +54,7 @@ // Orchestrator (primary entry point) // --------------------------------------------------------------------------- -export { OpenMultiAgent, executeWithRetry, computeRetryDelay, isSimpleGoal, selectBestAgent } from './orchestrator/orchestrator.js' +export { OpenMultiAgent, executeWithRetry, computeRetryDelay } from './orchestrator/orchestrator.js' export { Scheduler } from './orchestrator/scheduler.js' export type { SchedulingStrategy } from './orchestrator/scheduler.js' diff --git a/src/orchestrator/orchestrator.ts b/src/orchestrator/orchestrator.ts index e5c010c..169741d 100644 --- a/src/orchestrator/orchestrator.ts +++ b/src/orchestrator/orchestrator.ts @@ -64,6 +64,7 @@ import { TaskQueue } from '../task/queue.js' import { createTask } from '../task/task.js' import { Scheduler } from './scheduler.js' import { TokenBudgetExceededError } from '../errors.js' +import { extractKeywords, keywordScore } from '../utils/keywords.js' // --------------------------------------------------------------------------- // Internal constants @@ -94,9 +95,12 @@ const COMPLEXITY_PATTERNS: RegExp[] = [ /\bstage\s*\d/i, /^\s*\d+[\.\)]/m, // numbered list items ("1. …", "2) …") - // Coordination language - /\bcollaborat/i, - /\bcoordinat/i, + // Coordination language — must be an imperative directive aimed at the agents + // ("collaborate with X", "coordinate the team", "agents should coordinate"), + // not a descriptive use ("how does X coordinate with Y" / "what does collaboration mean"). + // Match either an explicit preposition or a noun-phrase that names a group. + /\bcollaborat(?:e|ing)\b\s+(?:with|on|to)\b/i, + /\bcoordinat(?:e|ing)\b\s+(?:with|on|across|between|the\s+(?:team|agents?|workers?|effort|work))\b/i, /\breview\s+each\s+other/i, /\bwork\s+together\b/i, @@ -110,6 +114,7 @@ const COMPLEXITY_PATTERNS: RegExp[] = [ /\b(?:build|create|implement|design|write|develop)\b.{5,80}\b(?:and|then)\b.{5,80}\b(?:build|create|implement|design|write|develop|test|review|deploy)\b/i, ] + /** * Maximum goal length (in characters) below which a goal *may* be simple. * @@ -126,6 +131,11 @@ const SIMPLE_GOAL_MAX_LENGTH = 200 * 1. Its length is ≤ {@link SIMPLE_GOAL_MAX_LENGTH}. * 2. It does not match any {@link COMPLEXITY_PATTERNS}. * + * The complexity patterns are deliberately conservative — they only fire on + * imperative coordination directives (e.g. "collaborate with the team", + * "coordinate the workers"), so descriptive uses ("how do pods coordinate + * state", "explain microservice collaboration") remain classified as simple. + * * Exported for unit testing. */ export function isSimpleGoal(goal: string): boolean { @@ -136,12 +146,18 @@ export function isSimpleGoal(goal: string): boolean { /** * Select the best-matching agent for a goal using keyword affinity scoring. * - * Scores each agent by keyword overlap between the goal text and the agent's - * `name` + `systemPrompt`. Returns the highest-scoring agent, or falls back - * to the first agent when all scores are equal. + * The scoring logic mirrors {@link Scheduler}'s `capability-match` strategy + * exactly, including its asymmetric use of the agent's `model` field: * - * The keyword extraction and scoring logic mirrors the `capability-match` - * strategy in {@link Scheduler} to keep behaviour consistent. + * - `agentKeywords` is computed from `name + systemPrompt + model` so that + * a goal which mentions a model name (e.g. "haiku") can boost an agent + * bound to that model. + * - `agentText` (used for the reverse direction) is computed from + * `name + systemPrompt` only — model names should not bias the + * text-vs-goal-keywords match. + * + * The two-direction sum (`scoreA + scoreB`) ensures both "agent describes + * goal" and "goal mentions agent capability" contribute to the final score. * * Exported for unit testing. */ @@ -155,9 +171,9 @@ export function selectBestAgent(goal: string, agents: AgentConfig[]): AgentConfi for (const agent of agents) { const agentText = `${agent.name} ${agent.systemPrompt ?? ''}` - const agentKeywords = extractKeywords(agentText) + // Mirror Scheduler.capability-match: include `model` here only. + const agentKeywords = extractKeywords(`${agent.name} ${agent.systemPrompt ?? ''} ${agent.model}`) - // Score in both directions (same as Scheduler.capability-match) const scoreA = keywordScore(agentText, goalKeywords) const scoreB = keywordScore(goal, agentKeywords) const score = scoreA + scoreB @@ -171,25 +187,6 @@ export function selectBestAgent(goal: string, agents: AgentConfig[]): AgentConfi return bestAgent } -// ---- keyword helpers (shared with Scheduler via same logic) ---- - -const STOP_WORDS = new Set([ - 'the', 'and', 'for', 'that', 'this', 'with', 'are', 'from', 'have', - 'will', 'your', 'you', 'can', 'all', 'each', 'when', 'then', 'they', - 'them', 'their', 'about', 'into', 'more', 'also', 'should', 'must', -]) - -function extractKeywords(text: string): string[] { - return [...new Set( - text.toLowerCase().split(/\W+/).filter((w) => w.length > 3 && !STOP_WORDS.has(w)), - )] -} - -function keywordScore(text: string, keywords: string[]): number { - const lower = text.toLowerCase() - return keywords.reduce((acc, kw) => acc + (lower.includes(kw.toLowerCase()) ? 1 : 0), 0) -} - // --------------------------------------------------------------------------- // Internal helpers // --------------------------------------------------------------------------- @@ -741,7 +738,11 @@ export class OpenMultiAgent { * @param config - Agent configuration. * @param prompt - The user prompt to send. */ - async runAgent(config: AgentConfig, prompt: string): Promise { + async runAgent( + config: AgentConfig, + prompt: string, + options?: { abortSignal?: AbortSignal }, + ): Promise { const effectiveBudget = resolveTokenBudget(config.maxTokenBudget, this.config.maxTokenBudget) const effective: AgentConfig = { ...config, @@ -757,11 +758,22 @@ export class OpenMultiAgent { data: { prompt }, }) - const traceOptions: Partial | undefined = this.config.onTrace - ? { onTrace: this.config.onTrace, runId: generateRunId(), traceAgent: config.name } - : undefined + // Build run-time options: trace + optional abort signal. RunOptions has + // readonly fields, so we assemble the literal in one shot. + const traceFields = this.config.onTrace + ? { + onTrace: this.config.onTrace, + runId: generateRunId(), + traceAgent: config.name, + } + : null + const abortFields = options?.abortSignal ? { abortSignal: options.abortSignal } : null + const runOptions: Partial | undefined = + traceFields || abortFields + ? { ...(traceFields ?? {}), ...(abortFields ?? {}) } + : undefined - const result = await agent.run(prompt, traceOptions) + const result = await agent.run(prompt, runOptions) if (result.budgetExceeded) { this.config.onProgress?.({ @@ -835,7 +847,13 @@ export class OpenMultiAgent { data: { phase: 'short-circuit', goal }, }) - const result = await this.runAgent(bestAgent, goal) + // Forward the caller's abort signal so short-circuit honours the same + // cancellation contract as the full coordinator path. + const result = await this.runAgent( + bestAgent, + goal, + options?.abortSignal ? { abortSignal: options.abortSignal } : undefined, + ) const agentResults = new Map() agentResults.set(bestAgent.name, result) diff --git a/src/orchestrator/scheduler.ts b/src/orchestrator/scheduler.ts index 93d64ac..c3805c8 100644 --- a/src/orchestrator/scheduler.ts +++ b/src/orchestrator/scheduler.ts @@ -15,6 +15,7 @@ import type { AgentConfig, Task } from '../types.js' import type { TaskQueue } from '../task/queue.js' +import { extractKeywords, keywordScore } from '../utils/keywords.js' // --------------------------------------------------------------------------- // Public types @@ -74,38 +75,6 @@ function countBlockedDependents(taskId: string, allTasks: Task[]): number { return visited.size } -/** - * Compute a simple keyword-overlap score between `text` and `keywords`. - * - * Both the text and keywords are normalised to lower-case before comparison. - * Each keyword that appears in the text contributes +1 to the score. - */ -function keywordScore(text: string, keywords: string[]): number { - const lower = text.toLowerCase() - return keywords.reduce((acc, kw) => acc + (lower.includes(kw.toLowerCase()) ? 1 : 0), 0) -} - -/** - * Extract a list of meaningful keywords from a string for capability matching. - * - * Strips common stop-words so that incidental matches (e.g. "the", "and") do - * not inflate scores. Returns unique words longer than three characters. - */ -function extractKeywords(text: string): string[] { - const STOP_WORDS = new Set([ - 'the', 'and', 'for', 'that', 'this', 'with', 'are', 'from', 'have', - 'will', 'your', 'you', 'can', 'all', 'each', 'when', 'then', 'they', - 'them', 'their', 'about', 'into', 'more', 'also', 'should', 'must', - ]) - - return [...new Set( - text - .toLowerCase() - .split(/\W+/) - .filter((w) => w.length > 3 && !STOP_WORDS.has(w)), - )] -} - // --------------------------------------------------------------------------- // Scheduler // --------------------------------------------------------------------------- diff --git a/src/utils/keywords.ts b/src/utils/keywords.ts new file mode 100644 index 0000000..247a6c4 --- /dev/null +++ b/src/utils/keywords.ts @@ -0,0 +1,39 @@ +/** + * Shared keyword-affinity helpers used by capability-match scheduling + * and short-circuit agent selection. Kept in one place so behaviour + * can't drift between Scheduler and Orchestrator. + */ + +export const STOP_WORDS: ReadonlySet = new Set([ + 'the', 'and', 'for', 'that', 'this', 'with', 'are', 'from', 'have', + 'will', 'your', 'you', 'can', 'all', 'each', 'when', 'then', 'they', + 'them', 'their', 'about', 'into', 'more', 'also', 'should', 'must', +]) + +/** + * Tokenise `text` into a deduplicated set of lower-cased keywords. + * Words shorter than 4 characters and entries in {@link STOP_WORDS} + * are filtered out. + */ +export function extractKeywords(text: string): string[] { + return [ + ...new Set( + text + .toLowerCase() + .split(/\W+/) + .filter((w) => w.length > 3 && !STOP_WORDS.has(w)), + ), + ] +} + +/** + * Count how many `keywords` appear (case-insensitively) in `text`. + * Each keyword contributes at most 1 to the score. + */ +export function keywordScore(text: string, keywords: readonly string[]): number { + const lower = text.toLowerCase() + return keywords.reduce( + (acc, kw) => acc + (lower.includes(kw.toLowerCase()) ? 1 : 0), + 0, + ) +} diff --git a/tests/keywords.test.ts b/tests/keywords.test.ts new file mode 100644 index 0000000..599f4fa --- /dev/null +++ b/tests/keywords.test.ts @@ -0,0 +1,75 @@ +import { describe, it, expect } from 'vitest' +import { STOP_WORDS, extractKeywords, keywordScore } from '../src/utils/keywords.js' + +// Regression coverage for the shared keyword helpers extracted from +// orchestrator.ts and scheduler.ts (PR #70 review point 1). +// +// These tests pin behaviour so future drift between Scheduler and the +// short-circuit selector is impossible — any edit must update the shared +// module and these tests at once. + +describe('utils/keywords', () => { + describe('STOP_WORDS', () => { + it('contains all 26 stop words', () => { + // Sanity-check the canonical list — if anyone adds/removes a stop word + // they should also update this assertion. + expect(STOP_WORDS.size).toBe(26) + }) + + it('includes "then" and "and" so they cannot dominate scoring', () => { + expect(STOP_WORDS.has('then')).toBe(true) + expect(STOP_WORDS.has('and')).toBe(true) + }) + }) + + describe('extractKeywords', () => { + it('lowercases and dedupes', () => { + const out = extractKeywords('TypeScript typescript TYPESCRIPT') + expect(out).toEqual(['typescript']) + }) + + it('drops words shorter than 4 characters', () => { + const out = extractKeywords('a bb ccc dddd eeeee') + expect(out).toEqual(['dddd', 'eeeee']) + }) + + it('drops stop words', () => { + const out = extractKeywords('the cat and the dog have meals') + // 'cat', 'dog', 'have' filtered: 'cat'/'dog' too short, 'have' is a stop word + expect(out).toEqual(['meals']) + }) + + it('splits on non-word characters', () => { + const out = extractKeywords('hello,world!writer-mode') + expect(out.sort()).toEqual(['hello', 'mode', 'world', 'writer']) + }) + + it('returns empty array for empty input', () => { + expect(extractKeywords('')).toEqual([]) + }) + }) + + describe('keywordScore', () => { + it('counts each keyword at most once', () => { + // 'code' appears twice in the text but contributes 1 + expect(keywordScore('code review code style', ['code'])).toBe(1) + }) + + it('is case-insensitive', () => { + expect(keywordScore('TYPESCRIPT', ['typescript'])).toBe(1) + expect(keywordScore('typescript', ['TYPESCRIPT'])).toBe(1) + }) + + it('returns 0 when no keywords match', () => { + expect(keywordScore('hello world', ['rust', 'go'])).toBe(0) + }) + + it('sums distinct keyword hits', () => { + expect(keywordScore('write typescript code for the api', ['typescript', 'code', 'rust'])).toBe(2) + }) + + it('returns 0 for empty keywords array', () => { + expect(keywordScore('any text', [])).toBe(0) + }) + }) +}) diff --git a/tests/short-circuit.test.ts b/tests/short-circuit.test.ts index ba563e7..b1f13e2 100644 --- a/tests/short-circuit.test.ts +++ b/tests/short-circuit.test.ts @@ -115,6 +115,43 @@ describe('isSimpleGoal', () => { expect(isSimpleGoal(goal)).toBe(false) }) }) + + // ------------------------------------------------------------------------- + // Regression: tightened coordinate/collaborate regex (PR #70 review point 5) + // + // Descriptive uses of "coordinate" / "collaborate" / "collaboration" must + // NOT be flagged as complex — only imperative directives aimed at agents. + // ------------------------------------------------------------------------- + + describe('tightened coordinate/collaborate patterns', () => { + it('descriptive "how X coordinates" is simple', () => { + expect(isSimpleGoal('Explain how Kubernetes pods coordinate state')).toBe(true) + }) + + it('descriptive "collaboration" noun is simple', () => { + expect(isSimpleGoal('What is microservice collaboration?')).toBe(true) + }) + + it('descriptive "team that coordinates" is simple', () => { + expect(isSimpleGoal('Describe a team that coordinates releases')).toBe(true) + }) + + it('descriptive "without collaborating" is simple', () => { + expect(isSimpleGoal('Show how to deploy without collaborating')).toBe(true) + }) + + it('imperative "collaborate with X" is complex', () => { + expect(isSimpleGoal('Collaborate with the writer to draft a post')).toBe(false) + }) + + it('imperative "coordinate the team" is complex', () => { + expect(isSimpleGoal('Coordinate the team for release')).toBe(false) + }) + + it('imperative "coordinate across services" is complex', () => { + expect(isSimpleGoal('Coordinate across services to roll out the change')).toBe(false) + }) + }) }) // --------------------------------------------------------------------------- @@ -158,6 +195,28 @@ describe('selectBestAgent', () => { // "review" should match "reviewer" agent name expect(selectBestAgent('Review this pull request', agents)).toBe(agents[1]) }) + + // ------------------------------------------------------------------------- + // Regression: model field asymmetry (PR #70 review point 2) + // + // selectBestAgent must mirror Scheduler.capability-match exactly: + // - agentKeywords includes `model` + // - agentText excludes `model` + // This means a goal that mentions a model name should boost the agent + // bound to that model (via scoreB), even if neither name nor system prompt + // contains the keyword. + // ------------------------------------------------------------------------- + it('matches scheduler asymmetry: model name in goal boosts the bound agent', () => { + const agents: AgentConfig[] = [ + // Distinct, non-overlapping prompts so neither one wins on scoreA + { name: 'a1', model: 'haiku-fast-model', systemPrompt: 'You handle quick lookups' }, + { name: 'a2', model: 'opus-deep-model', systemPrompt: 'You handle deep analysis' }, + ] + + // Mention "haiku" — this is only present in a1.model, so the bound + // agent should win because agentKeywords (which includes model) matches. + expect(selectBestAgent('Use the haiku model please', agents)).toBe(agents[0]) + }) }) // --------------------------------------------------------------------------- @@ -287,4 +346,87 @@ describe('runTeam short-circuit', () => { const startEvent = events.find(e => e.type === 'agent_start') expect(startEvent?.agent).toBe('coder') }) + + // ------------------------------------------------------------------------- + // Regression: abortSignal forwarding (PR #70 review point 4) + // + // The short-circuit path must forward `options.abortSignal` from runTeam + // through to runAgent, otherwise simple-goal cancellations are silently + // ignored — a regression vs the full coordinator path which already + // honours the signal via PR #69. + // ------------------------------------------------------------------------- + it('forwards abortSignal from runTeam to runAgent in short-circuit path', async () => { + mockAdapterResponses = ['done'] + + const oma = new OpenMultiAgent({ defaultModel: 'mock-model' }) + const team = oma.createTeam('t', teamCfg()) + + // Spy on runAgent to capture the options argument + const runAgentSpy = vi.spyOn(oma, 'runAgent') + + const controller = new AbortController() + await oma.runTeam(team, 'Say hello', { abortSignal: controller.signal }) + + expect(runAgentSpy).toHaveBeenCalledTimes(1) + const callArgs = runAgentSpy.mock.calls[0]! + // Third positional arg must contain the same signal we passed in + expect(callArgs[2]).toBeDefined() + expect(callArgs[2]?.abortSignal).toBe(controller.signal) + }) + + it('runAgent invoked without abortSignal when caller omits it', async () => { + mockAdapterResponses = ['done'] + + const oma = new OpenMultiAgent({ defaultModel: 'mock-model' }) + const team = oma.createTeam('t', teamCfg()) + + const runAgentSpy = vi.spyOn(oma, 'runAgent') + + await oma.runTeam(team, 'Say hello') + + expect(runAgentSpy).toHaveBeenCalledTimes(1) + const callArgs = runAgentSpy.mock.calls[0]! + // Third positional arg should be undefined when caller doesn't pass one + expect(callArgs[2]).toBeUndefined() + }) + + it('aborted signal causes the underlying agent loop to skip the LLM call', async () => { + // Pre-aborted controller — runner should break before any chat() call + const controller = new AbortController() + controller.abort() + + mockAdapterResponses = ['should never be returned'] + + const oma = new OpenMultiAgent({ defaultModel: 'mock-model' }) + const team = oma.createTeam('t', teamCfg()) + + const result = await oma.runTeam(team, 'Say hello', { abortSignal: controller.signal }) + + // Short-circuit ran one agent, but its loop bailed before any LLM call, + // so the agent's output is the empty string and token usage is zero. + const agentResult = result.agentResults.values().next().value + expect(agentResult?.output).toBe('') + expect(agentResult?.tokenUsage.input_tokens).toBe(0) + expect(agentResult?.tokenUsage.output_tokens).toBe(0) + }) +}) + +// --------------------------------------------------------------------------- +// Public API surface — internal helpers must stay out of the barrel export +// (PR #70 review point 3) +// --------------------------------------------------------------------------- + +describe('public API barrel', () => { + it('does not re-export isSimpleGoal or selectBestAgent', async () => { + const indexExports = await import('../src/index.js') + expect((indexExports as Record).isSimpleGoal).toBeUndefined() + expect((indexExports as Record).selectBestAgent).toBeUndefined() + }) + + it('still re-exports the documented public symbols', async () => { + const indexExports = await import('../src/index.js') + expect(indexExports.OpenMultiAgent).toBeDefined() + expect(indexExports.executeWithRetry).toBeDefined() + expect(indexExports.computeRetryDelay).toBeDefined() + }) }) diff --git a/tests/token-budget.test.ts b/tests/token-budget.test.ts index 72cde15..c2b64a9 100644 --- a/tests/token-budget.test.ts +++ b/tests/token-budget.test.ts @@ -238,7 +238,10 @@ describe('token budget enforcement', () => { sharedMemory: false, }) - const result = await oma.runTeam(team, 'Do work') + // Use a goal that explicitly mentions sequencing so the short-circuit + // path is skipped and the coordinator decomposition + execution flow + // (which this test is exercising) actually runs. + const result = await oma.runTeam(team, 'First plan the work, then execute it') expect(result.totalTokenUsage.input_tokens + result.totalTokenUsage.output_tokens).toBe(70) expect(events.some(e => e.type === 'budget_exceeded')).toBe(true) })