From 6cec006359f96dedb96f3aedaf41f934274258dc Mon Sep 17 00:00:00 2001
From: Optimisttt
Date: Thu, 23 Apr 2026 22:43:56 +0800
Subject: [PATCH] examples: research aggregation schema + provider env + test
 (#159)

---
 examples/patterns/research-aggregation.ts | 212 ++++++++++++++++++----
 tests/research-aggregation.test.ts        | 199 ++++++++++++++++++++
 2 files changed, 378 insertions(+), 33 deletions(-)
 create mode 100644 tests/research-aggregation.test.ts

diff --git a/examples/patterns/research-aggregation.ts b/examples/patterns/research-aggregation.ts
index 8efefc3..c944dce 100644
--- a/examples/patterns/research-aggregation.ts
+++ b/examples/patterns/research-aggregation.ts
@@ -14,20 +14,104 @@
  * [technical-analyst, market-analyst, community-analyst] (parallel) → synthesizer
  *
  * Run:
- *   npx tsx examples/patterns/research-aggregation.ts
+ *   npx tsx examples/patterns/research-aggregation.ts "<topic>"
- *
- * Prerequisites:
- *   ANTHROPIC_API_KEY env var must be set.
+ * Provider selection (env):
+ *   - LLM_PROVIDER=anthropic (default) → requires ANTHROPIC_API_KEY
+ *   - LLM_PROVIDER=gemini → requires GEMINI_API_KEY (+ optional peer dep @google/genai)
+ *   - LLM_PROVIDER=groq → requires GROQ_API_KEY
+ *   - LLM_PROVIDER=openrouter → requires OPENROUTER_API_KEY
+ *
+ * Optional:
+ *   - LLM_MODEL=... overrides the default model for the selected provider.
  */
 
+import { z } from 'zod'
 import { OpenMultiAgent } from '../../src/index.js'
 import type { AgentConfig, OrchestratorEvent } from '../../src/types.js'
 
 // ---------------------------------------------------------------------------
-// Topic
+// Topic + provider selection
 // ---------------------------------------------------------------------------
 
-const TOPIC = 'WebAssembly adoption in 2026'
+const TOPIC = process.argv[2] ?? 'WebAssembly adoption in 2026'
+
+type ProviderChoice = 'anthropic' | 'gemini' | 'groq' | 'openrouter'
+
+function resolveProvider(): {
+  label: ProviderChoice
+  model: string
+  provider: NonNullable<AgentConfig['provider']>
+  baseURL?: string
+  apiKey?: string
+} {
+  const raw = (process.env.LLM_PROVIDER ?? 'anthropic').toLowerCase() as ProviderChoice
+  const modelOverride = process.env.LLM_MODEL
+
+  switch (raw) {
+    case 'gemini':
+      return { label: 'gemini', provider: 'gemini', model: modelOverride ?? 'gemini-2.5-flash' }
+    case 'groq':
+      return {
+        label: 'groq',
+        provider: 'openai',
+        baseURL: 'https://api.groq.com/openai/v1',
+        apiKey: process.env.GROQ_API_KEY,
+        model: modelOverride ?? 'llama-3.3-70b-versatile',
+      }
+    case 'openrouter':
+      return {
+        label: 'openrouter',
+        provider: 'openai',
+        baseURL: 'https://openrouter.ai/api/v1',
+        apiKey: process.env.OPENROUTER_API_KEY,
+        model: modelOverride ?? 'openai/gpt-4o-mini',
+      }
+    case 'anthropic':
+    default:
+      return { label: 'anthropic', provider: 'anthropic', model: modelOverride ?? 
'claude-sonnet-4-6' } + } +} + +const PROVIDER = resolveProvider() +if (PROVIDER.label === 'groq' && !PROVIDER.apiKey) { + throw new Error('LLM_PROVIDER=groq requires GROQ_API_KEY') +} +if (PROVIDER.label === 'openrouter' && !PROVIDER.apiKey) { + throw new Error('LLM_PROVIDER=openrouter requires OPENROUTER_API_KEY') +} + +// --------------------------------------------------------------------------- +// Output schema (synthesizer) +// --------------------------------------------------------------------------- + +const FindingSchema = z.object({ + title: z.string().describe('One-sentence finding'), + detail: z.string().describe('2-4 sentence explanation'), + analysts: z.array(z.enum(['technical-analyst', 'market-analyst', 'community-analyst'])) + .min(1) + .describe('Analyst agent names that support this finding'), + confidence: z.number().min(0).max(1).describe('0..1 confidence score'), +}) + +const ContradictionSchema = z.object({ + claim_a: z.string().describe('Claim from analyst A (quote or tight paraphrase)'), + claim_b: z.string().describe('Contradicting claim from analyst B (quote or tight paraphrase)'), + analysts: z.tuple([ + z.enum(['technical-analyst', 'market-analyst', 'community-analyst']), + z.enum(['technical-analyst', 'market-analyst', 'community-analyst']), + ]) + .describe('Exactly two analyst agent names (must be different)'), +}).refine((x) => x.analysts[0] !== x.analysts[1], { + message: 'contradictions.analysts must reference two different analysts', + path: ['analysts'], +}) + +const ResearchAggregationSchema = z.object({ + summary: z.string().describe('High-level executive summary'), + findings: z.array(FindingSchema).describe('Key findings extracted from the analyst reports'), + contradictions: z.array(ContradictionSchema).describe('Explicit contradictions (may be empty)'), +}) // --------------------------------------------------------------------------- // Agents — three analysts + one synthesizer @@ -35,45 +119,71 @@ const TOPIC = 
'WebAssembly adoption in 2026' const technicalAnalyst: AgentConfig = { name: 'technical-analyst', - model: 'claude-sonnet-4-6', - systemPrompt: `You are a technical analyst. Given a topic, research its technical -capabilities, limitations, performance characteristics, and architectural patterns. -Write your findings as structured markdown. Keep it to 200-300 words.`, - maxTurns: 2, + model: PROVIDER.model, + systemPrompt: `You are a technical analyst. + +Task: Given a topic, produce a compact report that is easy to cross-reference. +Output markdown with EXACT sections: + +## Claims (max 6 bullets) +Each bullet is one falsifiable technical claim. + +## Evidence (max 4 bullets) +Concrete examples, benchmarks, or implementation details. + +Constraints: <= 160 words total. No filler.`, + maxTurns: 1, } const marketAnalyst: AgentConfig = { name: 'market-analyst', - model: 'claude-sonnet-4-6', - systemPrompt: `You are a market analyst. Given a topic, research industry adoption -rates, key companies using the technology, market size estimates, and competitive -landscape. Write your findings as structured markdown. Keep it to 200-300 words.`, - maxTurns: 2, + model: PROVIDER.model, + systemPrompt: `You are a market analyst. + +Output markdown with EXACT sections: + +## Claims (max 6 bullets) +Adoption, players, market dynamics. + +## Evidence (max 4 bullets) +Metrics, segments, named companies, or directional estimates. + +Constraints: <= 160 words total. No filler.`, + maxTurns: 1, } const communityAnalyst: AgentConfig = { name: 'community-analyst', - model: 'claude-sonnet-4-6', - systemPrompt: `You are a developer community analyst. Given a topic, research -developer sentiment, ecosystem maturity, learning resources, community size, -and conference/meetup activity. Write your findings as structured markdown. -Keep it to 200-300 words.`, - maxTurns: 2, + model: PROVIDER.model, + systemPrompt: `You are a developer community analyst. 
+ +Output markdown with EXACT sections: + +## Claims (max 6 bullets) +Sentiment, ecosystem maturity, learning curve, community signals. + +## Evidence (max 4 bullets) +Tooling, docs, conferences, repos, surveys. + +Constraints: <= 160 words total. No filler.`, + maxTurns: 1, } const synthesizer: AgentConfig = { name: 'synthesizer', - model: 'claude-sonnet-4-6', - systemPrompt: `You are a research director who synthesizes multiple analyst reports -into a single cohesive document. You will receive all prior analyst outputs -automatically. Then: + model: PROVIDER.model, + outputSchema: ResearchAggregationSchema, + systemPrompt: `You are a research director. You will receive three analyst reports. -1. Cross-reference claims across reports - flag agreements and contradictions -2. Identify the 3 most important insights -3. Produce a structured report with: Executive Summary, Key Findings, - Areas of Agreement, Open Questions, and Recommendation +Your job: produce ONLY a JSON object matching the required schema. -Keep the final report to 300-400 words.`, +Rules: +1. Extract 3-6 findings. Each finding MUST list the analyst names that support it. +2. Extract contradictions as explicit pairs of claims. Each contradiction MUST: + - include claim_a and claim_b copied VERBATIM from the analysts' "## Claims" bullets + - include analysts as a 2-item array with the two analyst names +3. contradictions MUST be an array (may be empty). +4. No markdown, no code fences, no extra text. JSON only.`, maxTurns: 2, } @@ -91,7 +201,10 @@ function handleProgress(event: OrchestratorEvent): void { } const orchestrator = new OpenMultiAgent({ - defaultModel: 'claude-sonnet-4-6', + defaultModel: PROVIDER.model, + defaultProvider: PROVIDER.provider, + ...(PROVIDER.baseURL ? { defaultBaseURL: PROVIDER.baseURL } : {}), + ...(PROVIDER.apiKey ? 
{ defaultApiKey: PROVIDER.apiKey } : {}), onProgress: handleProgress, }) @@ -136,12 +249,39 @@ const tasks = [ console.log('Multi-Source Research Aggregation') console.log('='.repeat(60)) console.log(`Topic: ${TOPIC}`) +console.log(`Provider: ${PROVIDER.label} (model=${PROVIDER.model})`) console.log('Pipeline: 3 analysts (parallel) → synthesizer') console.log('='.repeat(60)) console.log() const result = await orchestrator.runTasks(team, tasks) +// --------------------------------------------------------------------------- +// Parallelism assertion (analysts should benefit from concurrency) +// --------------------------------------------------------------------------- + +const analystTitles = new Set(['Technical analysis', 'Market analysis', 'Community analysis']) +const analystTasks = (result.tasks ?? []).filter((t) => analystTitles.has(t.title)) + +if ( + analystTasks.length === 3 + && analystTasks.every((t) => t.metrics?.startMs !== undefined && t.metrics?.endMs !== undefined) +) { + const durations = analystTasks.map((t) => Math.max(0, (t.metrics!.endMs - t.metrics!.startMs))) + const serialSum = durations.reduce((a, b) => a + b, 0) + const minStart = Math.min(...analystTasks.map((t) => t.metrics!.startMs)) + const maxEnd = Math.max(...analystTasks.map((t) => t.metrics!.endMs)) + const parallelWall = Math.max(0, maxEnd - minStart) + + // Require parallel wall time < 70% of the serial sum. + if (serialSum > 0 && parallelWall >= 0.7 * serialSum) { + throw new Error( + `Parallelism assertion failed: parallelWall=${parallelWall}ms, serialSum=${serialSum}ms (need < 0.7x). 
` + + `Tighten analyst prompts or increase concurrency.`, + ) + } +} + // --------------------------------------------------------------------------- // Output // --------------------------------------------------------------------------- @@ -160,10 +300,16 @@ for (const [name, r] of result.agentResults) { const synthResult = result.agentResults.get('synthesizer') if (synthResult?.success) { console.log('\n' + '='.repeat(60)) - console.log('SYNTHESIZED REPORT') + console.log('SYNTHESIZED OUTPUT (JSON)') console.log('='.repeat(60)) console.log() - console.log(synthResult.output) + + if (synthResult.structured) { + console.log(JSON.stringify(synthResult.structured, null, 2)) + } else { + // Should not happen when outputSchema succeeds, but keep a fallback. + console.log(synthResult.output) + } } console.log('\nDone.') diff --git a/tests/research-aggregation.test.ts b/tests/research-aggregation.test.ts new file mode 100644 index 0000000..5c2f96e --- /dev/null +++ b/tests/research-aggregation.test.ts @@ -0,0 +1,199 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest' +import { z } from 'zod' +import { OpenMultiAgent } from '../src/orchestrator/orchestrator.js' +import type { AgentConfig, LLMChatOptions, LLMMessage, LLMResponse, TeamConfig } from '../src/types.js' + +// --------------------------------------------------------------------------- +// Mock createAdapter so tests do not require network access or API keys. +// --------------------------------------------------------------------------- + +const CLAIM_ACCELERATING = 'Wasm adoption is accelerating rapidly in 2026.' +const CLAIM_STAGNATING = 'Wasm adoption is stagnating in 2026.' + +let capturedPrompts: string[] = [] + +function lastUserText(msgs: LLMMessage[]): string { + const lastUser = [...msgs].reverse().find((m) => m.role === 'user') + return (lastUser?.content ?? 
[])
+    .filter((b): b is { type: 'text'; text: string } => b.type === 'text')
+    .map((b) => b.text)
+    .join('\n')
+}
+
+vi.mock('../src/llm/adapter.js', () => ({
+  createAdapter: async () => {
+    return {
+      name: 'mock',
+      async chat(msgs: LLMMessage[], options: LLMChatOptions): Promise<LLMResponse> {
+        const prompt = lastUserText(msgs)
+        capturedPrompts.push(prompt)
+
+        const isTechnical = prompt.includes('# Task: Technical analysis')
+        const isMarket = prompt.includes('# Task: Market analysis')
+        const isCommunity = prompt.includes('# Task: Community analysis')
+        const isSynth = prompt.includes('# Task: Synthesize report')
+
+        let text = 'default mock response'
+
+        if (isTechnical) {
+          text = [
+            '## Claims (max 6 bullets)',
+            `- ${CLAIM_ACCELERATING}`,
+            '- Runtime sandboxing reduces risk compared to native plugins.',
+            '',
+            '## Evidence (max 4 bullets)',
+            '- Multiple runtimes optimized for near-native speed exist.',
+          ].join('\n')
+        } else if (isMarket) {
+          text = [
+            '## Claims (max 6 bullets)',
+            `- ${CLAIM_STAGNATING}`,
+            '- Enterprises are cautious due to tooling fragmentation.',
+            '',
+            '## Evidence (max 4 bullets)',
+            '- Hiring signals lag behind hype cycles.',
+          ].join('\n')
+        } else if (isCommunity) {
+          text = [
+            '## Claims (max 6 bullets)',
+            '- Developer interest is steady but polarized by use-case.',
+            '',
+            '## Evidence (max 4 bullets)',
+            '- Tutorials focus on edge runtimes and plugin systems.',
+          ].join('\n')
+        } else if (isSynth) {
+          // Minimal "extraction": if we see both contradictory claims in the prompt context,
+          // surface them in the contradictions array.
+          const hasA = prompt.includes(CLAIM_ACCELERATING)
+          const hasB = prompt.includes(CLAIM_STAGNATING)
+          const contradictions = (hasA && hasB)
+            ? 
[{ + claim_a: CLAIM_ACCELERATING, + claim_b: CLAIM_STAGNATING, + analysts: ['technical-analyst', 'market-analyst'], + }] + : [] + + const payload = { + summary: 'Mock synthesis summary.', + findings: [ + { + title: 'Adoption signals are mixed.', + detail: 'Technical capability is improving, but market pull is uncertain. This is consistent with contradictory near-term signals.', + analysts: ['technical-analyst', 'market-analyst', 'community-analyst'], + confidence: 0.6, + }, + ], + contradictions, + } + text = JSON.stringify(payload) + } + + return { + id: 'mock-1', + content: [{ type: 'text', text }], + model: options.model ?? 'mock-model', + stop_reason: 'end_turn', + usage: { input_tokens: 10, output_tokens: 20 }, + } satisfies LLMResponse + }, + async *stream() { + /* unused */ + }, + } + }, +})) + +// --------------------------------------------------------------------------- +// Schema under test (matches the issue acceptance requirements) +// --------------------------------------------------------------------------- + +const FindingSchema = z.object({ + title: z.string(), + detail: z.string(), + analysts: z.array(z.enum(['technical-analyst', 'market-analyst', 'community-analyst'])).min(1), + confidence: z.number().min(0).max(1), +}) + +const ContradictionSchema = z.object({ + claim_a: z.string(), + claim_b: z.string(), + analysts: z.tuple([ + z.enum(['technical-analyst', 'market-analyst', 'community-analyst']), + z.enum(['technical-analyst', 'market-analyst', 'community-analyst']), + ]), +}).refine((x) => x.analysts[0] !== x.analysts[1], { path: ['analysts'], message: 'must be different' }) + +const ResearchAggregationSchema = z.object({ + summary: z.string(), + findings: z.array(FindingSchema), + contradictions: z.array(ContradictionSchema), +}) + +// --------------------------------------------------------------------------- +// Test +// --------------------------------------------------------------------------- + +function teamCfg(agents: AgentConfig[]): 
TeamConfig {
+  return { name: 'research-team', agents, sharedMemory: true }
+}
+
+describe('research aggregation (mocked) surfaces contradictions in structured output', () => {
+  beforeEach(() => {
+    capturedPrompts = []
+  })
+
+  it('returns synthesizer.structured with contradictions array containing known claims', async () => {
+    const oma = new OpenMultiAgent({
+      defaultProvider: 'openai',
+      defaultModel: 'mock-model',
+      maxConcurrency: 3,
+    })
+
+    const agents: AgentConfig[] = [
+      { name: 'technical-analyst', model: 'mock-model', systemPrompt: 'technical', maxTurns: 1 },
+      { name: 'market-analyst', model: 'mock-model', systemPrompt: 'market', maxTurns: 1 },
+      { name: 'community-analyst', model: 'mock-model', systemPrompt: 'community', maxTurns: 1 },
+      { name: 'synthesizer', model: 'mock-model', systemPrompt: 'synth', outputSchema: ResearchAggregationSchema, maxTurns: 2 },
+    ]
+
+    const team = oma.createTeam('research-team', teamCfg(agents))
+
+    const tasks = [
+      { title: 'Technical analysis', description: 'Analyze tech', assignee: 'technical-analyst' },
+      { title: 'Market analysis', description: 'Analyze market', assignee: 'market-analyst' },
+      { title: 'Community analysis', description: 'Analyze community', assignee: 'community-analyst' },
+      {
+        title: 'Synthesize report',
+        description: 'Synthesize',
+        assignee: 'synthesizer',
+        dependsOn: ['Technical analysis', 'Market analysis', 'Community analysis'],
+      },
+    ] as const
+
+    const result = await oma.runTasks(team, tasks)
+    expect(result.success).toBe(true)
+
+    const synth = result.agentResults.get('synthesizer')
+    expect(synth?.success).toBe(true)
+    expect(synth?.structured).toBeDefined()
+
+    const structured = synth!.structured as z.infer<typeof ResearchAggregationSchema>
+    expect(Array.isArray(structured.contradictions)).toBe(true)
+
+    // Assert that the known contradiction is surfaced.
+ expect(structured.contradictions).toEqual([ + { + claim_a: CLAIM_ACCELERATING, + claim_b: CLAIM_STAGNATING, + analysts: ['technical-analyst', 'market-analyst'], + }, + ]) + + // Sanity check: the synthesizer prompt actually contained the analyst outputs. + const synthPrompt = capturedPrompts.find((p) => p.includes('# Task: Synthesize report')) ?? '' + expect(synthPrompt).toContain(CLAIM_ACCELERATING) + expect(synthPrompt).toContain(CLAIM_STAGNATING) + }) +}) +