examples: research aggregation schema + provider env + test (#159)

2026-04-23 22:43:56 +08:00 · 2026-04-23 22:43:56 +08:00 · 6cec006359
parent b850b4d5d9
commit 6cec006359
2 changed files with 378 additions and 33 deletions
--- a/examples/patterns/research-aggregation.ts
+++ b/examples/patterns/research-aggregation.ts
@ -14,20 +14,104 @@
 *   [technical-analyst, market-analyst, community-analyst] (parallel) → synthesizer
 *
 * Run:
- *   npx tsx examples/patterns/research-aggregation.ts
+ *   npx tsx examples/patterns/research-aggregation.ts "<topic>"
 *
- * Prerequisites:
- *   ANTHROPIC_API_KEY env var must be set.
+ * Provider selection (env):
+ *   - LLM_PROVIDER=anthropic   (default)  → requires ANTHROPIC_API_KEY
+ *   - LLM_PROVIDER=gemini                 → requires GEMINI_API_KEY (+ optional peer dep @google/genai)
+ *   - LLM_PROVIDER=groq                   → requires GROQ_API_KEY
+ *   - LLM_PROVIDER=openrouter             → requires OPENROUTER_API_KEY
+ *
+ * Optional:
+ *   - LLM_MODEL=... overrides the default model for the selected provider.
 */

+import { z } from 'zod'
 import { OpenMultiAgent } from '../../src/index.js'
 import type { AgentConfig, OrchestratorEvent } from '../../src/types.js'

 // ---------------------------------------------------------------------------
-// Topic
+// Topic + provider selection
 // ---------------------------------------------------------------------------

-const TOPIC = 'WebAssembly adoption in 2026'
+const TOPIC = process.argv[2] ?? 'WebAssembly adoption in 2026'
+
+type ProviderChoice = 'anthropic' | 'gemini' | 'groq' | 'openrouter'
+
+function resolveProvider(): {
+  label: ProviderChoice
+  model: string
+  provider: NonNullable<AgentConfig['provider']>
+  baseURL?: string
+  apiKey?: string
+} {
+  const raw = (process.env.LLM_PROVIDER ?? 'anthropic').toLowerCase() as ProviderChoice
+  const modelOverride = process.env.LLM_MODEL
+
+  switch (raw) {
+    case 'gemini':
+      return { label: 'gemini', provider: 'gemini', model: modelOverride ?? 'gemini-2.5-flash' }
+    case 'groq':
+      return {
+        label: 'groq',
+        provider: 'openai',
+        baseURL: 'https://api.groq.com/openai/v1',
+        apiKey: process.env.GROQ_API_KEY,
+        model: modelOverride ?? 'llama-3.3-70b-versatile',
+      }
+    case 'openrouter':
+      return {
+        label: 'openrouter',
+        provider: 'openai',
+        baseURL: 'https://openrouter.ai/api/v1',
+        apiKey: process.env.OPENROUTER_API_KEY,
+        model: modelOverride ?? 'openai/gpt-4o-mini',
+      }
+    case 'anthropic':
+    default:
+      return { label: 'anthropic', provider: 'anthropic', model: modelOverride ?? 'claude-sonnet-4-6' }
+  }
+}
+
+const PROVIDER = resolveProvider()
+if (PROVIDER.label === 'groq' && !PROVIDER.apiKey) {
+  throw new Error('LLM_PROVIDER=groq requires GROQ_API_KEY')
+}
+if (PROVIDER.label === 'openrouter' && !PROVIDER.apiKey) {
+  throw new Error('LLM_PROVIDER=openrouter requires OPENROUTER_API_KEY')
+}
+
+// ---------------------------------------------------------------------------
+// Output schema (synthesizer)
+// ---------------------------------------------------------------------------
+
+const FindingSchema = z.object({
+  title: z.string().describe('One-sentence finding'),
+  detail: z.string().describe('2-4 sentence explanation'),
+  analysts: z.array(z.enum(['technical-analyst', 'market-analyst', 'community-analyst']))
+    .min(1)
+    .describe('Analyst agent names that support this finding'),
+  confidence: z.number().min(0).max(1).describe('0..1 confidence score'),
+})
+
+const ContradictionSchema = z.object({
+  claim_a: z.string().describe('Claim from analyst A (quote or tight paraphrase)'),
+  claim_b: z.string().describe('Contradicting claim from analyst B (quote or tight paraphrase)'),
+  analysts: z.tuple([
+    z.enum(['technical-analyst', 'market-analyst', 'community-analyst']),
+    z.enum(['technical-analyst', 'market-analyst', 'community-analyst']),
+  ])
+    .describe('Exactly two analyst agent names (must be different)'),
+}).refine((x) => x.analysts[0] !== x.analysts[1], {
+  message: 'contradictions.analysts must reference two different analysts',
+  path: ['analysts'],
+})
+
+const ResearchAggregationSchema = z.object({
+  summary: z.string().describe('High-level executive summary'),
+  findings: z.array(FindingSchema).describe('Key findings extracted from the analyst reports'),
+  contradictions: z.array(ContradictionSchema).describe('Explicit contradictions (may be empty)'),
+})

 // ---------------------------------------------------------------------------
 // Agents — three analysts + one synthesizer
@ -35,45 +119,71 @@ const TOPIC = 'WebAssembly adoption in 2026'

 const technicalAnalyst: AgentConfig = {
  name: 'technical-analyst',
-  model: 'claude-sonnet-4-6',
-  systemPrompt: `You are a technical analyst. Given a topic, research its technical
-capabilities, limitations, performance characteristics, and architectural patterns.
-Write your findings as structured markdown. Keep it to 200-300 words.`,
-  maxTurns: 2,
+  model: PROVIDER.model,
+  systemPrompt: `You are a technical analyst.
+
+Task: Given a topic, produce a compact report that is easy to cross-reference.
+Output markdown with EXACT sections:
+
+## Claims (max 6 bullets)
+Each bullet is one falsifiable technical claim.
+
+## Evidence (max 4 bullets)
+Concrete examples, benchmarks, or implementation details.
+
+Constraints: <= 160 words total. No filler.`,
+  maxTurns: 1,
 }

 const marketAnalyst: AgentConfig = {
  name: 'market-analyst',
-  model: 'claude-sonnet-4-6',
-  systemPrompt: `You are a market analyst. Given a topic, research industry adoption
-rates, key companies using the technology, market size estimates, and competitive
-landscape. Write your findings as structured markdown. Keep it to 200-300 words.`,
-  maxTurns: 2,
+  model: PROVIDER.model,
+  systemPrompt: `You are a market analyst.
+
+Output markdown with EXACT sections:
+
+## Claims (max 6 bullets)
+Adoption, players, market dynamics.
+
+## Evidence (max 4 bullets)
+Metrics, segments, named companies, or directional estimates.
+
+Constraints: <= 160 words total. No filler.`,
+  maxTurns: 1,
 }

 const communityAnalyst: AgentConfig = {
  name: 'community-analyst',
-  model: 'claude-sonnet-4-6',
-  systemPrompt: `You are a developer community analyst. Given a topic, research
-developer sentiment, ecosystem maturity, learning resources, community size,
-and conference/meetup activity. Write your findings as structured markdown.
-Keep it to 200-300 words.`,
-  maxTurns: 2,
+  model: PROVIDER.model,
+  systemPrompt: `You are a developer community analyst.
+
+Output markdown with EXACT sections:
+
+## Claims (max 6 bullets)
+Sentiment, ecosystem maturity, learning curve, community signals.
+
+## Evidence (max 4 bullets)
+Tooling, docs, conferences, repos, surveys.
+
+Constraints: <= 160 words total. No filler.`,
+  maxTurns: 1,
 }

 const synthesizer: AgentConfig = {
  name: 'synthesizer',
-  model: 'claude-sonnet-4-6',
-  systemPrompt: `You are a research director who synthesizes multiple analyst reports
-into a single cohesive document. You will receive all prior analyst outputs
-automatically. Then:
+  model: PROVIDER.model,
+  outputSchema: ResearchAggregationSchema,
+  systemPrompt: `You are a research director. You will receive three analyst reports.

-1. Cross-reference claims across reports - flag agreements and contradictions
-2. Identify the 3 most important insights
-3. Produce a structured report with: Executive Summary, Key Findings,
-   Areas of Agreement, Open Questions, and Recommendation
+Your job: produce ONLY a JSON object matching the required schema.

-Keep the final report to 300-400 words.`,
+Rules:
+1. Extract 3-6 findings. Each finding MUST list the analyst names that support it.
+2. Extract contradictions as explicit pairs of claims. Each contradiction MUST:
+   - include claim_a and claim_b copied VERBATIM from the analysts' "## Claims" bullets
+   - include analysts as a 2-item array with the two analyst names
+3. contradictions MUST be an array (may be empty).
+4. No markdown, no code fences, no extra text. JSON only.`,
  maxTurns: 2,
 }

@ -91,7 +201,10 @@ function handleProgress(event: OrchestratorEvent): void {
 }

 const orchestrator = new OpenMultiAgent({
-  defaultModel: 'claude-sonnet-4-6',
+  defaultModel: PROVIDER.model,
+  defaultProvider: PROVIDER.provider,
+  ...(PROVIDER.baseURL ? { defaultBaseURL: PROVIDER.baseURL } : {}),
+  ...(PROVIDER.apiKey ? { defaultApiKey: PROVIDER.apiKey } : {}),
  onProgress: handleProgress,
 })

@ -136,12 +249,39 @@ const tasks = [
 console.log('Multi-Source Research Aggregation')
 console.log('='.repeat(60))
 console.log(`Topic: ${TOPIC}`)
+console.log(`Provider: ${PROVIDER.label} (model=${PROVIDER.model})`)
 console.log('Pipeline: 3 analysts (parallel) → synthesizer')
 console.log('='.repeat(60))
 console.log()

 const result = await orchestrator.runTasks(team, tasks)

+// ---------------------------------------------------------------------------
+// Parallelism assertion (analysts should benefit from concurrency)
+// ---------------------------------------------------------------------------
+
+const analystTitles = new Set(['Technical analysis', 'Market analysis', 'Community analysis'])
+const analystTasks = (result.tasks ?? []).filter((t) => analystTitles.has(t.title))
+
+if (
+  analystTasks.length === 3
+  && analystTasks.every((t) => t.metrics?.startMs !== undefined && t.metrics?.endMs !== undefined)
+) {
+  const durations = analystTasks.map((t) => Math.max(0, (t.metrics!.endMs - t.metrics!.startMs)))
+  const serialSum = durations.reduce((a, b) => a + b, 0)
+  const minStart = Math.min(...analystTasks.map((t) => t.metrics!.startMs))
+  const maxEnd = Math.max(...analystTasks.map((t) => t.metrics!.endMs))
+  const parallelWall = Math.max(0, maxEnd - minStart)
+
+  // Require parallel wall time < 70% of the serial sum.
+  if (serialSum > 0 && parallelWall >= 0.7 * serialSum) {
+    throw new Error(
+      `Parallelism assertion failed: parallelWall=${parallelWall}ms, serialSum=${serialSum}ms (need < 0.7x). ` +
+      `Tighten analyst prompts or increase concurrency.`,
+    )
+  }
+}
+
 // ---------------------------------------------------------------------------
 // Output
 // ---------------------------------------------------------------------------
@ -160,10 +300,16 @@ for (const [name, r] of result.agentResults) {
 const synthResult = result.agentResults.get('synthesizer')
 if (synthResult?.success) {
  console.log('\n' + '='.repeat(60))
-  console.log('SYNTHESIZED REPORT')
+  console.log('SYNTHESIZED OUTPUT (JSON)')
  console.log('='.repeat(60))
  console.log()
-  console.log(synthResult.output)
+
+  if (synthResult.structured) {
+    console.log(JSON.stringify(synthResult.structured, null, 2))
+  } else {
+    // Should not happen when outputSchema succeeds, but keep a fallback.
+    console.log(synthResult.output)
+  }
 }

 console.log('\nDone.')
--- a/tests/research-aggregation.test.ts
+++ b/tests/research-aggregation.test.ts
@ -0,0 +1,199 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest'
+import { z } from 'zod'
+import { OpenMultiAgent } from '../src/orchestrator/orchestrator.js'
+import type { AgentConfig, LLMChatOptions, LLMMessage, LLMResponse, TeamConfig } from '../src/types.js'
+
+// ---------------------------------------------------------------------------
+// Mock createAdapter so tests do not require network access or API keys.
+// ---------------------------------------------------------------------------
+
+const CLAIM_ACCELERATING = 'Wasm adoption is accelerating rapidly in 2026.'
+const CLAIM_STAGNATING = 'Wasm adoption is stagnating in 2026.'
+
+let capturedPrompts: string[] = []
+
+function lastUserText(msgs: LLMMessage[]): string {
+  const lastUser = [...msgs].reverse().find((m) => m.role === 'user')
+  return (lastUser?.content ?? [])
+    .filter((b): b is { type: 'text'; text: string } => b.type === 'text')
+    .map((b) => b.text)
+    .join('\n')
+}
+
+vi.mock('../src/llm/adapter.js', () => ({
+  createAdapter: async () => {
+    return {
+      name: 'mock',
+      async chat(msgs: LLMMessage[], options: LLMChatOptions): Promise<LLMResponse> {
+        const prompt = lastUserText(msgs)
+        capturedPrompts.push(prompt)
+
+        const isTechnical = prompt.includes('# Task: Technical analysis')
+        const isMarket = prompt.includes('# Task: Market analysis')
+        const isCommunity = prompt.includes('# Task: Community analysis')
+        const isSynth = prompt.includes('# Task: Synthesize report')
+
+        let text = 'default mock response'
+
+        if (isTechnical) {
+          text = [
+            '## Claims (max 6 bullets)',
+            `- ${CLAIM_ACCELERATING}`,
+            '- Runtime sandboxing reduces risk compared to native plugins.',
+            '',
+            '## Evidence (max 4 bullets)',
+            '- Multiple runtimes optimized for near-native speed exist.',
+          ].join('\n')
+        } else if (isMarket) {
+          text = [
+            '## Claims (max 6 bullets)',
+            `- ${CLAIM_STAGNATING}`,
+            '- Enterprises are cautious due to tooling fragmentation.',
+            '',
+            '## Evidence (max 4 bullets)',
+            '- Hiring signals lag behind hype cycles.',
+          ].join('\n')
+        } else if (isCommunity) {
+          text = [
+            '## Claims (max 6 bullets)',
+            '- Developer interest is steady but polarized by use-case.',
+            '',
+            '## Evidence (max 4 bullets)',
+            '- Tutorials focus on edge runtimes and plugin systems.',
+          ].join('\n')
+        } else if (isSynth) {
+          // Minimal "extraction": if we see both contradictory claims in the prompt context,
+          // surface them in the contradictions array.
+          const hasA = prompt.includes(CLAIM_ACCELERATING)
+          const hasB = prompt.includes(CLAIM_STAGNATING)
+          const contradictions = (hasA && hasB)
+            ? [{
+                claim_a: CLAIM_ACCELERATING,
+                claim_b: CLAIM_STAGNATING,
+                analysts: ['technical-analyst', 'market-analyst'],
+              }]
+            : []
+
+          const payload = {
+            summary: 'Mock synthesis summary.',
+            findings: [
+              {
+                title: 'Adoption signals are mixed.',
+                detail: 'Technical capability is improving, but market pull is uncertain. This is consistent with contradictory near-term signals.',
+                analysts: ['technical-analyst', 'market-analyst', 'community-analyst'],
+                confidence: 0.6,
+              },
+            ],
+            contradictions,
+          }
+          text = JSON.stringify(payload)
+        }
+
+        return {
+          id: 'mock-1',
+          content: [{ type: 'text', text }],
+          model: options.model ?? 'mock-model',
+          stop_reason: 'end_turn',
+          usage: { input_tokens: 10, output_tokens: 20 },
+        } satisfies LLMResponse
+      },
+      async *stream() {
+        /* unused */
+      },
+    }
+  },
+}))
+
+// ---------------------------------------------------------------------------
+// Schema under test (matches the issue acceptance requirements)
+// ---------------------------------------------------------------------------
+
+const FindingSchema = z.object({
+  title: z.string(),
+  detail: z.string(),
+  analysts: z.array(z.enum(['technical-analyst', 'market-analyst', 'community-analyst'])).min(1),
+  confidence: z.number().min(0).max(1),
+})
+
+const ContradictionSchema = z.object({
+  claim_a: z.string(),
+  claim_b: z.string(),
+  analysts: z.tuple([
+    z.enum(['technical-analyst', 'market-analyst', 'community-analyst']),
+    z.enum(['technical-analyst', 'market-analyst', 'community-analyst']),
+  ]),
+}).refine((x) => x.analysts[0] !== x.analysts[1], { path: ['analysts'], message: 'must be different' })
+
+const ResearchAggregationSchema = z.object({
+  summary: z.string(),
+  findings: z.array(FindingSchema),
+  contradictions: z.array(ContradictionSchema),
+})
+
+// ---------------------------------------------------------------------------
+// Test
+// ---------------------------------------------------------------------------
+
+function teamCfg(agents: AgentConfig[]): TeamConfig {
+  return { name: 'research-team', agents, sharedMemory: true }
+}
+
+describe('research aggregation (mocked) surfaces contradictions in structured output', () => {
+  beforeEach(() => {
+    capturedPrompts = []
+  })
+
+  it('returns synthesizer.structured with contradictions array containing known claims', async () => {
+    const oma = new OpenMultiAgent({
+      defaultProvider: 'openai',
+      defaultModel: 'mock-model',
+      maxConcurrency: 3,
+    })
+
+    const agents: AgentConfig[] = [
+      { name: 'technical-analyst', model: 'mock-model', systemPrompt: 'technical', maxTurns: 1 },
+      { name: 'market-analyst', model: 'mock-model', systemPrompt: 'market', maxTurns: 1 },
+      { name: 'community-analyst', model: 'mock-model', systemPrompt: 'community', maxTurns: 1 },
+      { name: 'synthesizer', model: 'mock-model', systemPrompt: 'synth', outputSchema: ResearchAggregationSchema, maxTurns: 2 },
+    ]
+
+    const team = oma.createTeam('research-team', teamCfg(agents))
+
+    const tasks = [
+      { title: 'Technical analysis', description: 'Analyze tech', assignee: 'technical-analyst' },
+      { title: 'Market analysis', description: 'Analyze market', assignee: 'market-analyst' },
+      { title: 'Community analysis', description: 'Analyze community', assignee: 'community-analyst' },
+      {
+        title: 'Synthesize report',
+        description: 'Synthesize',
+        assignee: 'synthesizer',
+        dependsOn: ['Technical analysis', 'Market analysis', 'Community analysis'],
+      },
+    ] as const
+
+    const result = await oma.runTasks(team, tasks)
+    expect(result.success).toBe(true)
+
+    const synth = result.agentResults.get('synthesizer')
+    expect(synth?.success).toBe(true)
+    expect(synth?.structured).toBeDefined()
+
+    const structured = synth!.structured as z.infer<typeof ResearchAggregationSchema>
+    expect(Array.isArray(structured.contradictions)).toBe(true)
+
+    // Assert that the known contradiction is surfaced.
+    expect(structured.contradictions).toEqual([
+      {
+        claim_a: CLAIM_ACCELERATING,
+        claim_b: CLAIM_STAGNATING,
+        analysts: ['technical-analyst', 'market-analyst'],
+      },
+    ])
+
+    // Sanity check: the synthesizer prompt actually contained the analyst outputs.
+    const synthPrompt = capturedPrompts.find((p) => p.includes('# Task: Synthesize report')) ?? ''
+    expect(synthPrompt).toContain(CLAIM_ACCELERATING)
+    expect(synthPrompt).toContain(CLAIM_STAGNATING)
+  })
+})
+