feat(llm): add Gemini adapter (#28)

feat: Add support for Gemini model
This commit is contained in:
JackChen 2026-04-05 11:48:32 +08:00 committed by GitHub
commit 9a81a13982
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 1506 additions and 801 deletions

View File

@ -29,7 +29,12 @@ Requires Node.js >= 18.
npm install @jackchen_me/open-multi-agent npm install @jackchen_me/open-multi-agent
``` ```
Set `ANTHROPIC_API_KEY` (and optionally `OPENAI_API_KEY` or `GITHUB_TOKEN` for Copilot) in your environment. Local models via Ollama require no API key — see [example 06](examples/06-local-model.ts). Set the API key for your provider. Local models via Ollama require no API key — see [example 06](examples/06-local-model.ts).
- `ANTHROPIC_API_KEY`
- `OPENAI_API_KEY`
- `GEMINI_API_KEY`
- `GITHUB_TOKEN` (for Copilot)
Three agents, one goal — the framework handles the rest: Three agents, one goal — the framework handles the rest:
@ -156,6 +161,7 @@ npx tsx examples/01-single-agent.ts
│ - stream() │ │ - AnthropicAdapter │ │ - stream() │ │ - AnthropicAdapter │
└────────┬──────────┘ │ - OpenAIAdapter │ └────────┬──────────┘ │ - OpenAIAdapter │
│ │ - CopilotAdapter │ │ │ - CopilotAdapter │
│ │ - GeminiAdapter │
│ └──────────────────────┘ │ └──────────────────────┘
┌────────▼──────────┐ ┌────────▼──────────┐
│ AgentRunner │ ┌──────────────────────┐ │ AgentRunner │ ┌──────────────────────┐
@ -183,6 +189,7 @@ npx tsx examples/01-single-agent.ts
| OpenAI (GPT) | `provider: 'openai'` | `OPENAI_API_KEY` | Verified | | OpenAI (GPT) | `provider: 'openai'` | `OPENAI_API_KEY` | Verified |
| Grok (xAI) | `provider: 'grok'` | `XAI_API_KEY` | Verified | | Grok (xAI) | `provider: 'grok'` | `XAI_API_KEY` | Verified |
| GitHub Copilot | `provider: 'copilot'` | `GITHUB_TOKEN` | Verified | | GitHub Copilot | `provider: 'copilot'` | `GITHUB_TOKEN` | Verified |
| Gemini | `provider: 'gemini'` | `GEMINI_API_KEY` | Verified |
| Ollama / vLLM / LM Studio | `provider: 'openai'` + `baseURL` | — | Verified | | Ollama / vLLM / LM Studio | `provider: 'openai'` + `baseURL` | — | Verified |
| llama.cpp server | `provider: 'openai'` + `baseURL` | — | Verified | | llama.cpp server | `provider: 'openai'` + `baseURL` | — | Verified |

View File

@ -155,6 +155,7 @@ npx tsx examples/01-single-agent.ts
│ - stream() │ │ - AnthropicAdapter │ │ - stream() │ │ - AnthropicAdapter │
└────────┬──────────┘ │ - OpenAIAdapter │ └────────┬──────────┘ │ - OpenAIAdapter │
│ │ - CopilotAdapter │ │ │ - CopilotAdapter │
│ │ - GeminiAdapter │
│ └──────────────────────┘ │ └──────────────────────┘
┌────────▼──────────┐ ┌────────▼──────────┐
│ AgentRunner │ ┌──────────────────────┐ │ AgentRunner │ ┌──────────────────────┐
@ -181,6 +182,7 @@ npx tsx examples/01-single-agent.ts
| Anthropic (Claude) | `provider: 'anthropic'` | `ANTHROPIC_API_KEY` | 已验证 | | Anthropic (Claude) | `provider: 'anthropic'` | `ANTHROPIC_API_KEY` | 已验证 |
| OpenAI (GPT) | `provider: 'openai'` | `OPENAI_API_KEY` | 已验证 | | OpenAI (GPT) | `provider: 'openai'` | `OPENAI_API_KEY` | 已验证 |
| GitHub Copilot | `provider: 'copilot'` | `GITHUB_TOKEN` | 已验证 | | GitHub Copilot | `provider: 'copilot'` | `GITHUB_TOKEN` | 已验证 |
| Gemini | `provider: 'gemini'` | `GEMINI_API_KEY` | 已验证 |
| Ollama / vLLM / LM Studio | `provider: 'openai'` + `baseURL` | — | 已验证 | | Ollama / vLLM / LM Studio | `provider: 'openai'` + `baseURL` | — | 已验证 |
已验证支持 tool-calling 的本地模型:**Gemma 4**(见[示例 08](examples/08-gemma4-local.ts))。 已验证支持 tool-calling 的本地模型:**Gemma 4**(见[示例 08](examples/08-gemma4-local.ts))。

48
examples/13-gemini.ts Normal file
View File

@ -0,0 +1,48 @@
/**
 * Quick smoke test for the Gemini adapter.
 *
 * Run:
 *   npx tsx examples/13-gemini.ts
 *
 * Requires an API key: the adapter reads GEMINI_API_KEY (or GOOGLE_API_KEY
 * as a fallback) from the environment when no key is passed explicitly.
 */
import { OpenMultiAgent } from '../src/index.js'
import type { OrchestratorEvent } from '../src/types.js'

// Orchestrator configured so agents default to the Gemini provider/model.
const orchestrator = new OpenMultiAgent({
  defaultModel: 'gemini-2.5-flash',
  defaultProvider: 'gemini',
  // Log agent lifecycle transitions as they arrive.
  onProgress: (event: OrchestratorEvent) => {
    if (event.type === 'agent_start') {
      console.log(`[start] agent=${event.agent}`)
    } else if (event.type === 'agent_complete') {
      console.log(`[complete] agent=${event.agent}`)
    }
  },
})

console.log('Testing Gemini adapter with gemini-2.5-flash...\n')

// Single-turn, small token budget — just enough to prove the round trip works.
const result = await orchestrator.runAgent(
  {
    name: 'assistant',
    model: 'gemini-2.5-flash',
    provider: 'gemini',
    systemPrompt: 'You are a helpful assistant. Keep answers brief.',
    maxTurns: 1,
    maxTokens: 256,
  },
  'What is 2 + 2? Reply in one sentence.',
)

if (result.success) {
  console.log('\nAgent output:')
  console.log('─'.repeat(60))
  console.log(result.output)
  console.log('─'.repeat(60))
  console.log(`\nTokens: input=${result.tokenUsage.input_tokens}, output=${result.tokenUsage.output_tokens}`)
} else {
  // Non-zero exit so CI or shell scripts can detect the failure.
  console.error('Agent failed:', result.output)
  process.exit(1)
}

1753
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -41,6 +41,14 @@
"openai": "^4.73.0", "openai": "^4.73.0",
"zod": "^3.23.0" "zod": "^3.23.0"
}, },
"peerDependencies": {
"@google/genai": "^1.48.0"
},
"peerDependenciesMeta": {
"@google/genai": {
"optional": true
}
},
"devDependencies": { "devDependencies": {
"@types/node": "^22.0.0", "@types/node": "^22.0.0",
"tsx": "^4.21.0", "tsx": "^4.21.0",

View File

@ -11,6 +11,7 @@
* *
* const anthropic = createAdapter('anthropic') * const anthropic = createAdapter('anthropic')
* const openai = createAdapter('openai', process.env.OPENAI_API_KEY) * const openai = createAdapter('openai', process.env.OPENAI_API_KEY)
* const gemini = createAdapter('gemini', process.env.GEMINI_API_KEY)
* ``` * ```
*/ */
@ -37,7 +38,7 @@ import type { LLMAdapter } from '../types.js'
* Additional providers can be integrated by implementing {@link LLMAdapter} * Additional providers can be integrated by implementing {@link LLMAdapter}
* directly and bypassing this factory. * directly and bypassing this factory.
*/ */
export type SupportedProvider = 'anthropic' | 'copilot' | 'grok' | 'openai' export type SupportedProvider = 'anthropic' | 'copilot' | 'grok' | 'openai' | 'gemini'
/** /**
* Instantiate the appropriate {@link LLMAdapter} for the given provider. * Instantiate the appropriate {@link LLMAdapter} for the given provider.
@ -46,6 +47,7 @@ export type SupportedProvider = 'anthropic' | 'copilot' | 'grok' | 'openai'
* explicitly: * explicitly:
* - `anthropic` `ANTHROPIC_API_KEY` * - `anthropic` `ANTHROPIC_API_KEY`
* - `openai` `OPENAI_API_KEY` * - `openai` `OPENAI_API_KEY`
* - `gemini` `GEMINI_API_KEY` / `GOOGLE_API_KEY`
* - `grok` `XAI_API_KEY` * - `grok` `XAI_API_KEY`
* - `copilot` `GITHUB_COPILOT_TOKEN` / `GITHUB_TOKEN`, or interactive * - `copilot` `GITHUB_COPILOT_TOKEN` / `GITHUB_TOKEN`, or interactive
* OAuth2 device flow if neither is set * OAuth2 device flow if neither is set
@ -75,6 +77,10 @@ export async function createAdapter(
const { CopilotAdapter } = await import('./copilot.js') const { CopilotAdapter } = await import('./copilot.js')
return new CopilotAdapter(apiKey) return new CopilotAdapter(apiKey)
} }
case 'gemini': {
const { GeminiAdapter } = await import('./gemini.js')
return new GeminiAdapter(apiKey)
}
case 'openai': { case 'openai': {
const { OpenAIAdapter } = await import('./openai.js') const { OpenAIAdapter } = await import('./openai.js')
return new OpenAIAdapter(apiKey, baseURL) return new OpenAIAdapter(apiKey, baseURL)

378
src/llm/gemini.ts Normal file
View File

@ -0,0 +1,378 @@
/**
* @fileoverview Google Gemini adapter implementing {@link LLMAdapter}.
*
* Built for `@google/genai` (the unified Google Gen AI SDK, v1.x), NOT the
* legacy `@google/generative-ai` package.
*
* Converts between the framework's internal {@link ContentBlock} types and the
* `@google/genai` SDK's wire format, handling tool definitions, system prompts,
* and both batch and streaming response paths.
*
* API key resolution order:
* 1. `apiKey` constructor argument
* 2. `GEMINI_API_KEY` environment variable
* 3. `GOOGLE_API_KEY` environment variable
*
* @example
* ```ts
* import { GeminiAdapter } from './gemini.js'
*
* const adapter = new GeminiAdapter()
* const response = await adapter.chat(messages, {
* model: 'gemini-2.5-flash',
* maxTokens: 1024,
* })
* ```
*/
import {
GoogleGenAI,
FunctionCallingConfigMode,
type Content,
type FunctionDeclaration,
type GenerateContentConfig,
type GenerateContentResponse,
type Part,
type Tool as GeminiTool,
} from '@google/genai'
import type {
ContentBlock,
LLMAdapter,
LLMChatOptions,
LLMMessage,
LLMResponse,
LLMStreamOptions,
LLMToolDef,
StreamEvent,
ToolUseBlock,
} from '../types.js'
// ---------------------------------------------------------------------------
// Internal helpers
// ---------------------------------------------------------------------------
/**
 * Translate a framework role into the Gemini wire-format role.
 *
 * The Gemini API names the assistant turn `"model"`; user turns keep `"user"`.
 */
function toGeminiRole(role: 'user' | 'assistant'): string {
  const roleMap = { user: 'user', assistant: 'model' } as const
  return roleMap[role]
}
/**
* Convert framework messages into Gemini's {@link Content}[] format.
*
* Key differences from Anthropic:
* - Gemini uses `"model"` instead of `"assistant"`.
* - `functionResponse` parts (tool results) must appear in `"user"` turns.
* - `functionCall` parts appear in `"model"` turns.
* - We build a name lookup map from tool_use blocks so tool_result blocks
* can resolve the function name required by Gemini's `functionResponse`.
*/
function toGeminiContents(messages: LLMMessage[]): Content[] {
// First pass: build id → name map for resolving tool results.
const toolNameById = new Map<string, string>()
for (const msg of messages) {
for (const block of msg.content) {
if (block.type === 'tool_use') {
toolNameById.set(block.id, block.name)
}
}
}
return messages.map((msg): Content => {
const parts: Part[] = msg.content.map((block): Part => {
switch (block.type) {
case 'text':
return { text: block.text }
case 'tool_use':
return {
functionCall: {
id: block.id,
name: block.name,
args: block.input,
},
}
case 'tool_result': {
const name = toolNameById.get(block.tool_use_id) ?? block.tool_use_id
return {
functionResponse: {
id: block.tool_use_id,
name,
response: {
content:
typeof block.content === 'string'
? block.content
: JSON.stringify(block.content),
isError: block.is_error ?? false,
},
},
}
}
case 'image':
return {
inlineData: {
mimeType: block.source.media_type,
data: block.source.data,
},
}
default: {
const _exhaustive: never = block
throw new Error(`Unhandled content block type: ${JSON.stringify(_exhaustive)}`)
}
}
})
return { role: toGeminiRole(msg.role), parts }
})
}
/**
 * Translate framework tool definitions into a Gemini `tools` config array.
 *
 * `@google/genai` expects the JSON schema under `parametersJsonSchema` (not
 * `parameters` or `input_schema`), and all function declarations are grouped
 * under a single tool entry.
 */
function toGeminiTools(tools: readonly LLMToolDef[]): GeminiTool[] {
  const declarations: FunctionDeclaration[] = []
  for (const tool of tools) {
    declarations.push({
      name: tool.name,
      description: tool.description,
      parametersJsonSchema: tool.inputSchema as Record<string, unknown>,
    })
  }
  return [{ functionDeclarations: declarations }]
}
/**
 * Assemble the {@link GenerateContentConfig} shared by chat() and stream().
 *
 * Tool wiring (`tools` + `toolConfig`) is only attached when the caller
 * supplied tool definitions; both stay undefined otherwise.
 */
function buildConfig(
  options: LLMChatOptions | LLMStreamOptions,
): GenerateContentConfig {
  const toolList = options.tools ? toGeminiTools(options.tools) : undefined
  const callingConfig = options.tools
    ? { functionCallingConfig: { mode: FunctionCallingConfigMode.AUTO } }
    : undefined
  return {
    // Default cap keeps runaway generations bounded when the caller sets none.
    maxOutputTokens: options.maxTokens ?? 4096,
    temperature: options.temperature,
    systemInstruction: options.systemPrompt,
    tools: toolList,
    toolConfig: callingConfig,
  }
}
/**
 * Mint a unique ID string for fabricated tool-use blocks.
 *
 * Gemini responses (streaming ones in particular) may omit function-call IDs,
 * yet the framework's {@link ToolUseBlock} contract requires one — so a
 * timestamped random ID is generated whenever the API leaves it out.
 */
function generateId(): string {
  const stamp = Date.now()
  const suffix = Math.random().toString(36).substring(2, 9)
  return ['gemini', stamp, suffix].join('-')
}
/**
 * Read the function-call ID off a Gemini part, minting one when absent.
 *
 * The `id` field exists in newer API versions but may be missing from older
 * responses, so it is read through a conservative cast with a generated
 * fallback from {@link generateId}.
 */
function getFunctionCallId(part: Part): string {
  const call = part.functionCall as { id?: string } | undefined
  return call?.id ?? generateId()
}
/**
 * Translate a raw Gemini {@link GenerateContentResponse} into the framework's
 * {@link LLMResponse} shape.
 *
 * Only text and functionCall parts are surfaced; other part types (e.g.
 * inlineData echoes) are intentionally dropped. Gemini can report a STOP
 * finish reason even when it returned function calls, so `stop_reason` is
 * derived from the extracted content as well as the finish reason.
 */
function fromGeminiResponse(
  response: GenerateContentResponse,
  id: string,
  model: string,
): LLMResponse {
  const firstCandidate = response.candidates?.[0]
  const blocks: ContentBlock[] = []

  for (const part of firstCandidate?.content?.parts ?? []) {
    if (part.text !== undefined && part.text !== '') {
      blocks.push({ type: 'text', text: part.text })
      continue
    }
    if (part.functionCall !== undefined) {
      blocks.push({
        type: 'tool_use',
        id: getFunctionCallId(part),
        name: part.functionCall.name ?? '',
        input: (part.functionCall.args ?? {}) as Record<string, unknown>,
      })
    }
    // Any other part type is silently skipped.
  }

  // MAX_TOKENS wins; otherwise the presence of tool calls implies tool_use.
  const finish = firstCandidate?.finishReason as string | undefined
  const calledTools = blocks.some((b) => b.type === 'tool_use')
  let stop_reason: LLMResponse['stop_reason'] = 'end_turn'
  if (finish === 'MAX_TOKENS') {
    stop_reason = 'max_tokens'
  } else if (calledTools) {
    stop_reason = 'tool_use'
  }

  const meta = response.usageMetadata
  return {
    id,
    content: blocks,
    model,
    stop_reason,
    usage: {
      input_tokens: meta?.promptTokenCount ?? 0,
      output_tokens: meta?.candidatesTokenCount ?? 0,
    },
  }
}
// ---------------------------------------------------------------------------
// Adapter implementation
// ---------------------------------------------------------------------------
/**
 * LLM adapter backed by the Google Gemini API via `@google/genai`.
 *
 * A single instance may be shared across concurrent agent runs; the
 * underlying SDK client holds no per-request state.
 *
 * API key resolution order: constructor argument, then `GEMINI_API_KEY`,
 * then `GOOGLE_API_KEY`.
 */
export class GeminiAdapter implements LLMAdapter {
  readonly name = 'gemini'
  readonly #client: GoogleGenAI

  /**
   * @param apiKey - Explicit API key; when omitted, the `GEMINI_API_KEY` and
   *   `GOOGLE_API_KEY` environment variables are consulted in that order.
   */
  constructor(apiKey?: string) {
    const resolvedKey =
      apiKey ?? process.env['GEMINI_API_KEY'] ?? process.env['GOOGLE_API_KEY']
    this.#client = new GoogleGenAI({ apiKey: resolvedKey })
  }

  /**
   * Run a non-streaming chat request and return the complete
   * {@link LLMResponse}.
   *
   * Sends the whole conversation through `models.generateContent()`, the
   * idiomatic batch entry point of `@google/genai`.
   */
  async chat(messages: LLMMessage[], options: LLMChatOptions): Promise<LLMResponse> {
    const responseId = generateId()
    const result = await this.#client.models.generateContent({
      model: options.model,
      contents: toGeminiContents(messages),
      config: buildConfig(options),
    })
    return fromGeminiResponse(result, responseId, options.model)
  }

  /**
   * Run a streaming chat request, yielding {@link StreamEvent}s as chunks
   * arrive from `models.generateContentStream()` (an
   * `AsyncGenerator<GenerateContentResponse>` of per-chunk deltas).
   *
   * The SDK exposes no `finalMessage()` helper like the Anthropic SDK, so
   * content blocks and token counts are accumulated while streaming and
   * folded into the terminal `done` event's {@link LLMResponse}.
   *
   * Event sequence (matching the Anthropic adapter):
   * - zero or more `text` events with incremental deltas
   * - zero or more `tool_use` events (one per call; args are not streamed)
   * - exactly one terminal `done` or `error` event
   */
  async *stream(
    messages: LLMMessage[],
    options: LLMStreamOptions,
  ): AsyncIterable<StreamEvent> {
    const responseId = generateId()
    try {
      const chunkStream = await this.#client.models.generateContentStream({
        model: options.model,
        contents: toGeminiContents(messages),
        config: buildConfig(options),
      })

      // State gathered across chunks for the terminal `done` payload.
      const blocks: ContentBlock[] = []
      let promptTokens = 0
      let completionTokens = 0
      let finishReason: string | undefined

      for await (const chunk of chunkStream) {
        const candidate = chunk.candidates?.[0]
        // Token counts are emitted on the final chunk; keep the latest seen.
        const meta = chunk.usageMetadata
        if (meta) {
          promptTokens = meta.promptTokenCount ?? promptTokens
          completionTokens = meta.candidatesTokenCount ?? completionTokens
        }
        if (candidate?.finishReason) {
          finishReason = candidate.finishReason as string
        }
        for (const part of candidate?.content?.parts ?? []) {
          if (part.text) {
            blocks.push({ type: 'text', text: part.text })
            yield { type: 'text', data: part.text } satisfies StreamEvent
          } else if (part.functionCall) {
            const call: ToolUseBlock = {
              type: 'tool_use',
              id: getFunctionCallId(part),
              name: part.functionCall.name ?? '',
              input: (part.functionCall.args ?? {}) as Record<string, unknown>,
            }
            blocks.push(call)
            yield { type: 'tool_use', data: call } satisfies StreamEvent
          }
        }
      }

      // Derive stop_reason: MAX_TOKENS wins, then tool calls, else end_turn.
      let stop_reason: LLMResponse['stop_reason'] = 'end_turn'
      if (finishReason === 'MAX_TOKENS') {
        stop_reason = 'max_tokens'
      } else if (blocks.some((b) => b.type === 'tool_use')) {
        stop_reason = 'tool_use'
      }

      const finalResponse: LLMResponse = {
        id: responseId,
        content: blocks,
        model: options.model,
        stop_reason,
        usage: { input_tokens: promptTokens, output_tokens: completionTokens },
      }
      yield { type: 'done', data: finalResponse } satisfies StreamEvent
    } catch (err) {
      const error = err instanceof Error ? err : new Error(String(err))
      yield { type: 'error', data: error } satisfies StreamEvent
    }
  }
}

View File

@ -194,7 +194,7 @@ export interface BeforeRunHookContext {
export interface AgentConfig { export interface AgentConfig {
readonly name: string readonly name: string
readonly model: string readonly model: string
readonly provider?: 'anthropic' | 'copilot' | 'grok' | 'openai' readonly provider?: 'anthropic' | 'copilot' | 'grok' | 'openai' | 'gemini'
/** /**
* Custom base URL for OpenAI-compatible APIs (Ollama, vLLM, LM Studio, etc.). * Custom base URL for OpenAI-compatible APIs (Ollama, vLLM, LM Studio, etc.).
* Note: local servers that don't require auth still need `apiKey` set to a * Note: local servers that don't require auth still need `apiKey` set to a
@ -338,7 +338,7 @@ export interface OrchestratorEvent {
export interface OrchestratorConfig { export interface OrchestratorConfig {
readonly maxConcurrency?: number readonly maxConcurrency?: number
readonly defaultModel?: string readonly defaultModel?: string
readonly defaultProvider?: 'anthropic' | 'copilot' | 'grok' | 'openai' readonly defaultProvider?: 'anthropic' | 'copilot' | 'grok' | 'openai' | 'gemini'
readonly defaultBaseURL?: string readonly defaultBaseURL?: string
readonly defaultApiKey?: string readonly defaultApiKey?: string
readonly onProgress?: (event: OrchestratorEvent) => void readonly onProgress?: (event: OrchestratorEvent) => void

View File

@ -0,0 +1,97 @@
import { describe, it, expect, vi, beforeEach } from 'vitest'

// ---------------------------------------------------------------------------
// Mock GoogleGenAI constructor (must be hoisted for Vitest)
// ---------------------------------------------------------------------------
const GoogleGenAIMock = vi.hoisted(() => vi.fn())

vi.mock('@google/genai', () => ({
  GoogleGenAI: GoogleGenAIMock,
  FunctionCallingConfigMode: { AUTO: 'AUTO' },
}))

import { GeminiAdapter } from '../src/llm/gemini.js'
import { createAdapter } from '../src/llm/adapter.js'

/**
 * Run `fn` with the given environment variables applied, restoring the
 * previous values (including absence) afterwards — even if `fn` throws.
 * A value of `undefined` deletes the variable for the duration of `fn`.
 */
function withEnv(vars: Record<string, string | undefined>, fn: () => void): void {
  const saved: Array<[string, string | undefined]> = Object.keys(vars).map(
    (key) => [key, process.env[key]],
  )
  for (const [key, value] of Object.entries(vars)) {
    if (value === undefined) {
      delete process.env[key]
    } else {
      process.env[key] = value
    }
  }
  try {
    fn()
  } finally {
    for (const [key, value] of saved) {
      if (value === undefined) {
        delete process.env[key]
      } else {
        process.env[key] = value
      }
    }
  }
}

// ---------------------------------------------------------------------------
// GeminiAdapter tests
// ---------------------------------------------------------------------------
describe('GeminiAdapter', () => {
  beforeEach(() => {
    GoogleGenAIMock.mockClear()
  })

  it('has name "gemini"', () => {
    const adapter = new GeminiAdapter()
    expect(adapter.name).toBe('gemini')
  })

  it('uses GEMINI_API_KEY by default', () => {
    withEnv({ GEMINI_API_KEY: 'gemini-env-key', GOOGLE_API_KEY: undefined }, () => {
      new GeminiAdapter()
      expect(GoogleGenAIMock).toHaveBeenCalledWith(
        expect.objectContaining({ apiKey: 'gemini-env-key' }),
      )
    })
  })

  it('falls back to GOOGLE_API_KEY when GEMINI_API_KEY is unset', () => {
    withEnv({ GEMINI_API_KEY: undefined, GOOGLE_API_KEY: 'google-env-key' }, () => {
      new GeminiAdapter()
      expect(GoogleGenAIMock).toHaveBeenCalledWith(
        expect.objectContaining({ apiKey: 'google-env-key' }),
      )
    })
  })

  it('allows overriding apiKey explicitly', () => {
    new GeminiAdapter('explicit-key')
    expect(GoogleGenAIMock).toHaveBeenCalledWith(
      expect.objectContaining({ apiKey: 'explicit-key' }),
    )
  })

  it('createAdapter("gemini") returns GeminiAdapter instance', async () => {
    const adapter = await createAdapter('gemini')
    expect(adapter).toBeInstanceOf(GeminiAdapter)
  })
})