From 7bc23521f42d9c40f0d79e05166e196c12ff2588 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 1 Apr 2026 19:58:54 +0000 Subject: [PATCH] feat: add OllamaAdapter with streaming, tool calling, and factory wiring Agent-Logs-Url: https://github.com/m-prunty/open-multi-agent/sessions/1057f1b1-f24b-4363-8cdb-ab9188e5a262 Co-authored-by: m-prunty <27181505+m-prunty@users.noreply.github.com> --- src/index.ts | 1 + src/llm/adapter.ts | 27 ++- src/llm/ollama.ts | 544 +++++++++++++++++++++++++++++++++++++++++++++ src/types.ts | 4 +- 4 files changed, 566 insertions(+), 10 deletions(-) create mode 100644 src/llm/ollama.ts diff --git a/src/index.ts b/src/index.ts index 814996f..0d81c46 100644 --- a/src/index.ts +++ b/src/index.ts @@ -105,6 +105,7 @@ export { export { createAdapter } from './llm/adapter.js' export type { SupportedProvider } from './llm/adapter.js' +export { OllamaAdapter } from './llm/ollama.js' // --------------------------------------------------------------------------- // Memory diff --git a/src/llm/adapter.ts b/src/llm/adapter.ts index 979f37c..041999c 100644 --- a/src/llm/adapter.ts +++ b/src/llm/adapter.ts @@ -11,6 +11,8 @@ * * const anthropic = createAdapter('anthropic') * const openai = createAdapter('openai', process.env.OPENAI_API_KEY) + * const ollama = createAdapter('ollama') // uses http://localhost:11434 + * const ollamaRemote = createAdapter('ollama', 'http://my-server:11434') * ``` */ @@ -37,33 +39,42 @@ import type { LLMAdapter } from '../types.js' * Additional providers can be integrated by implementing {@link LLMAdapter} * directly and bypassing this factory. */ -export type SupportedProvider = 'anthropic' | 'openai' +export type SupportedProvider = 'anthropic' | 'openai' | 'ollama' /** * Instantiate the appropriate {@link LLMAdapter} for the given provider. * - * API keys fall back to the standard environment variables - * (`ANTHROPIC_API_KEY` / `OPENAI_API_KEY`) when not supplied explicitly. + * For `'anthropic'` and `'openai'`, the second argument is an API key that + * falls back to the standard environment variables (`ANTHROPIC_API_KEY` / + * `OPENAI_API_KEY`) when not supplied explicitly. + * + * For `'ollama'`, the second argument is the base URL of the Ollama server + * (e.g. `'http://localhost:11434'`). It falls back to the `OLLAMA_BASE_URL` + * environment variable, then `http://localhost:11434`. * * Adapters are imported lazily so that projects using only one provider * are not forced to install the SDK for the other. * - * @param provider - Which LLM provider to target. - * @param apiKey - Optional API key override; falls back to env var. + * @param provider - Which LLM provider to target. + * @param credential - API key (anthropic/openai) or base URL (ollama). * @throws {Error} When the provider string is not recognised. */ export async function createAdapter( provider: SupportedProvider, - apiKey?: string, + credential?: string, ): Promise { switch (provider) { case 'anthropic': { const { AnthropicAdapter } = await import('./anthropic.js') - return new AnthropicAdapter(apiKey) + return new AnthropicAdapter(credential) } case 'openai': { const { OpenAIAdapter } = await import('./openai.js') - return new OpenAIAdapter(apiKey) + return new OpenAIAdapter(credential) + } + case 'ollama': { + const { OllamaAdapter } = await import('./ollama.js') + return new OllamaAdapter(credential) } default: { // The `never` cast here makes TypeScript enforce exhaustiveness. diff --git a/src/llm/ollama.ts b/src/llm/ollama.ts new file mode 100644 index 0000000..43172a6 --- /dev/null +++ b/src/llm/ollama.ts @@ -0,0 +1,544 @@ +/// + +/** + * @fileoverview Ollama adapter implementing {@link LLMAdapter}. + * + * Calls the Ollama HTTP API at `/api/chat` to run local models (Qwen, Llama, + * Mistral, etc.) without any external SDK. Uses the Node.js built-in `fetch` + * API (available since Node 18). + * + * Key mapping decisions: + * + * - Framework `tool_use` blocks in assistant messages → Ollama `tool_calls` + * - Framework `tool_result` blocks in user messages → Ollama `tool` role messages + * - System prompt in {@link LLMChatOptions} → prepended `system` message + * - Ollama does not return IDs for tool calls; → IDs are generated with + * `crypto.randomUUID()` + * + * Base URL resolution order: + * 1. `baseUrl` constructor argument + * 2. `OLLAMA_BASE_URL` environment variable + * 3. `http://localhost:11434` + * + * @example + * ```ts + * import { OllamaAdapter } from './ollama.js' + * + * const adapter = new OllamaAdapter() + * const response = await adapter.chat(messages, { + * model: 'qwen2.5', + * maxTokens: 1024, + * }) + * ``` + */ + +import type { + ContentBlock, + LLMAdapter, + LLMChatOptions, + LLMMessage, + LLMResponse, + LLMStreamOptions, + LLMToolDef, + StreamEvent, + TextBlock, + ToolUseBlock, +} from '../types.js' + +// --------------------------------------------------------------------------- +// Ollama wire types +// --------------------------------------------------------------------------- + +interface OllamaToolCallFunction { + name: string + arguments: Record +} + +interface OllamaToolCall { + function: OllamaToolCallFunction +} + +interface OllamaMessage { + role: 'user' | 'assistant' | 'system' | 'tool' + content: string + tool_calls?: OllamaToolCall[] +} + +interface OllamaTool { + type: 'function' + function: { + name: string + description: string + parameters: Record + } +} + +interface OllamaOptions { + temperature?: number + num_predict?: number +} + +interface OllamaChatRequest { + model: string + messages: OllamaMessage[] + tools?: OllamaTool[] + options?: OllamaOptions + stream: boolean +} + +interface OllamaChatResponse { + model: string + created_at: string + message: OllamaMessage + done: boolean + done_reason?: string + prompt_eval_count?: number + eval_count?: number +} + +// --------------------------------------------------------------------------- +// Internal helpers — framework → Ollama +// --------------------------------------------------------------------------- + +/** + * Convert a framework {@link LLMToolDef} to an Ollama tool definition. + */ +function toOllamaTool(tool: LLMToolDef): OllamaTool { + return { + type: 'function', + function: { + name: tool.name, + description: tool.description, + parameters: tool.inputSchema as Record, + }, + } +} + +/** + * Convert framework {@link LLMMessage} array (plus optional system prompt) + * into an Ollama messages array. + * + * - `tool_result` blocks become separate `tool`-role messages (one per block) + * - `tool_use` blocks in assistant messages become `tool_calls` + * - A system prompt is prepended as a `system`-role message + */ +function toOllamaMessages( + messages: LLMMessage[], + systemPrompt: string | undefined, +): OllamaMessage[] { + const result: OllamaMessage[] = [] + + if (systemPrompt !== undefined && systemPrompt.length > 0) { + result.push({ role: 'system', content: systemPrompt }) + } + + for (const msg of messages) { + if (msg.role === 'assistant') { + result.push(toOllamaAssistantMessage(msg)) + } else { + // user role — split tool_result blocks out as separate `tool` messages + const toolResultBlocks = msg.content.filter((b) => b.type === 'tool_result') + const otherBlocks = msg.content.filter((b) => b.type !== 'tool_result') + + if (otherBlocks.length > 0) { + const textContent = otherBlocks + .filter((b): b is TextBlock => b.type === 'text') + .map((b) => b.text) + .join('\n') + result.push({ role: 'user', content: textContent }) + } + + for (const block of toolResultBlocks) { + if (block.type === 'tool_result') { + result.push({ role: 'tool', content: block.content }) + } + } + } + } + + return result +} + +/** + * Convert an `assistant`-role framework message to an Ollama assistant message. + * + * Any `tool_use` blocks become `tool_calls`; `text` blocks become the message + * content string. + */ +function toOllamaAssistantMessage(msg: LLMMessage): OllamaMessage { + const toolCalls: OllamaToolCall[] = [] + const textParts: string[] = [] + + for (const block of msg.content) { + if (block.type === 'tool_use') { + toolCalls.push({ + function: { + name: block.name, + arguments: block.input, + }, + }) + } else if (block.type === 'text') { + textParts.push(block.text) + } + } + + const message: OllamaMessage = { + role: 'assistant', + content: textParts.join(''), + } + + if (toolCalls.length > 0) { + message.tool_calls = toolCalls + } + + return message +} + +// --------------------------------------------------------------------------- +// Internal helpers — Ollama → framework +// --------------------------------------------------------------------------- + +/** + * Parse tool call arguments from an Ollama response. + * + * Ollama returns `arguments` as a JSON object directly, but we defensively + * handle the case where it arrives as a serialised string (e.g. some older + * model releases). + */ +function parseToolArguments(args: unknown): Record { + if (args !== null && typeof args === 'object' && !Array.isArray(args)) { + return args as Record + } + if (typeof args === 'string') { + try { + const parsed: unknown = JSON.parse(args) + if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) { + return parsed as Record + } + } catch { + // Malformed JSON — surface as empty object. + } + } + return {} +} + +/** + * Normalise an Ollama `done_reason` string to the framework's canonical + * stop-reason vocabulary. + * + * Mapping: + * - `'stop'` → `'end_turn'` + * - `'tool_calls'` → `'tool_use'` + * - `'length'` → `'max_tokens'` + * - anything else → passed through unchanged + */ +function normalizeDoneReason(reason: string | undefined): string { + switch (reason) { + case 'stop': return 'end_turn' + case 'tool_calls': return 'tool_use' + case 'length': return 'max_tokens' + default: return reason ?? 'end_turn' + } +} + +/** + * Convert an {@link OllamaChatResponse} into a framework {@link LLMResponse}. + */ +function fromOllamaResponse(data: OllamaChatResponse): LLMResponse { + const content: ContentBlock[] = [] + const message = data.message + + if (message.content.length > 0) { + const textBlock: TextBlock = { type: 'text', text: message.content } + content.push(textBlock) + } + + for (const tc of message.tool_calls ?? []) { + const toolUseBlock: ToolUseBlock = { + type: 'tool_use', + id: crypto.randomUUID(), + name: tc.function.name, + input: parseToolArguments(tc.function.arguments), + } + content.push(toolUseBlock) + } + + return { + id: crypto.randomUUID(), + content, + model: data.model, + stop_reason: normalizeDoneReason(data.done_reason), + usage: { + input_tokens: data.prompt_eval_count ?? 0, + output_tokens: data.eval_count ?? 0, + }, + } +} + +// --------------------------------------------------------------------------- +// Adapter implementation +// --------------------------------------------------------------------------- + +/** + * LLM adapter backed by the Ollama HTTP API. + * + * Requires a locally-running Ollama instance. The default base URL is + * `http://localhost:11434`; override via the constructor or the + * `OLLAMA_BASE_URL` environment variable. + * + * No API key is required — Ollama runs entirely on your own hardware. + * + * Thread-safe — a single instance may be shared across concurrent agent runs. + */ +export class OllamaAdapter implements LLMAdapter { + readonly name = 'ollama' + + readonly #baseUrl: string + + constructor(baseUrl?: string) { + this.#baseUrl = ( + baseUrl ?? process.env['OLLAMA_BASE_URL'] ?? 'http://localhost:11434' + ).replace(/\/$/, '') + } + + // ------------------------------------------------------------------------- + // chat() + // ------------------------------------------------------------------------- + + /** + * Send a synchronous (non-streaming) chat request and return the complete + * {@link LLMResponse}. + * + * Throws an `Error` on non-2xx responses or network failures. Callers should + * catch and handle these (e.g. model not found, Ollama not running). + */ + async chat(messages: LLMMessage[], options: LLMChatOptions): Promise { + const ollamaMessages = toOllamaMessages(messages, options.systemPrompt) + + const body: OllamaChatRequest = { + model: options.model, + messages: ollamaMessages, + stream: false, + } + + if (options.tools !== undefined && options.tools.length > 0) { + body.tools = options.tools.map(toOllamaTool) + } + + const ollamaOptions: OllamaOptions = {} + if (options.temperature !== undefined) ollamaOptions.temperature = options.temperature + if (options.maxTokens !== undefined) ollamaOptions.num_predict = options.maxTokens + if (Object.keys(ollamaOptions).length > 0) body.options = ollamaOptions + + const response = await fetch(`${this.#baseUrl}/api/chat`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + signal: options.abortSignal, + }) + + if (!response.ok) { + const errorText = await response.text().catch(() => response.statusText) + throw new Error(`Ollama API error ${response.status}: ${errorText}`) + } + + const data: OllamaChatResponse = (await response.json()) as OllamaChatResponse + return fromOllamaResponse(data) + } + + // ------------------------------------------------------------------------- + // stream() + // ------------------------------------------------------------------------- + + /** + * Send a streaming chat request and yield {@link StreamEvent}s incrementally. + * + * Ollama streams responses as NDJSON (newline-delimited JSON). Text deltas + * are emitted immediately; tool calls (when present) are accumulated and + * emitted after the stream ends, matching the contract of other adapters. + * + * Sequence guarantees: + * - Zero or more `text` events (incremental text deltas) + * - Zero or more `tool_use` events (emitted once per tool call, after stream ends) + * - Exactly one terminal event: `done` or `error` + */ + async *stream( + messages: LLMMessage[], + options: LLMStreamOptions, + ): AsyncIterable { + const ollamaMessages = toOllamaMessages(messages, options.systemPrompt) + + const body: OllamaChatRequest = { + model: options.model, + messages: ollamaMessages, + stream: true, + } + + if (options.tools !== undefined && options.tools.length > 0) { + body.tools = options.tools.map(toOllamaTool) + } + + const ollamaOptions: OllamaOptions = {} + if (options.temperature !== undefined) ollamaOptions.temperature = options.temperature + if (options.maxTokens !== undefined) ollamaOptions.num_predict = options.maxTokens + if (Object.keys(ollamaOptions).length > 0) body.options = ollamaOptions + + try { + const response = await fetch(`${this.#baseUrl}/api/chat`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + signal: options.abortSignal, + }) + + if (!response.ok) { + const errorText = await response.text().catch(() => response.statusText) + throw new Error(`Ollama API error ${response.status}: ${errorText}`) + } + + if (response.body === null) { + throw new Error('Ollama streaming response has no body') + } + + // Accumulate state across chunks. + let responseModel = options.model + let doneReason: string | undefined + let inputTokens = 0 + let outputTokens = 0 + let fullText = '' + const toolCallBuffers: OllamaToolCall[] = [] + + // Read the NDJSON stream line by line. + const reader = response.body.getReader() + const decoder = new TextDecoder() + let lineBuffer = '' + + try { + while (true) { + const { done, value } = await reader.read() + if (done) break + + lineBuffer += decoder.decode(value, { stream: true }) + const lines = lineBuffer.split('\n') + // Keep the last (possibly incomplete) line in the buffer. + lineBuffer = lines.pop() ?? '' + + for (const line of lines) { + const event = parseStreamChunk(line) + if (event === null) continue + + responseModel = event.model + + if (event.message.content.length > 0) { + fullText += event.message.content + const textEvent: StreamEvent = { type: 'text', data: event.message.content } + yield textEvent + } + + if (event.message.tool_calls !== undefined) { + for (const tc of event.message.tool_calls) { + toolCallBuffers.push(tc) + } + } + + if (event.done) { + doneReason = event.done_reason + inputTokens = event.prompt_eval_count ?? 0 + outputTokens = event.eval_count ?? 0 + } + } + } + + // Handle any remaining data left in the line buffer after EOF. + const finalEvent = parseStreamChunk(lineBuffer) + if (finalEvent !== null) { + responseModel = finalEvent.model + if (finalEvent.message.content.length > 0) { + fullText += finalEvent.message.content + const textEvent: StreamEvent = { type: 'text', data: finalEvent.message.content } + yield textEvent + } + if (finalEvent.message.tool_calls !== undefined) { + for (const tc of finalEvent.message.tool_calls) { + toolCallBuffers.push(tc) + } + } + if (finalEvent.done) { + doneReason = finalEvent.done_reason + inputTokens = finalEvent.prompt_eval_count ?? 0 + outputTokens = finalEvent.eval_count ?? 0 + } + } + } finally { + reader.releaseLock() + } + + // Emit accumulated tool_use events after the stream ends. + const finalToolUseBlocks: ToolUseBlock[] = [] + for (const tc of toolCallBuffers) { + const toolUseBlock: ToolUseBlock = { + type: 'tool_use', + id: crypto.randomUUID(), + name: tc.function.name, + input: parseToolArguments(tc.function.arguments), + } + finalToolUseBlocks.push(toolUseBlock) + const toolUseEvent: StreamEvent = { type: 'tool_use', data: toolUseBlock } + yield toolUseEvent + } + + // Build the complete content array for the done response. + const doneContent: ContentBlock[] = [] + if (fullText.length > 0) { + const textBlock: TextBlock = { type: 'text', text: fullText } + doneContent.push(textBlock) + } + doneContent.push(...finalToolUseBlocks) + + const finalResponse: LLMResponse = { + id: crypto.randomUUID(), + content: doneContent, + model: responseModel, + stop_reason: normalizeDoneReason(doneReason), + usage: { input_tokens: inputTokens, output_tokens: outputTokens }, + } + + const doneEvent: StreamEvent = { type: 'done', data: finalResponse } + yield doneEvent + } catch (err) { + const error = err instanceof Error ? err : new Error(String(err)) + const errorEvent: StreamEvent = { type: 'error', data: error } + yield errorEvent + } + } +} + +// --------------------------------------------------------------------------- +// Private utility +// --------------------------------------------------------------------------- + +/** + * Parse a single NDJSON line from the Ollama streaming response. + * Returns `null` for empty or unparseable lines. + */ +function parseStreamChunk(line: string): OllamaChatResponse | null { + const trimmed = line.trim() + if (trimmed.length === 0) return null + try { + return JSON.parse(trimmed) as OllamaChatResponse + } catch { + return null + } +} + +// Re-export types that consumers of this module commonly need alongside the adapter. +export type { + ContentBlock, + LLMAdapter, + LLMChatOptions, + LLMMessage, + LLMResponse, + LLMStreamOptions, + LLMToolDef, + StreamEvent, +} diff --git a/src/types.ts b/src/types.ts index 2875a35..90a7119 100644 --- a/src/types.ts +++ b/src/types.ts @@ -186,7 +186,7 @@ export interface ToolDefinition> { export interface AgentConfig { readonly name: string readonly model: string - readonly provider?: 'anthropic' | 'openai' + readonly provider?: 'anthropic' | 'openai' | 'ollama' readonly systemPrompt?: string /** Names of tools (from the tool registry) available to this agent. */ readonly tools?: readonly string[] @@ -285,7 +285,7 @@ export interface OrchestratorEvent { export interface OrchestratorConfig { readonly maxConcurrency?: number readonly defaultModel?: string - readonly defaultProvider?: 'anthropic' | 'openai' + readonly defaultProvider?: 'anthropic' | 'openai' | 'ollama' onProgress?: (event: OrchestratorEvent) => void }