feat: add OllamaAdapter with streaming, tool calling, and factory wiring

Agent-Logs-Url: https://github.com/m-prunty/open-multi-agent/sessions/1057f1b1-f24b-4363-8cdb-ab9188e5a262

Co-authored-by: m-prunty <27181505+m-prunty@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot] 2026-04-01 19:58:54 +00:00 committed by GitHub
parent ad317610ff
commit 7bc23521f4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 566 additions and 10 deletions

View File

@ -105,6 +105,7 @@ export {
export { createAdapter } from './llm/adapter.js'
export type { SupportedProvider } from './llm/adapter.js'
export { OllamaAdapter } from './llm/ollama.js'
// ---------------------------------------------------------------------------
// Memory

View File

@ -11,6 +11,8 @@
*
* const anthropic = createAdapter('anthropic')
* const openai = createAdapter('openai', process.env.OPENAI_API_KEY)
* const ollama = createAdapter('ollama') // uses http://localhost:11434
* const ollamaRemote = createAdapter('ollama', 'http://my-server:11434')
* ```
*/
@ -37,33 +39,42 @@ import type { LLMAdapter } from '../types.js'
* Additional providers can be integrated by implementing {@link LLMAdapter}
* directly and bypassing this factory.
*/
export type SupportedProvider = 'anthropic' | 'openai'
export type SupportedProvider = 'anthropic' | 'openai' | 'ollama'
/**
* Instantiate the appropriate {@link LLMAdapter} for the given provider.
*
* API keys fall back to the standard environment variables
* (`ANTHROPIC_API_KEY` / `OPENAI_API_KEY`) when not supplied explicitly.
* For `'anthropic'` and `'openai'`, the second argument is an API key that
* falls back to the standard environment variables (`ANTHROPIC_API_KEY` /
* `OPENAI_API_KEY`) when not supplied explicitly.
*
* For `'ollama'`, the second argument is the base URL of the Ollama server
* (e.g. `'http://localhost:11434'`). It falls back to the `OLLAMA_BASE_URL`
* environment variable, then `http://localhost:11434`.
*
* Adapters are imported lazily so that projects using only one provider
* are not forced to install the SDK for the other.
*
* @param provider - Which LLM provider to target.
* @param apiKey - Optional API key override; falls back to env var.
* @param provider - Which LLM provider to target.
* @param credential - API key (anthropic/openai) or base URL (ollama).
* @throws {Error} When the provider string is not recognised.
*/
export async function createAdapter(
provider: SupportedProvider,
apiKey?: string,
credential?: string,
): Promise<LLMAdapter> {
switch (provider) {
case 'anthropic': {
const { AnthropicAdapter } = await import('./anthropic.js')
return new AnthropicAdapter(apiKey)
return new AnthropicAdapter(credential)
}
case 'openai': {
const { OpenAIAdapter } = await import('./openai.js')
return new OpenAIAdapter(apiKey)
return new OpenAIAdapter(credential)
}
case 'ollama': {
const { OllamaAdapter } = await import('./ollama.js')
return new OllamaAdapter(credential)
}
default: {
// The `never` cast here makes TypeScript enforce exhaustiveness.

544
src/llm/ollama.ts Normal file
View File

@ -0,0 +1,544 @@
/// <reference types="node" />
/**
* @fileoverview Ollama adapter implementing {@link LLMAdapter}.
*
* Calls the Ollama HTTP API at `/api/chat` to run local models (Qwen, Llama,
* Mistral, etc.) without any external SDK. Uses the Node.js built-in `fetch`
* API (available since Node 18).
*
* Key mapping decisions:
*
 * - Framework `tool_use` blocks in assistant messages → Ollama `tool_calls`
 * - Framework `tool_result` blocks in user messages → Ollama `tool` role messages
 * - System prompt in {@link LLMChatOptions} → prepended `system` message
* - Ollama does not return IDs for tool calls; IDs are generated with
* `crypto.randomUUID()`
*
* Base URL resolution order:
* 1. `baseUrl` constructor argument
* 2. `OLLAMA_BASE_URL` environment variable
* 3. `http://localhost:11434`
*
* @example
* ```ts
* import { OllamaAdapter } from './ollama.js'
*
* const adapter = new OllamaAdapter()
* const response = await adapter.chat(messages, {
* model: 'qwen2.5',
* maxTokens: 1024,
* })
* ```
*/
import type {
ContentBlock,
LLMAdapter,
LLMChatOptions,
LLMMessage,
LLMResponse,
LLMStreamOptions,
LLMToolDef,
StreamEvent,
TextBlock,
ToolUseBlock,
} from '../types.js'
// ---------------------------------------------------------------------------
// Ollama wire types
// ---------------------------------------------------------------------------
/** The `function` payload of a single tool call on the Ollama wire. */
interface OllamaToolCallFunction {
  name: string
  // Normally a JSON object; parseToolArguments also tolerates a serialised
  // string from older model builds.
  arguments: Record<string, unknown>
}
/** One tool call as emitted by Ollama. Note: no ID field — IDs are generated locally. */
interface OllamaToolCall {
  function: OllamaToolCallFunction
}
/** A single chat message in Ollama's `/api/chat` format. */
interface OllamaMessage {
  role: 'user' | 'assistant' | 'system' | 'tool'
  content: string
  // Present only on assistant messages that invoke tools.
  tool_calls?: OllamaToolCall[]
}
/** Tool definition in Ollama's (OpenAI-style) `function` wrapper format. */
interface OllamaTool {
  type: 'function'
  function: {
    name: string
    description: string
    // JSON Schema describing the tool's input (passed through unchanged).
    parameters: Record<string, unknown>
  }
}
/** Sampling/runtime options forwarded in the request `options` field. */
interface OllamaOptions {
  temperature?: number
  // Ollama's name for the maximum number of tokens to generate.
  num_predict?: number
}
/** Request body for `POST /api/chat`. */
interface OllamaChatRequest {
  model: string
  messages: OllamaMessage[]
  tools?: OllamaTool[]
  options?: OllamaOptions
  stream: boolean
}
/**
 * One response object from `/api/chat` — the complete reply when `stream`
 * is false, or a single NDJSON chunk when streaming.
 */
interface OllamaChatResponse {
  model: string
  created_at: string
  message: OllamaMessage
  done: boolean
  // Set on the final chunk; normalizeDoneReason maps 'stop' / 'tool_calls' / 'length'.
  done_reason?: string
  // Token counts reported by Ollama: prompt (input) and generation (output).
  prompt_eval_count?: number
  eval_count?: number
}
// ---------------------------------------------------------------------------
// Internal helpers — framework → Ollama
// ---------------------------------------------------------------------------
/**
 * Map a framework {@link LLMToolDef} onto the Ollama wire format.
 *
 * Ollama uses the OpenAI-style `{ type: 'function', function: {...} }`
 * wrapper; the framework's JSON-schema `inputSchema` is forwarded as
 * `parameters` without modification.
 */
function toOllamaTool(tool: LLMToolDef): OllamaTool {
  const { name, description } = tool
  const parameters = tool.inputSchema as Record<string, unknown>
  return { type: 'function', function: { name, description, parameters } }
}
/**
 * Translate framework messages (plus an optional system prompt) into an
 * Ollama message array.
 *
 * - A non-empty system prompt becomes a leading `system`-role message.
 * - Assistant messages are delegated to {@link toOllamaAssistantMessage}.
 * - For user messages, `text` blocks are joined (newline-separated) into a
 *   single `user` message, and each `tool_result` block becomes its own
 *   `tool`-role message, emitted after the user text.
 */
function toOllamaMessages(
  messages: LLMMessage[],
  systemPrompt: string | undefined,
): OllamaMessage[] {
  const out: OllamaMessage[] = []
  if (systemPrompt !== undefined && systemPrompt.length > 0) {
    out.push({ role: 'system', content: systemPrompt })
  }
  for (const msg of messages) {
    if (msg.role === 'assistant') {
      out.push(toOllamaAssistantMessage(msg))
      continue
    }
    // user role — peel tool_result blocks off into separate `tool` messages.
    const nonToolResults = msg.content.filter((b) => b.type !== 'tool_result')
    if (nonToolResults.length > 0) {
      const text = nonToolResults
        .filter((b): b is TextBlock => b.type === 'text')
        .map((b) => b.text)
        .join('\n')
      out.push({ role: 'user', content: text })
    }
    for (const b of msg.content) {
      if (b.type === 'tool_result') {
        out.push({ role: 'tool', content: b.content })
      }
    }
  }
  return out
}
/**
* Convert an `assistant`-role framework message to an Ollama assistant message.
*
* Any `tool_use` blocks become `tool_calls`; `text` blocks become the message
* content string.
*/
function toOllamaAssistantMessage(msg: LLMMessage): OllamaMessage {
const toolCalls: OllamaToolCall[] = []
const textParts: string[] = []
for (const block of msg.content) {
if (block.type === 'tool_use') {
toolCalls.push({
function: {
name: block.name,
arguments: block.input,
},
})
} else if (block.type === 'text') {
textParts.push(block.text)
}
}
const message: OllamaMessage = {
role: 'assistant',
content: textParts.join(''),
}
if (toolCalls.length > 0) {
message.tool_calls = toolCalls
}
return message
}
// ---------------------------------------------------------------------------
// Internal helpers — Ollama → framework
// ---------------------------------------------------------------------------
/**
 * Coerce tool-call `arguments` from an Ollama response into a plain object.
 *
 * Current Ollama releases send a JSON object directly, but some older model
 * builds send a serialised string, which is parsed defensively. Anything
 * else — arrays, primitives, malformed JSON — collapses to `{}`.
 */
function parseToolArguments(args: unknown): Record<string, unknown> {
  const isPlainObject = (v: unknown): v is Record<string, unknown> =>
    v !== null && typeof v === 'object' && !Array.isArray(v)
  if (isPlainObject(args)) {
    return args
  }
  if (typeof args === 'string') {
    try {
      const decoded: unknown = JSON.parse(args)
      if (isPlainObject(decoded)) {
        return decoded
      }
    } catch {
      // Not valid JSON — fall through to the empty-object default.
    }
  }
  return {}
}
/**
 * Map an Ollama `done_reason` onto the framework's canonical stop-reason
 * vocabulary.
 *
 * Mapping:
 * - `'stop'` → `'end_turn'`
 * - `'tool_calls'` → `'tool_use'`
 * - `'length'` → `'max_tokens'`
 * - `undefined` → `'end_turn'`
 * - anything else passed through unchanged
 */
function normalizeDoneReason(reason: string | undefined): string {
  if (reason === undefined) {
    return 'end_turn'
  }
  const mapping: Record<string, string> = {
    stop: 'end_turn',
    tool_calls: 'tool_use',
    length: 'max_tokens',
  }
  return mapping[reason] ?? reason
}
/**
 * Build a framework {@link LLMResponse} from a complete (non-streaming)
 * Ollama chat response.
 *
 * A non-empty `content` string becomes a single text block; each entry in
 * `tool_calls` becomes a `tool_use` block. Ollama assigns no IDs to tool
 * calls or responses, so fresh UUIDs are generated for both.
 */
function fromOllamaResponse(data: OllamaChatResponse): LLMResponse {
  const blocks: ContentBlock[] = []
  const { message } = data
  if (message.content.length > 0) {
    const text: TextBlock = { type: 'text', text: message.content }
    blocks.push(text)
  }
  const calls = message.tool_calls ?? []
  for (const call of calls) {
    const toolUse: ToolUseBlock = {
      type: 'tool_use',
      id: crypto.randomUUID(),
      name: call.function.name,
      input: parseToolArguments(call.function.arguments),
    }
    blocks.push(toolUse)
  }
  return {
    id: crypto.randomUUID(),
    content: blocks,
    model: data.model,
    stop_reason: normalizeDoneReason(data.done_reason),
    usage: {
      input_tokens: data.prompt_eval_count ?? 0,
      output_tokens: data.eval_count ?? 0,
    },
  }
}
// ---------------------------------------------------------------------------
// Adapter implementation
// ---------------------------------------------------------------------------
/**
 * LLM adapter backed by the Ollama HTTP API.
 *
 * Requires a locally-running Ollama instance. The default base URL is
 * `http://localhost:11434`; override via the constructor or the
 * `OLLAMA_BASE_URL` environment variable.
 *
 * No API key is required — Ollama runs entirely on your own hardware.
 *
 * Thread-safe — a single instance may be shared across concurrent agent
 * runs (the adapter keeps no per-request state).
 */
export class OllamaAdapter implements LLMAdapter {
  readonly name = 'ollama'
  /** Base URL of the Ollama server, normalised to have no trailing slash. */
  readonly #baseUrl: string
  /**
   * @param baseUrl - Base URL of the Ollama server. Falls back to the
   *   `OLLAMA_BASE_URL` environment variable, then `http://localhost:11434`.
   */
  constructor(baseUrl?: string) {
    this.#baseUrl = (
      baseUrl ?? process.env['OLLAMA_BASE_URL'] ?? 'http://localhost:11434'
    ).replace(/\/$/, '')
  }
  // -------------------------------------------------------------------------
  // Shared request plumbing
  // -------------------------------------------------------------------------
  /**
   * Build the `/api/chat` request payload shared by {@link chat} and
   * {@link stream}. Sampling options are only attached when at least one
   * is set, so the wire payload stays minimal.
   */
  #buildRequestBody(
    messages: LLMMessage[],
    options: LLMChatOptions | LLMStreamOptions,
    stream: boolean,
  ): OllamaChatRequest {
    const body: OllamaChatRequest = {
      model: options.model,
      messages: toOllamaMessages(messages, options.systemPrompt),
      stream,
    }
    if (options.tools !== undefined && options.tools.length > 0) {
      body.tools = options.tools.map(toOllamaTool)
    }
    const ollamaOptions: OllamaOptions = {}
    if (options.temperature !== undefined) ollamaOptions.temperature = options.temperature
    if (options.maxTokens !== undefined) ollamaOptions.num_predict = options.maxTokens
    if (Object.keys(ollamaOptions).length > 0) body.options = ollamaOptions
    return body
  }
  /**
   * POST a request body to `/api/chat`.
   *
   * @throws {Error} On any non-2xx status, with the response text (or
   *   status text when the body cannot be read) in the message.
   */
  async #send(body: OllamaChatRequest, signal?: AbortSignal): Promise<Response> {
    const response = await fetch(`${this.#baseUrl}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
      signal,
    })
    if (!response.ok) {
      const errorText = await response.text().catch(() => response.statusText)
      throw new Error(`Ollama API error ${response.status}: ${errorText}`)
    }
    return response
  }
  // -------------------------------------------------------------------------
  // chat()
  // -------------------------------------------------------------------------
  /**
   * Send a synchronous (non-streaming) chat request and return the complete
   * {@link LLMResponse}.
   *
   * Throws an `Error` on non-2xx responses or network failures. Callers should
   * catch and handle these (e.g. model not found, Ollama not running).
   */
  async chat(messages: LLMMessage[], options: LLMChatOptions): Promise<LLMResponse> {
    const body = this.#buildRequestBody(messages, options, false)
    const response = await this.#send(body, options.abortSignal)
    const data: OllamaChatResponse = (await response.json()) as OllamaChatResponse
    return fromOllamaResponse(data)
  }
  // -------------------------------------------------------------------------
  // stream()
  // -------------------------------------------------------------------------
  /**
   * Send a streaming chat request and yield {@link StreamEvent}s incrementally.
   *
   * Ollama streams responses as NDJSON (newline-delimited JSON). Text deltas
   * are emitted immediately; tool calls (when present) are accumulated and
   * emitted after the stream ends, matching the contract of other adapters.
   *
   * Sequence guarantees:
   * - Zero or more `text` events (incremental text deltas)
   * - Zero or more `tool_use` events (emitted once per tool call, after stream ends)
   * - Exactly one terminal event: `done` or `error`
   */
  async *stream(
    messages: LLMMessage[],
    options: LLMStreamOptions,
  ): AsyncIterable<StreamEvent> {
    const body = this.#buildRequestBody(messages, options, true)
    try {
      const response = await this.#send(body, options.abortSignal)
      if (response.body === null) {
        throw new Error('Ollama streaming response has no body')
      }
      // State accumulated across NDJSON chunks.
      let responseModel = options.model
      let doneReason: string | undefined
      let inputTokens = 0
      let outputTokens = 0
      let fullText = ''
      const toolCallBuffers: OllamaToolCall[] = []
      // Apply one parsed chunk to the accumulators and return the text delta
      // to emit (or null when the chunk carries no text). A closure rather
      // than a method so the read loop and the EOF tail share one code path.
      const applyChunk = (chunk: OllamaChatResponse): string | null => {
        responseModel = chunk.model
        for (const tc of chunk.message.tool_calls ?? []) {
          toolCallBuffers.push(tc)
        }
        if (chunk.done) {
          doneReason = chunk.done_reason
          inputTokens = chunk.prompt_eval_count ?? 0
          outputTokens = chunk.eval_count ?? 0
        }
        return chunk.message.content.length > 0 ? chunk.message.content : null
      }
      // Read the NDJSON stream line by line.
      const reader = response.body.getReader()
      const decoder = new TextDecoder()
      let lineBuffer = ''
      try {
        while (true) {
          const { done, value } = await reader.read()
          if (done) break
          lineBuffer += decoder.decode(value, { stream: true })
          const lines = lineBuffer.split('\n')
          // Keep the last (possibly incomplete) line in the buffer.
          lineBuffer = lines.pop() ?? ''
          for (const line of lines) {
            const chunk = parseStreamChunk(line)
            if (chunk === null) continue
            const delta = applyChunk(chunk)
            if (delta !== null) {
              fullText += delta
              const textEvent: StreamEvent = { type: 'text', data: delta }
              yield textEvent
            }
          }
        }
        // Flush the decoder — a multi-byte UTF-8 character may straddle the
        // final read — then process whatever remains in the line buffer.
        lineBuffer += decoder.decode()
        const tail = parseStreamChunk(lineBuffer)
        if (tail !== null) {
          const delta = applyChunk(tail)
          if (delta !== null) {
            fullText += delta
            const textEvent: StreamEvent = { type: 'text', data: delta }
            yield textEvent
          }
        }
      } finally {
        reader.releaseLock()
      }
      // Emit accumulated tool_use events after the stream ends.
      const finalToolUseBlocks: ToolUseBlock[] = []
      for (const tc of toolCallBuffers) {
        const toolUseBlock: ToolUseBlock = {
          type: 'tool_use',
          id: crypto.randomUUID(),
          name: tc.function.name,
          input: parseToolArguments(tc.function.arguments),
        }
        finalToolUseBlocks.push(toolUseBlock)
        const toolUseEvent: StreamEvent = { type: 'tool_use', data: toolUseBlock }
        yield toolUseEvent
      }
      // Build the complete content array for the terminal done event.
      const doneContent: ContentBlock[] = []
      if (fullText.length > 0) {
        const textBlock: TextBlock = { type: 'text', text: fullText }
        doneContent.push(textBlock)
      }
      doneContent.push(...finalToolUseBlocks)
      const finalResponse: LLMResponse = {
        id: crypto.randomUUID(),
        content: doneContent,
        model: responseModel,
        stop_reason: normalizeDoneReason(doneReason),
        usage: { input_tokens: inputTokens, output_tokens: outputTokens },
      }
      const doneEvent: StreamEvent = { type: 'done', data: finalResponse }
      yield doneEvent
    } catch (err) {
      // Any failure (network, HTTP status, abort) surfaces as a terminal
      // `error` event rather than a thrown exception.
      const error = err instanceof Error ? err : new Error(String(err))
      const errorEvent: StreamEvent = { type: 'error', data: error }
      yield errorEvent
    }
  }
}
// ---------------------------------------------------------------------------
// Private utility
// ---------------------------------------------------------------------------
/**
 * Parse a single NDJSON line from the Ollama streaming response.
 *
 * @returns The decoded chunk, or `null` when the line is blank or not
 *   valid JSON (callers skip such lines silently).
 */
function parseStreamChunk(line: string): OllamaChatResponse | null {
  const text = line.trim()
  if (text === '') {
    return null
  }
  try {
    return JSON.parse(text) as OllamaChatResponse
  } catch {
    return null
  }
}
// Re-export types that consumers of this module commonly need alongside the adapter.
export type {
ContentBlock,
LLMAdapter,
LLMChatOptions,
LLMMessage,
LLMResponse,
LLMStreamOptions,
LLMToolDef,
StreamEvent,
}

View File

@ -186,7 +186,7 @@ export interface ToolDefinition<TInput = Record<string, unknown>> {
export interface AgentConfig {
readonly name: string
readonly model: string
readonly provider?: 'anthropic' | 'openai'
readonly provider?: 'anthropic' | 'openai' | 'ollama'
readonly systemPrompt?: string
/** Names of tools (from the tool registry) available to this agent. */
readonly tools?: readonly string[]
@ -285,7 +285,7 @@ export interface OrchestratorEvent {
export interface OrchestratorConfig {
readonly maxConcurrency?: number
readonly defaultModel?: string
readonly defaultProvider?: 'anthropic' | 'openai'
readonly defaultProvider?: 'anthropic' | 'openai' | 'ollama'
onProgress?: (event: OrchestratorEvent) => void
}