feat: add OllamaAdapter with streaming, tool calling, and factory wiring

Agent-Logs-Url: https://github.com/m-prunty/open-multi-agent/sessions/1057f1b1-f24b-4363-8cdb-ab9188e5a262

Co-authored-by: m-prunty <27181505+m-prunty@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot] 2026-04-01 19:58:54 +00:00 committed by GitHub
parent ad317610ff
commit 7bc23521f4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 566 additions and 10 deletions

View File

@ -105,6 +105,7 @@ export {
export { createAdapter } from './llm/adapter.js'
export type { SupportedProvider } from './llm/adapter.js'
export { OllamaAdapter } from './llm/ollama.js'
// ---------------------------------------------------------------------------
// Memory

View File

@ -11,6 +11,8 @@
*
* const anthropic = createAdapter('anthropic')
* const openai = createAdapter('openai', process.env.OPENAI_API_KEY)
* const ollama = createAdapter('ollama') // uses http://localhost:11434
* const ollamaRemote = createAdapter('ollama', 'http://my-server:11434')
* ```
*/
@ -37,33 +39,42 @@ import type { LLMAdapter } from '../types.js'
* Additional providers can be integrated by implementing {@link LLMAdapter}
* directly and bypassing this factory.
*/
export type SupportedProvider = 'anthropic' | 'openai'
export type SupportedProvider = 'anthropic' | 'openai' | 'ollama'
/**
* Instantiate the appropriate {@link LLMAdapter} for the given provider.
*
* API keys fall back to the standard environment variables
* (`ANTHROPIC_API_KEY` / `OPENAI_API_KEY`) when not supplied explicitly.
* For `'anthropic'` and `'openai'`, the second argument is an API key that
* falls back to the standard environment variables (`ANTHROPIC_API_KEY` /
* `OPENAI_API_KEY`) when not supplied explicitly.
*
* For `'ollama'`, the second argument is the base URL of the Ollama server
* (e.g. `'http://localhost:11434'`). It falls back to the `OLLAMA_BASE_URL`
* environment variable, then `http://localhost:11434`.
*
* Adapters are imported lazily so that projects using only one provider
* are not forced to install the SDK for the other.
*
* @param provider - Which LLM provider to target.
* @param apiKey - Optional API key override; falls back to env var.
* @param provider - Which LLM provider to target.
* @param credential - API key (anthropic/openai) or base URL (ollama).
* @throws {Error} When the provider string is not recognised.
*/
export async function createAdapter(
provider: SupportedProvider,
apiKey?: string,
credential?: string,
): Promise<LLMAdapter> {
switch (provider) {
case 'anthropic': {
const { AnthropicAdapter } = await import('./anthropic.js')
return new AnthropicAdapter(apiKey)
return new AnthropicAdapter(credential)
}
case 'openai': {
const { OpenAIAdapter } = await import('./openai.js')
return new OpenAIAdapter(apiKey)
return new OpenAIAdapter(credential)
}
case 'ollama': {
const { OllamaAdapter } = await import('./ollama.js')
return new OllamaAdapter(credential)
}
default: {
// The `never` cast here makes TypeScript enforce exhaustiveness.

544
src/llm/ollama.ts Normal file
View File

@ -0,0 +1,544 @@
/// <reference types="node" />
/**
* @fileoverview Ollama adapter implementing {@link LLMAdapter}.
*
* Calls the Ollama HTTP API at `/api/chat` to run local models (Qwen, Llama,
* Mistral, etc.) without any external SDK. Uses the Node.js built-in `fetch`
* API (available since Node 18).
*
* Key mapping decisions:
*
 * - Framework `tool_use` blocks in assistant messages → Ollama `tool_calls`
 * - Framework `tool_result` blocks in user messages → Ollama `tool` role messages
 * - System prompt in {@link LLMChatOptions} → prepended `system` message
* - Ollama does not return IDs for tool calls; IDs are generated with
* `crypto.randomUUID()`
*
* Base URL resolution order:
* 1. `baseUrl` constructor argument
* 2. `OLLAMA_BASE_URL` environment variable
* 3. `http://localhost:11434`
*
* @example
* ```ts
* import { OllamaAdapter } from './ollama.js'
*
* const adapter = new OllamaAdapter()
* const response = await adapter.chat(messages, {
* model: 'qwen2.5',
* maxTokens: 1024,
* })
* ```
*/
import type {
ContentBlock,
LLMAdapter,
LLMChatOptions,
LLMMessage,
LLMResponse,
LLMStreamOptions,
LLMToolDef,
StreamEvent,
TextBlock,
ToolUseBlock,
} from '../types.js'
// ---------------------------------------------------------------------------
// Ollama wire types
// ---------------------------------------------------------------------------
/** The `function` payload of a single tool call on the Ollama wire. */
interface OllamaToolCallFunction {
  name: string
  // Normally a JSON object; parseToolArguments also tolerates a serialised
  // string from older model builds.
  arguments: Record<string, unknown>
}
/** One tool call as emitted by Ollama. Note: no ID field — IDs are generated locally. */
interface OllamaToolCall {
  function: OllamaToolCallFunction
}
/** A single chat message in Ollama's `/api/chat` format. */
interface OllamaMessage {
  role: 'user' | 'assistant' | 'system' | 'tool'
  content: string
  // Present only on assistant messages that invoke tools.
  tool_calls?: OllamaToolCall[]
}
/** Tool definition in Ollama's (OpenAI-style) `function` wrapper format. */
interface OllamaTool {
  type: 'function'
  function: {
    name: string
    description: string
    // JSON Schema describing the tool's input (passed through unchanged).
    parameters: Record<string, unknown>
  }
}
/** Sampling/runtime options forwarded in the request `options` field. */
interface OllamaOptions {
  temperature?: number
  // Ollama's name for the maximum number of tokens to generate.
  num_predict?: number
}
/** Request body for `POST /api/chat`. */
interface OllamaChatRequest {
  model: string
  messages: OllamaMessage[]
  tools?: OllamaTool[]
  options?: OllamaOptions
  stream: boolean
}
/**
 * One response object from `/api/chat` — the complete reply when `stream`
 * is false, or a single NDJSON chunk when streaming.
 */
interface OllamaChatResponse {
  model: string
  created_at: string
  message: OllamaMessage
  done: boolean
  // Set on the final chunk; normalizeDoneReason maps 'stop' / 'tool_calls' / 'length'.
  done_reason?: string
  // Token counts reported by Ollama: prompt (input) and generation (output).
  prompt_eval_count?: number
  eval_count?: number
}
// ---------------------------------------------------------------------------
// Internal helpers — framework → Ollama
// ---------------------------------------------------------------------------
/**
 * Map a framework {@link LLMToolDef} onto the Ollama wire format.
 *
 * Ollama uses the OpenAI-style `{ type: 'function', function: {...} }`
 * wrapper; the framework's JSON-schema `inputSchema` is forwarded as
 * `parameters` without modification.
 */
function toOllamaTool(tool: LLMToolDef): OllamaTool {
  const { name, description } = tool
  const parameters = tool.inputSchema as Record<string, unknown>
  return { type: 'function', function: { name, description, parameters } }
}
/**
 * Translate framework messages (plus an optional system prompt) into an
 * Ollama message array.
 *
 * - A non-empty system prompt becomes a leading `system`-role message.
 * - Assistant messages are delegated to {@link toOllamaAssistantMessage}.
 * - For user messages, `text` blocks are joined (newline-separated) into a
 *   single `user` message, and each `tool_result` block becomes its own
 *   `tool`-role message, emitted after the user text.
 */
function toOllamaMessages(
  messages: LLMMessage[],
  systemPrompt: string | undefined,
): OllamaMessage[] {
  const out: OllamaMessage[] = []
  if (systemPrompt !== undefined && systemPrompt.length > 0) {
    out.push({ role: 'system', content: systemPrompt })
  }
  for (const msg of messages) {
    if (msg.role === 'assistant') {
      out.push(toOllamaAssistantMessage(msg))
      continue
    }
    // user role — peel tool_result blocks off into separate `tool` messages.
    const nonToolResults = msg.content.filter((b) => b.type !== 'tool_result')
    if (nonToolResults.length > 0) {
      const text = nonToolResults
        .filter((b): b is TextBlock => b.type === 'text')
        .map((b) => b.text)
        .join('\n')
      out.push({ role: 'user', content: text })
    }
    for (const b of msg.content) {
      if (b.type === 'tool_result') {
        out.push({ role: 'tool', content: b.content })
      }
    }
  }
  return out
}
/**
* Convert an `assistant`-role framework message to an Ollama assistant message.
*
* Any `tool_use` blocks become `tool_calls`; `text` blocks become the message
* content string.
*/
function toOllamaAssistantMessage(msg: LLMMessage): OllamaMessage {
const toolCalls: OllamaToolCall[] = []
const textParts: string[] = []
for (const block of msg.content) {
if (block.type === 'tool_use') {
toolCalls.push({
function: {
name: block.name,
arguments: block.input,
},
})
} else if (block.type === 'text') {
textParts.push(block.text)
}
}
const message: OllamaMessage = {
role: 'assistant',
content: textParts.join(''),
}
if (toolCalls.length > 0) {
message.tool_calls = toolCalls
}
return message
}
// ---------------------------------------------------------------------------
// Internal helpers — Ollama → framework
// ---------------------------------------------------------------------------
/**
 * Coerce tool-call `arguments` from an Ollama response into a plain object.
 *
 * Current Ollama releases send a JSON object directly, but some older model
 * builds send a serialised string, which is parsed defensively. Anything
 * else — arrays, primitives, malformed JSON — collapses to `{}`.
 */
function parseToolArguments(args: unknown): Record<string, unknown> {
  const isPlainObject = (v: unknown): v is Record<string, unknown> =>
    v !== null && typeof v === 'object' && !Array.isArray(v)
  if (isPlainObject(args)) {
    return args
  }
  if (typeof args === 'string') {
    try {
      const decoded: unknown = JSON.parse(args)
      if (isPlainObject(decoded)) {
        return decoded
      }
    } catch {
      // Not valid JSON — fall through to the empty-object default.
    }
  }
  return {}
}
/**
 * Map an Ollama `done_reason` onto the framework's canonical stop-reason
 * vocabulary.
 *
 * Mapping:
 * - `'stop'` → `'end_turn'`
 * - `'tool_calls'` → `'tool_use'`
 * - `'length'` → `'max_tokens'`
 * - `undefined` → `'end_turn'`
 * - anything else passed through unchanged
 */
function normalizeDoneReason(reason: string | undefined): string {
  if (reason === undefined) {
    return 'end_turn'
  }
  const mapping: Record<string, string> = {
    stop: 'end_turn',
    tool_calls: 'tool_use',
    length: 'max_tokens',
  }
  return mapping[reason] ?? reason
}
/**
 * Build a framework {@link LLMResponse} from a complete (non-streaming)
 * Ollama chat response.
 *
 * A non-empty `content` string becomes a single text block; each entry in
 * `tool_calls` becomes a `tool_use` block. Ollama assigns no IDs to tool
 * calls or responses, so fresh UUIDs are generated for both.
 */
function fromOllamaResponse(data: OllamaChatResponse): LLMResponse {
  const blocks: ContentBlock[] = []
  const { message } = data
  if (message.content.length > 0) {
    const text: TextBlock = { type: 'text', text: message.content }
    blocks.push(text)
  }
  const calls = message.tool_calls ?? []
  for (const call of calls) {
    const toolUse: ToolUseBlock = {
      type: 'tool_use',
      id: crypto.randomUUID(),
      name: call.function.name,
      input: parseToolArguments(call.function.arguments),
    }
    blocks.push(toolUse)
  }
  return {
    id: crypto.randomUUID(),
    content: blocks,
    model: data.model,
    stop_reason: normalizeDoneReason(data.done_reason),
    usage: {
      input_tokens: data.prompt_eval_count ?? 0,
      output_tokens: data.eval_count ?? 0,
    },
  }
}
// ---------------------------------------------------------------------------
// Adapter implementation
// ---------------------------------------------------------------------------
/**
 * LLM adapter backed by the Ollama HTTP API.
 *
 * Requires a locally-running Ollama instance. The default base URL is
 * `http://localhost:11434`; override via the constructor or the
 * `OLLAMA_BASE_URL` environment variable.
 *
 * No API key is required — Ollama runs entirely on your own hardware.
 *
 * Thread-safe — a single instance may be shared across concurrent agent
 * runs (the adapter keeps no per-request state).
 */
export class OllamaAdapter implements LLMAdapter {
  readonly name = 'ollama'
  /** Base URL of the Ollama server, normalised to have no trailing slash. */
  readonly #baseUrl: string
  /**
   * @param baseUrl - Base URL of the Ollama server. Falls back to the
   *   `OLLAMA_BASE_URL` environment variable, then `http://localhost:11434`.
   */
  constructor(baseUrl?: string) {
    this.#baseUrl = (
      baseUrl ?? process.env['OLLAMA_BASE_URL'] ?? 'http://localhost:11434'
    ).replace(/\/$/, '')
  }
  // -------------------------------------------------------------------------
  // Shared request plumbing
  // -------------------------------------------------------------------------
  /**
   * Build the `/api/chat` request payload shared by {@link chat} and
   * {@link stream}. Sampling options are only attached when at least one
   * is set, so the wire payload stays minimal.
   */
  #buildRequestBody(
    messages: LLMMessage[],
    options: LLMChatOptions | LLMStreamOptions,
    stream: boolean,
  ): OllamaChatRequest {
    const body: OllamaChatRequest = {
      model: options.model,
      messages: toOllamaMessages(messages, options.systemPrompt),
      stream,
    }
    if (options.tools !== undefined && options.tools.length > 0) {
      body.tools = options.tools.map(toOllamaTool)
    }
    const ollamaOptions: OllamaOptions = {}
    if (options.temperature !== undefined) ollamaOptions.temperature = options.temperature
    if (options.maxTokens !== undefined) ollamaOptions.num_predict = options.maxTokens
    if (Object.keys(ollamaOptions).length > 0) body.options = ollamaOptions
    return body
  }
  /**
   * POST a request body to `/api/chat`.
   *
   * @throws {Error} On any non-2xx status, with the response text (or
   *   status text when the body cannot be read) in the message.
   */
  async #send(body: OllamaChatRequest, signal?: AbortSignal): Promise<Response> {
    const response = await fetch(`${this.#baseUrl}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
      signal,
    })
    if (!response.ok) {
      const errorText = await response.text().catch(() => response.statusText)
      throw new Error(`Ollama API error ${response.status}: ${errorText}`)
    }
    return response
  }
  // -------------------------------------------------------------------------
  // chat()
  // -------------------------------------------------------------------------
  /**
   * Send a synchronous (non-streaming) chat request and return the complete
   * {@link LLMResponse}.
   *
   * Throws an `Error` on non-2xx responses or network failures. Callers should
   * catch and handle these (e.g. model not found, Ollama not running).
   */
  async chat(messages: LLMMessage[], options: LLMChatOptions): Promise<LLMResponse> {
    const body = this.#buildRequestBody(messages, options, false)
    const response = await this.#send(body, options.abortSignal)
    const data: OllamaChatResponse = (await response.json()) as OllamaChatResponse
    return fromOllamaResponse(data)
  }
  // -------------------------------------------------------------------------
  // stream()
  // -------------------------------------------------------------------------
  /**
   * Send a streaming chat request and yield {@link StreamEvent}s incrementally.
   *
   * Ollama streams responses as NDJSON (newline-delimited JSON). Text deltas
   * are emitted immediately; tool calls (when present) are accumulated and
   * emitted after the stream ends, matching the contract of other adapters.
   *
   * Sequence guarantees:
   * - Zero or more `text` events (incremental text deltas)
   * - Zero or more `tool_use` events (emitted once per tool call, after stream ends)
   * - Exactly one terminal event: `done` or `error`
   */
  async *stream(
    messages: LLMMessage[],
    options: LLMStreamOptions,
  ): AsyncIterable<StreamEvent> {
    const body = this.#buildRequestBody(messages, options, true)
    try {
      const response = await this.#send(body, options.abortSignal)
      if (response.body === null) {
        throw new Error('Ollama streaming response has no body')
      }
      // State accumulated across NDJSON chunks.
      let responseModel = options.model
      let doneReason: string | undefined
      let inputTokens = 0
      let outputTokens = 0
      let fullText = ''
      const toolCallBuffers: OllamaToolCall[] = []
      // Apply one parsed chunk to the accumulators and return the text delta
      // to emit (or null when the chunk carries no text). A closure rather
      // than a method so the read loop and the EOF tail share one code path.
      const applyChunk = (chunk: OllamaChatResponse): string | null => {
        responseModel = chunk.model
        for (const tc of chunk.message.tool_calls ?? []) {
          toolCallBuffers.push(tc)
        }
        if (chunk.done) {
          doneReason = chunk.done_reason
          inputTokens = chunk.prompt_eval_count ?? 0
          outputTokens = chunk.eval_count ?? 0
        }
        return chunk.message.content.length > 0 ? chunk.message.content : null
      }
      // Read the NDJSON stream line by line.
      const reader = response.body.getReader()
      const decoder = new TextDecoder()
      let lineBuffer = ''
      try {
        while (true) {
          const { done, value } = await reader.read()
          if (done) break
          lineBuffer += decoder.decode(value, { stream: true })
          const lines = lineBuffer.split('\n')
          // Keep the last (possibly incomplete) line in the buffer.
          lineBuffer = lines.pop() ?? ''
          for (const line of lines) {
            const chunk = parseStreamChunk(line)
            if (chunk === null) continue
            const delta = applyChunk(chunk)
            if (delta !== null) {
              fullText += delta
              const textEvent: StreamEvent = { type: 'text', data: delta }
              yield textEvent
            }
          }
        }
        // Flush the decoder — a multi-byte UTF-8 character may straddle the
        // final read — then process whatever remains in the line buffer.
        lineBuffer += decoder.decode()
        const tail = parseStreamChunk(lineBuffer)
        if (tail !== null) {
          const delta = applyChunk(tail)
          if (delta !== null) {
            fullText += delta
            const textEvent: StreamEvent = { type: 'text', data: delta }
            yield textEvent
          }
        }
      } finally {
        reader.releaseLock()
      }
      // Emit accumulated tool_use events after the stream ends.
      const finalToolUseBlocks: ToolUseBlock[] = []
      for (const tc of toolCallBuffers) {
        const toolUseBlock: ToolUseBlock = {
          type: 'tool_use',
          id: crypto.randomUUID(),
          name: tc.function.name,
          input: parseToolArguments(tc.function.arguments),
        }
        finalToolUseBlocks.push(toolUseBlock)
        const toolUseEvent: StreamEvent = { type: 'tool_use', data: toolUseBlock }
        yield toolUseEvent
      }
      // Build the complete content array for the terminal done event.
      const doneContent: ContentBlock[] = []
      if (fullText.length > 0) {
        const textBlock: TextBlock = { type: 'text', text: fullText }
        doneContent.push(textBlock)
      }
      doneContent.push(...finalToolUseBlocks)
      const finalResponse: LLMResponse = {
        id: crypto.randomUUID(),
        content: doneContent,
        model: responseModel,
        stop_reason: normalizeDoneReason(doneReason),
        usage: { input_tokens: inputTokens, output_tokens: outputTokens },
      }
      const doneEvent: StreamEvent = { type: 'done', data: finalResponse }
      yield doneEvent
    } catch (err) {
      // Any failure (network, HTTP status, abort) surfaces as a terminal
      // `error` event rather than a thrown exception.
      const error = err instanceof Error ? err : new Error(String(err))
      const errorEvent: StreamEvent = { type: 'error', data: error }
      yield errorEvent
    }
  }
}
// ---------------------------------------------------------------------------
// Private utility
// ---------------------------------------------------------------------------
/**
 * Parse a single NDJSON line from the Ollama streaming response.
 *
 * @returns The decoded chunk, or `null` when the line is blank or not
 *   valid JSON (callers skip such lines silently).
 */
function parseStreamChunk(line: string): OllamaChatResponse | null {
  const text = line.trim()
  if (text === '') {
    return null
  }
  try {
    return JSON.parse(text) as OllamaChatResponse
  } catch {
    return null
  }
}
// Re-export types that consumers of this module commonly need alongside the adapter.
export type {
ContentBlock,
LLMAdapter,
LLMChatOptions,
LLMMessage,
LLMResponse,
LLMStreamOptions,
LLMToolDef,
StreamEvent,
}

View File

@ -186,7 +186,7 @@ export interface ToolDefinition<TInput = Record<string, unknown>> {
export interface AgentConfig {
readonly name: string
readonly model: string
readonly provider?: 'anthropic' | 'openai'
readonly provider?: 'anthropic' | 'openai' | 'ollama'
readonly systemPrompt?: string
/** Names of tools (from the tool registry) available to this agent. */
readonly tools?: readonly string[]
@ -285,7 +285,7 @@ export interface OrchestratorEvent {
export interface OrchestratorConfig {
readonly maxConcurrency?: number
readonly defaultModel?: string
readonly defaultProvider?: 'anthropic' | 'openai'
readonly defaultProvider?: 'anthropic' | 'openai' | 'ollama'
onProgress?: (event: OrchestratorEvent) => void
}