feat: add OllamaAdapter with streaming, tool calling, and factory wiring
Agent-Logs-Url: https://github.com/m-prunty/open-multi-agent/sessions/1057f1b1-f24b-4363-8cdb-ab9188e5a262 Co-authored-by: m-prunty <27181505+m-prunty@users.noreply.github.com>
This commit is contained in:
parent
ad317610ff
commit
7bc23521f4
|
|
@ -105,6 +105,7 @@ export {
|
||||||
|
|
||||||
export { createAdapter } from './llm/adapter.js'
|
export { createAdapter } from './llm/adapter.js'
|
||||||
export type { SupportedProvider } from './llm/adapter.js'
|
export type { SupportedProvider } from './llm/adapter.js'
|
||||||
|
export { OllamaAdapter } from './llm/ollama.js'
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Memory
|
// Memory
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,8 @@
|
||||||
*
|
*
|
||||||
* const anthropic = createAdapter('anthropic')
|
* const anthropic = createAdapter('anthropic')
|
||||||
* const openai = createAdapter('openai', process.env.OPENAI_API_KEY)
|
* const openai = createAdapter('openai', process.env.OPENAI_API_KEY)
|
||||||
|
* const ollama = createAdapter('ollama') // uses http://localhost:11434
|
||||||
|
* const ollamaRemote = createAdapter('ollama', 'http://my-server:11434')
|
||||||
* ```
|
* ```
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
@ -37,33 +39,42 @@ import type { LLMAdapter } from '../types.js'
|
||||||
* Additional providers can be integrated by implementing {@link LLMAdapter}
|
* Additional providers can be integrated by implementing {@link LLMAdapter}
|
||||||
* directly and bypassing this factory.
|
* directly and bypassing this factory.
|
||||||
*/
|
*/
|
||||||
export type SupportedProvider = 'anthropic' | 'openai'
|
export type SupportedProvider = 'anthropic' | 'openai' | 'ollama'
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instantiate the appropriate {@link LLMAdapter} for the given provider.
|
* Instantiate the appropriate {@link LLMAdapter} for the given provider.
|
||||||
*
|
*
|
||||||
* API keys fall back to the standard environment variables
|
* For `'anthropic'` and `'openai'`, the second argument is an API key that
|
||||||
* (`ANTHROPIC_API_KEY` / `OPENAI_API_KEY`) when not supplied explicitly.
|
* falls back to the standard environment variables (`ANTHROPIC_API_KEY` /
|
||||||
|
* `OPENAI_API_KEY`) when not supplied explicitly.
|
||||||
|
*
|
||||||
|
* For `'ollama'`, the second argument is the base URL of the Ollama server
|
||||||
|
* (e.g. `'http://localhost:11434'`). It falls back to the `OLLAMA_BASE_URL`
|
||||||
|
* environment variable, then `http://localhost:11434`.
|
||||||
*
|
*
|
||||||
* Adapters are imported lazily so that projects using only one provider
|
* Adapters are imported lazily so that projects using only one provider
|
||||||
* are not forced to install the SDK for the other.
|
* are not forced to install the SDK for the other.
|
||||||
*
|
*
|
||||||
* @param provider - Which LLM provider to target.
|
* @param provider - Which LLM provider to target.
|
||||||
* @param apiKey - Optional API key override; falls back to env var.
|
* @param credential - API key (anthropic/openai) or base URL (ollama).
|
||||||
* @throws {Error} When the provider string is not recognised.
|
* @throws {Error} When the provider string is not recognised.
|
||||||
*/
|
*/
|
||||||
export async function createAdapter(
|
export async function createAdapter(
|
||||||
provider: SupportedProvider,
|
provider: SupportedProvider,
|
||||||
apiKey?: string,
|
credential?: string,
|
||||||
): Promise<LLMAdapter> {
|
): Promise<LLMAdapter> {
|
||||||
switch (provider) {
|
switch (provider) {
|
||||||
case 'anthropic': {
|
case 'anthropic': {
|
||||||
const { AnthropicAdapter } = await import('./anthropic.js')
|
const { AnthropicAdapter } = await import('./anthropic.js')
|
||||||
return new AnthropicAdapter(apiKey)
|
return new AnthropicAdapter(credential)
|
||||||
}
|
}
|
||||||
case 'openai': {
|
case 'openai': {
|
||||||
const { OpenAIAdapter } = await import('./openai.js')
|
const { OpenAIAdapter } = await import('./openai.js')
|
||||||
return new OpenAIAdapter(apiKey)
|
return new OpenAIAdapter(credential)
|
||||||
|
}
|
||||||
|
case 'ollama': {
|
||||||
|
const { OllamaAdapter } = await import('./ollama.js')
|
||||||
|
return new OllamaAdapter(credential)
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
// The `never` cast here makes TypeScript enforce exhaustiveness.
|
// The `never` cast here makes TypeScript enforce exhaustiveness.
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,544 @@
|
||||||
|
/// <reference types="node" />
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @fileoverview Ollama adapter implementing {@link LLMAdapter}.
|
||||||
|
*
|
||||||
|
* Calls the Ollama HTTP API at `/api/chat` to run local models (Qwen, Llama,
|
||||||
|
* Mistral, etc.) without any external SDK. Uses the Node.js built-in `fetch`
|
||||||
|
* API (available since Node 18).
|
||||||
|
*
|
||||||
|
* Key mapping decisions:
|
||||||
|
*
|
||||||
|
* - Framework `tool_use` blocks in assistant messages → Ollama `tool_calls`
|
||||||
|
* - Framework `tool_result` blocks in user messages → Ollama `tool` role messages
|
||||||
|
* - System prompt in {@link LLMChatOptions} → prepended `system` message
|
||||||
|
* - Ollama does not return IDs for tool calls; → IDs are generated with
|
||||||
|
* `crypto.randomUUID()`
|
||||||
|
*
|
||||||
|
* Base URL resolution order:
|
||||||
|
* 1. `baseUrl` constructor argument
|
||||||
|
* 2. `OLLAMA_BASE_URL` environment variable
|
||||||
|
* 3. `http://localhost:11434`
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* ```ts
|
||||||
|
* import { OllamaAdapter } from './ollama.js'
|
||||||
|
*
|
||||||
|
* const adapter = new OllamaAdapter()
|
||||||
|
* const response = await adapter.chat(messages, {
|
||||||
|
* model: 'qwen2.5',
|
||||||
|
* maxTokens: 1024,
|
||||||
|
* })
|
||||||
|
* ```
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type {
|
||||||
|
ContentBlock,
|
||||||
|
LLMAdapter,
|
||||||
|
LLMChatOptions,
|
||||||
|
LLMMessage,
|
||||||
|
LLMResponse,
|
||||||
|
LLMStreamOptions,
|
||||||
|
LLMToolDef,
|
||||||
|
StreamEvent,
|
||||||
|
TextBlock,
|
||||||
|
ToolUseBlock,
|
||||||
|
} from '../types.js'
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Ollama wire types
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
interface OllamaToolCallFunction {
|
||||||
|
name: string
|
||||||
|
arguments: Record<string, unknown>
|
||||||
|
}
|
||||||
|
|
||||||
|
interface OllamaToolCall {
|
||||||
|
function: OllamaToolCallFunction
|
||||||
|
}
|
||||||
|
|
||||||
|
interface OllamaMessage {
|
||||||
|
role: 'user' | 'assistant' | 'system' | 'tool'
|
||||||
|
content: string
|
||||||
|
tool_calls?: OllamaToolCall[]
|
||||||
|
}
|
||||||
|
|
||||||
|
interface OllamaTool {
|
||||||
|
type: 'function'
|
||||||
|
function: {
|
||||||
|
name: string
|
||||||
|
description: string
|
||||||
|
parameters: Record<string, unknown>
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
interface OllamaOptions {
|
||||||
|
temperature?: number
|
||||||
|
num_predict?: number
|
||||||
|
}
|
||||||
|
|
||||||
|
interface OllamaChatRequest {
|
||||||
|
model: string
|
||||||
|
messages: OllamaMessage[]
|
||||||
|
tools?: OllamaTool[]
|
||||||
|
options?: OllamaOptions
|
||||||
|
stream: boolean
|
||||||
|
}
|
||||||
|
|
||||||
|
interface OllamaChatResponse {
|
||||||
|
model: string
|
||||||
|
created_at: string
|
||||||
|
message: OllamaMessage
|
||||||
|
done: boolean
|
||||||
|
done_reason?: string
|
||||||
|
prompt_eval_count?: number
|
||||||
|
eval_count?: number
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Internal helpers — framework → Ollama
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert a framework {@link LLMToolDef} to an Ollama tool definition.
|
||||||
|
*/
|
||||||
|
function toOllamaTool(tool: LLMToolDef): OllamaTool {
|
||||||
|
return {
|
||||||
|
type: 'function',
|
||||||
|
function: {
|
||||||
|
name: tool.name,
|
||||||
|
description: tool.description,
|
||||||
|
parameters: tool.inputSchema as Record<string, unknown>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert framework {@link LLMMessage} array (plus optional system prompt)
|
||||||
|
* into an Ollama messages array.
|
||||||
|
*
|
||||||
|
* - `tool_result` blocks become separate `tool`-role messages (one per block)
|
||||||
|
* - `tool_use` blocks in assistant messages become `tool_calls`
|
||||||
|
* - A system prompt is prepended as a `system`-role message
|
||||||
|
*/
|
||||||
|
function toOllamaMessages(
|
||||||
|
messages: LLMMessage[],
|
||||||
|
systemPrompt: string | undefined,
|
||||||
|
): OllamaMessage[] {
|
||||||
|
const result: OllamaMessage[] = []
|
||||||
|
|
||||||
|
if (systemPrompt !== undefined && systemPrompt.length > 0) {
|
||||||
|
result.push({ role: 'system', content: systemPrompt })
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const msg of messages) {
|
||||||
|
if (msg.role === 'assistant') {
|
||||||
|
result.push(toOllamaAssistantMessage(msg))
|
||||||
|
} else {
|
||||||
|
// user role — split tool_result blocks out as separate `tool` messages
|
||||||
|
const toolResultBlocks = msg.content.filter((b) => b.type === 'tool_result')
|
||||||
|
const otherBlocks = msg.content.filter((b) => b.type !== 'tool_result')
|
||||||
|
|
||||||
|
if (otherBlocks.length > 0) {
|
||||||
|
const textContent = otherBlocks
|
||||||
|
.filter((b): b is TextBlock => b.type === 'text')
|
||||||
|
.map((b) => b.text)
|
||||||
|
.join('\n')
|
||||||
|
result.push({ role: 'user', content: textContent })
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const block of toolResultBlocks) {
|
||||||
|
if (block.type === 'tool_result') {
|
||||||
|
result.push({ role: 'tool', content: block.content })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert an `assistant`-role framework message to an Ollama assistant message.
|
||||||
|
*
|
||||||
|
* Any `tool_use` blocks become `tool_calls`; `text` blocks become the message
|
||||||
|
* content string.
|
||||||
|
*/
|
||||||
|
function toOllamaAssistantMessage(msg: LLMMessage): OllamaMessage {
|
||||||
|
const toolCalls: OllamaToolCall[] = []
|
||||||
|
const textParts: string[] = []
|
||||||
|
|
||||||
|
for (const block of msg.content) {
|
||||||
|
if (block.type === 'tool_use') {
|
||||||
|
toolCalls.push({
|
||||||
|
function: {
|
||||||
|
name: block.name,
|
||||||
|
arguments: block.input,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
} else if (block.type === 'text') {
|
||||||
|
textParts.push(block.text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const message: OllamaMessage = {
|
||||||
|
role: 'assistant',
|
||||||
|
content: textParts.join(''),
|
||||||
|
}
|
||||||
|
|
||||||
|
if (toolCalls.length > 0) {
|
||||||
|
message.tool_calls = toolCalls
|
||||||
|
}
|
||||||
|
|
||||||
|
return message
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Internal helpers — Ollama → framework
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse tool call arguments from an Ollama response.
|
||||||
|
*
|
||||||
|
* Ollama returns `arguments` as a JSON object directly, but we defensively
|
||||||
|
* handle the case where it arrives as a serialised string (e.g. some older
|
||||||
|
* model releases).
|
||||||
|
*/
|
||||||
|
function parseToolArguments(args: unknown): Record<string, unknown> {
|
||||||
|
if (args !== null && typeof args === 'object' && !Array.isArray(args)) {
|
||||||
|
return args as Record<string, unknown>
|
||||||
|
}
|
||||||
|
if (typeof args === 'string') {
|
||||||
|
try {
|
||||||
|
const parsed: unknown = JSON.parse(args)
|
||||||
|
if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
|
||||||
|
return parsed as Record<string, unknown>
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Malformed JSON — surface as empty object.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return {}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Normalise an Ollama `done_reason` string to the framework's canonical
|
||||||
|
* stop-reason vocabulary.
|
||||||
|
*
|
||||||
|
* Mapping:
|
||||||
|
* - `'stop'` → `'end_turn'`
|
||||||
|
* - `'tool_calls'` → `'tool_use'`
|
||||||
|
* - `'length'` → `'max_tokens'`
|
||||||
|
* - anything else → passed through unchanged
|
||||||
|
*/
|
||||||
|
function normalizeDoneReason(reason: string | undefined): string {
|
||||||
|
switch (reason) {
|
||||||
|
case 'stop': return 'end_turn'
|
||||||
|
case 'tool_calls': return 'tool_use'
|
||||||
|
case 'length': return 'max_tokens'
|
||||||
|
default: return reason ?? 'end_turn'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert an {@link OllamaChatResponse} into a framework {@link LLMResponse}.
|
||||||
|
*/
|
||||||
|
function fromOllamaResponse(data: OllamaChatResponse): LLMResponse {
|
||||||
|
const content: ContentBlock[] = []
|
||||||
|
const message = data.message
|
||||||
|
|
||||||
|
if (message.content.length > 0) {
|
||||||
|
const textBlock: TextBlock = { type: 'text', text: message.content }
|
||||||
|
content.push(textBlock)
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const tc of message.tool_calls ?? []) {
|
||||||
|
const toolUseBlock: ToolUseBlock = {
|
||||||
|
type: 'tool_use',
|
||||||
|
id: crypto.randomUUID(),
|
||||||
|
name: tc.function.name,
|
||||||
|
input: parseToolArguments(tc.function.arguments),
|
||||||
|
}
|
||||||
|
content.push(toolUseBlock)
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
id: crypto.randomUUID(),
|
||||||
|
content,
|
||||||
|
model: data.model,
|
||||||
|
stop_reason: normalizeDoneReason(data.done_reason),
|
||||||
|
usage: {
|
||||||
|
input_tokens: data.prompt_eval_count ?? 0,
|
||||||
|
output_tokens: data.eval_count ?? 0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Adapter implementation
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* LLM adapter backed by the Ollama HTTP API.
|
||||||
|
*
|
||||||
|
* Requires a locally-running Ollama instance. The default base URL is
|
||||||
|
* `http://localhost:11434`; override via the constructor or the
|
||||||
|
* `OLLAMA_BASE_URL` environment variable.
|
||||||
|
*
|
||||||
|
* No API key is required — Ollama runs entirely on your own hardware.
|
||||||
|
*
|
||||||
|
* Thread-safe — a single instance may be shared across concurrent agent runs.
|
||||||
|
*/
|
||||||
|
export class OllamaAdapter implements LLMAdapter {
|
||||||
|
readonly name = 'ollama'
|
||||||
|
|
||||||
|
readonly #baseUrl: string
|
||||||
|
|
||||||
|
constructor(baseUrl?: string) {
|
||||||
|
this.#baseUrl = (
|
||||||
|
baseUrl ?? process.env['OLLAMA_BASE_URL'] ?? 'http://localhost:11434'
|
||||||
|
).replace(/\/$/, '')
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// chat()
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Send a synchronous (non-streaming) chat request and return the complete
|
||||||
|
* {@link LLMResponse}.
|
||||||
|
*
|
||||||
|
* Throws an `Error` on non-2xx responses or network failures. Callers should
|
||||||
|
* catch and handle these (e.g. model not found, Ollama not running).
|
||||||
|
*/
|
||||||
|
async chat(messages: LLMMessage[], options: LLMChatOptions): Promise<LLMResponse> {
|
||||||
|
const ollamaMessages = toOllamaMessages(messages, options.systemPrompt)
|
||||||
|
|
||||||
|
const body: OllamaChatRequest = {
|
||||||
|
model: options.model,
|
||||||
|
messages: ollamaMessages,
|
||||||
|
stream: false,
|
||||||
|
}
|
||||||
|
|
||||||
|
if (options.tools !== undefined && options.tools.length > 0) {
|
||||||
|
body.tools = options.tools.map(toOllamaTool)
|
||||||
|
}
|
||||||
|
|
||||||
|
const ollamaOptions: OllamaOptions = {}
|
||||||
|
if (options.temperature !== undefined) ollamaOptions.temperature = options.temperature
|
||||||
|
if (options.maxTokens !== undefined) ollamaOptions.num_predict = options.maxTokens
|
||||||
|
if (Object.keys(ollamaOptions).length > 0) body.options = ollamaOptions
|
||||||
|
|
||||||
|
const response = await fetch(`${this.#baseUrl}/api/chat`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify(body),
|
||||||
|
signal: options.abortSignal,
|
||||||
|
})
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
const errorText = await response.text().catch(() => response.statusText)
|
||||||
|
throw new Error(`Ollama API error ${response.status}: ${errorText}`)
|
||||||
|
}
|
||||||
|
|
||||||
|
const data: OllamaChatResponse = (await response.json()) as OllamaChatResponse
|
||||||
|
return fromOllamaResponse(data)
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// stream()
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Send a streaming chat request and yield {@link StreamEvent}s incrementally.
|
||||||
|
*
|
||||||
|
* Ollama streams responses as NDJSON (newline-delimited JSON). Text deltas
|
||||||
|
* are emitted immediately; tool calls (when present) are accumulated and
|
||||||
|
* emitted after the stream ends, matching the contract of other adapters.
|
||||||
|
*
|
||||||
|
* Sequence guarantees:
|
||||||
|
* - Zero or more `text` events (incremental text deltas)
|
||||||
|
* - Zero or more `tool_use` events (emitted once per tool call, after stream ends)
|
||||||
|
* - Exactly one terminal event: `done` or `error`
|
||||||
|
*/
|
||||||
|
async *stream(
|
||||||
|
messages: LLMMessage[],
|
||||||
|
options: LLMStreamOptions,
|
||||||
|
): AsyncIterable<StreamEvent> {
|
||||||
|
const ollamaMessages = toOllamaMessages(messages, options.systemPrompt)
|
||||||
|
|
||||||
|
const body: OllamaChatRequest = {
|
||||||
|
model: options.model,
|
||||||
|
messages: ollamaMessages,
|
||||||
|
stream: true,
|
||||||
|
}
|
||||||
|
|
||||||
|
if (options.tools !== undefined && options.tools.length > 0) {
|
||||||
|
body.tools = options.tools.map(toOllamaTool)
|
||||||
|
}
|
||||||
|
|
||||||
|
const ollamaOptions: OllamaOptions = {}
|
||||||
|
if (options.temperature !== undefined) ollamaOptions.temperature = options.temperature
|
||||||
|
if (options.maxTokens !== undefined) ollamaOptions.num_predict = options.maxTokens
|
||||||
|
if (Object.keys(ollamaOptions).length > 0) body.options = ollamaOptions
|
||||||
|
|
||||||
|
try {
|
||||||
|
const response = await fetch(`${this.#baseUrl}/api/chat`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify(body),
|
||||||
|
signal: options.abortSignal,
|
||||||
|
})
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
const errorText = await response.text().catch(() => response.statusText)
|
||||||
|
throw new Error(`Ollama API error ${response.status}: ${errorText}`)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (response.body === null) {
|
||||||
|
throw new Error('Ollama streaming response has no body')
|
||||||
|
}
|
||||||
|
|
||||||
|
// Accumulate state across chunks.
|
||||||
|
let responseModel = options.model
|
||||||
|
let doneReason: string | undefined
|
||||||
|
let inputTokens = 0
|
||||||
|
let outputTokens = 0
|
||||||
|
let fullText = ''
|
||||||
|
const toolCallBuffers: OllamaToolCall[] = []
|
||||||
|
|
||||||
|
// Read the NDJSON stream line by line.
|
||||||
|
const reader = response.body.getReader()
|
||||||
|
const decoder = new TextDecoder()
|
||||||
|
let lineBuffer = ''
|
||||||
|
|
||||||
|
try {
|
||||||
|
while (true) {
|
||||||
|
const { done, value } = await reader.read()
|
||||||
|
if (done) break
|
||||||
|
|
||||||
|
lineBuffer += decoder.decode(value, { stream: true })
|
||||||
|
const lines = lineBuffer.split('\n')
|
||||||
|
// Keep the last (possibly incomplete) line in the buffer.
|
||||||
|
lineBuffer = lines.pop() ?? ''
|
||||||
|
|
||||||
|
for (const line of lines) {
|
||||||
|
const event = parseStreamChunk(line)
|
||||||
|
if (event === null) continue
|
||||||
|
|
||||||
|
responseModel = event.model
|
||||||
|
|
||||||
|
if (event.message.content.length > 0) {
|
||||||
|
fullText += event.message.content
|
||||||
|
const textEvent: StreamEvent = { type: 'text', data: event.message.content }
|
||||||
|
yield textEvent
|
||||||
|
}
|
||||||
|
|
||||||
|
if (event.message.tool_calls !== undefined) {
|
||||||
|
for (const tc of event.message.tool_calls) {
|
||||||
|
toolCallBuffers.push(tc)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (event.done) {
|
||||||
|
doneReason = event.done_reason
|
||||||
|
inputTokens = event.prompt_eval_count ?? 0
|
||||||
|
outputTokens = event.eval_count ?? 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle any remaining data left in the line buffer after EOF.
|
||||||
|
const finalEvent = parseStreamChunk(lineBuffer)
|
||||||
|
if (finalEvent !== null) {
|
||||||
|
responseModel = finalEvent.model
|
||||||
|
if (finalEvent.message.content.length > 0) {
|
||||||
|
fullText += finalEvent.message.content
|
||||||
|
const textEvent: StreamEvent = { type: 'text', data: finalEvent.message.content }
|
||||||
|
yield textEvent
|
||||||
|
}
|
||||||
|
if (finalEvent.message.tool_calls !== undefined) {
|
||||||
|
for (const tc of finalEvent.message.tool_calls) {
|
||||||
|
toolCallBuffers.push(tc)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (finalEvent.done) {
|
||||||
|
doneReason = finalEvent.done_reason
|
||||||
|
inputTokens = finalEvent.prompt_eval_count ?? 0
|
||||||
|
outputTokens = finalEvent.eval_count ?? 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
reader.releaseLock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emit accumulated tool_use events after the stream ends.
|
||||||
|
const finalToolUseBlocks: ToolUseBlock[] = []
|
||||||
|
for (const tc of toolCallBuffers) {
|
||||||
|
const toolUseBlock: ToolUseBlock = {
|
||||||
|
type: 'tool_use',
|
||||||
|
id: crypto.randomUUID(),
|
||||||
|
name: tc.function.name,
|
||||||
|
input: parseToolArguments(tc.function.arguments),
|
||||||
|
}
|
||||||
|
finalToolUseBlocks.push(toolUseBlock)
|
||||||
|
const toolUseEvent: StreamEvent = { type: 'tool_use', data: toolUseBlock }
|
||||||
|
yield toolUseEvent
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build the complete content array for the done response.
|
||||||
|
const doneContent: ContentBlock[] = []
|
||||||
|
if (fullText.length > 0) {
|
||||||
|
const textBlock: TextBlock = { type: 'text', text: fullText }
|
||||||
|
doneContent.push(textBlock)
|
||||||
|
}
|
||||||
|
doneContent.push(...finalToolUseBlocks)
|
||||||
|
|
||||||
|
const finalResponse: LLMResponse = {
|
||||||
|
id: crypto.randomUUID(),
|
||||||
|
content: doneContent,
|
||||||
|
model: responseModel,
|
||||||
|
stop_reason: normalizeDoneReason(doneReason),
|
||||||
|
usage: { input_tokens: inputTokens, output_tokens: outputTokens },
|
||||||
|
}
|
||||||
|
|
||||||
|
const doneEvent: StreamEvent = { type: 'done', data: finalResponse }
|
||||||
|
yield doneEvent
|
||||||
|
} catch (err) {
|
||||||
|
const error = err instanceof Error ? err : new Error(String(err))
|
||||||
|
const errorEvent: StreamEvent = { type: 'error', data: error }
|
||||||
|
yield errorEvent
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Private utility
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse a single NDJSON line from the Ollama streaming response.
|
||||||
|
* Returns `null` for empty or unparseable lines.
|
||||||
|
*/
|
||||||
|
function parseStreamChunk(line: string): OllamaChatResponse | null {
|
||||||
|
const trimmed = line.trim()
|
||||||
|
if (trimmed.length === 0) return null
|
||||||
|
try {
|
||||||
|
return JSON.parse(trimmed) as OllamaChatResponse
|
||||||
|
} catch {
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Re-export types that consumers of this module commonly need alongside the adapter.
|
||||||
|
export type {
|
||||||
|
ContentBlock,
|
||||||
|
LLMAdapter,
|
||||||
|
LLMChatOptions,
|
||||||
|
LLMMessage,
|
||||||
|
LLMResponse,
|
||||||
|
LLMStreamOptions,
|
||||||
|
LLMToolDef,
|
||||||
|
StreamEvent,
|
||||||
|
}
|
||||||
|
|
@ -186,7 +186,7 @@ export interface ToolDefinition<TInput = Record<string, unknown>> {
|
||||||
export interface AgentConfig {
|
export interface AgentConfig {
|
||||||
readonly name: string
|
readonly name: string
|
||||||
readonly model: string
|
readonly model: string
|
||||||
readonly provider?: 'anthropic' | 'openai'
|
readonly provider?: 'anthropic' | 'openai' | 'ollama'
|
||||||
readonly systemPrompt?: string
|
readonly systemPrompt?: string
|
||||||
/** Names of tools (from the tool registry) available to this agent. */
|
/** Names of tools (from the tool registry) available to this agent. */
|
||||||
readonly tools?: readonly string[]
|
readonly tools?: readonly string[]
|
||||||
|
|
@ -285,7 +285,7 @@ export interface OrchestratorEvent {
|
||||||
export interface OrchestratorConfig {
|
export interface OrchestratorConfig {
|
||||||
readonly maxConcurrency?: number
|
readonly maxConcurrency?: number
|
||||||
readonly defaultModel?: string
|
readonly defaultModel?: string
|
||||||
readonly defaultProvider?: 'anthropic' | 'openai'
|
readonly defaultProvider?: 'anthropic' | 'openai' | 'ollama'
|
||||||
onProgress?: (event: OrchestratorEvent) => void
|
onProgress?: (event: OrchestratorEvent) => void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue