Phase 1B: Add vLLM adapter with shared OpenAI-compat helpers

- Extract shared OpenAI-format conversion helpers to openai-compat.ts
- Refactor OpenAIAdapter to import from shared module
- Create VLLMAdapter class reusing openai-compat helpers
- Add VLLMConfig type and 'vllm' to SupportedProvider union
- Update createAdapter() to accept VLLMConfig for vllm provider
- Export VLLMAdapter and VLLMConfig from public API

https://claude.ai/code/session_012cMotoivyjuMwbrnDo6YRg
This commit is contained in:
Claude 2026-04-01 00:57:30 +00:00
parent fb97f273a4
commit cadac55ac7
No known key found for this signature in database
6 changed files with 557 additions and 292 deletions

View File

@ -108,6 +108,7 @@ export {
export { createAdapter } from './llm/adapter.js'
export type { SupportedProvider } from './llm/adapter.js'
export { VLLMAdapter } from './llm/vllm.js'
// ---------------------------------------------------------------------------
// Memory
@ -166,4 +167,7 @@ export type {
// Memory
MemoryEntry,
MemoryStore,
// vLLM
VLLMConfig,
} from './types.js'

View File

@ -11,6 +11,7 @@
*
* const anthropic = createAdapter('anthropic')
* const openai = createAdapter('openai', process.env.OPENAI_API_KEY)
* const vllm = createAdapter('vllm', { baseURL: 'http://localhost:8000/v1', model: 'llama3' })
* ```
*/
@ -28,45 +29,59 @@ export type {
ToolUseBlock,
ToolResultBlock,
ImageBlock,
VLLMConfig,
} from '../types.js'
import type { LLMAdapter } from '../types.js'
import type { LLMAdapter, VLLMConfig } from '../types.js'
/**
* The set of LLM providers supported out of the box.
* Additional providers can be integrated by implementing {@link LLMAdapter}
* directly and bypassing this factory.
*/
export type SupportedProvider = 'anthropic' | 'openai'
export type SupportedProvider = 'anthropic' | 'openai' | 'vllm'
/**
* Instantiate the appropriate {@link LLMAdapter} for the given provider.
*
* API keys fall back to the standard environment variables
* (`ANTHROPIC_API_KEY` / `OPENAI_API_KEY`) when not supplied explicitly.
* For `'anthropic'` and `'openai'`, the second argument is an optional API key
* string (falls back to `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` env vars).
*
* For `'vllm'`, the second argument must be a {@link VLLMConfig} object.
*
* Adapters are imported lazily so that projects using only one provider
 * are not forced to install the SDKs for the others.
*
* @param provider - Which LLM provider to target.
* @param apiKey - Optional API key override; falls back to env var.
* @param config - API key string (for anthropic/openai) or VLLMConfig (for vllm).
* @throws {Error} When the provider string is not recognised.
*/
export async function createAdapter(
provider: SupportedProvider,
apiKey?: string,
config?: string | VLLMConfig,
): Promise<LLMAdapter> {
switch (provider) {
case 'anthropic': {
const { AnthropicAdapter } = await import('./anthropic.js')
const apiKey = typeof config === 'string' ? config : undefined
return new AnthropicAdapter(apiKey)
}
case 'openai': {
const { OpenAIAdapter } = await import('./openai.js')
const apiKey = typeof config === 'string' ? config : undefined
return new OpenAIAdapter(apiKey)
}
case 'vllm': {
const { VLLMAdapter } = await import('./vllm.js')
if (typeof config === 'object' && config !== null && 'baseURL' in config) {
return new VLLMAdapter(config as VLLMConfig)
}
throw new Error(
'createAdapter("vllm") requires a VLLMConfig object as the second argument ' +
'(e.g. { baseURL: "http://localhost:8000/v1", model: "llama3" }).',
)
}
default: {
// The `never` cast here makes TypeScript enforce exhaustiveness.
const _exhaustive: never = provider
throw new Error(`Unsupported LLM provider: ${String(_exhaustive)}`)
}

252
src/llm/openai-compat.ts Normal file
View File

@ -0,0 +1,252 @@
/**
* @fileoverview Shared OpenAI-format helpers for adapters that speak the
* OpenAI Chat Completions wire format (OpenAI, vLLM, etc.).
*
* Both {@link OpenAIAdapter} and {@link VLLMAdapter} import from this module
* to avoid duplicating conversion logic.
*
* @module @vcg/agent-sdk
*/
import type OpenAI from 'openai'
import type {
ChatCompletion,
ChatCompletionAssistantMessageParam,
ChatCompletionMessageParam,
ChatCompletionMessageToolCall,
ChatCompletionTool,
ChatCompletionToolMessageParam,
ChatCompletionUserMessageParam,
} from 'openai/resources/chat/completions/index.js'
import type {
ContentBlock,
LLMMessage,
LLMResponse,
LLMToolDef,
TextBlock,
ToolUseBlock,
} from '../types.js'
// ---------------------------------------------------------------------------
// Framework -> OpenAI format
// ---------------------------------------------------------------------------
/**
 * Convert a framework {@link LLMToolDef} into the OpenAI
 * {@link ChatCompletionTool} wire shape.
 *
 * The framework's `inputSchema` is already a JSON Schema object, so it maps
 * directly onto OpenAI's `parameters` field.
 */
export function toOpenAITool(tool: LLMToolDef): ChatCompletionTool {
  const { name, description, inputSchema } = tool
  return {
    type: 'function',
    function: {
      name,
      description,
      parameters: inputSchema as Record<string, unknown>,
    },
  }
}
/**
 * True when a framework message carries at least one `tool_result` content
 * block. Such messages must be expanded into separate OpenAI `tool`-role
 * messages by the caller.
 */
export function hasToolResults(msg: LLMMessage): boolean {
  for (const block of msg.content) {
    if (block.type === 'tool_result') {
      return true
    }
  }
  return false
}
/**
 * Convert framework messages into OpenAI {@link ChatCompletionMessageParam}
 * entries.
 *
 * OpenAI represents tool results as top-level `tool`-role messages, whereas
 * the framework embeds them as `tool_result` blocks inside a `user` message —
 * so a single framework message may expand into several wire messages.
 */
export function toOpenAIMessages(messages: LLMMessage[]): ChatCompletionMessageParam[] {
  const out: ChatCompletionMessageParam[] = []
  for (const msg of messages) {
    if (msg.role === 'assistant') {
      out.push(toOpenAIAssistantMessage(msg))
      continue
    }
    // user role: the simple case has no tool_result blocks to expand.
    if (!hasToolResults(msg)) {
      out.push(toOpenAIUserMessage(msg))
      continue
    }
    // Fold any remaining text/image blocks into a user message first…
    const plainBlocks = msg.content.filter((b) => b.type !== 'tool_result')
    if (plainBlocks.length > 0) {
      out.push(toOpenAIUserMessage({ role: 'user', content: plainBlocks }))
    }
    // …then emit one `tool`-role message per tool_result block.
    for (const block of msg.content) {
      if (block.type !== 'tool_result') continue
      const toolMsg: ChatCompletionToolMessageParam = {
        role: 'tool',
        tool_call_id: block.tool_use_id,
        content: block.content,
      }
      out.push(toolMsg)
    }
  }
  return out
}
/**
 * Convert a `user`-role framework message into an OpenAI user message.
 *
 * A lone text block uses the compact string form; otherwise text and image
 * blocks become OpenAI content parts (images as base64 data URLs).
 * `tool_result` blocks are expanded by the caller and ignored here.
 */
export function toOpenAIUserMessage(msg: LLMMessage): ChatCompletionUserMessageParam {
  const [first] = msg.content
  if (msg.content.length === 1 && first?.type === 'text') {
    // Compact string payload keeps the request smaller.
    return { role: 'user', content: first.text }
  }
  type ContentPart = OpenAI.Chat.ChatCompletionContentPartText | OpenAI.Chat.ChatCompletionContentPartImage
  const parts = msg.content.flatMap((block): ContentPart[] => {
    if (block.type === 'text') {
      return [{ type: 'text', text: block.text }]
    }
    if (block.type === 'image') {
      const url = `data:${block.source.media_type};base64,${block.source.data}`
      return [{ type: 'image_url', image_url: { url } }]
    }
    // tool_result (or any other) blocks are not user content parts; skip.
    return []
  })
  return { role: 'user', content: parts }
}
/**
 * Convert an `assistant`-role framework message into an OpenAI assistant
 * message. `text` blocks are concatenated into the message content (or `null`
 * when there is no text); `tool_use` blocks become `tool_calls` entries with
 * their input JSON-serialised.
 */
export function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessageParam {
  const textPieces: string[] = []
  const calls: ChatCompletionMessageToolCall[] = []
  for (const block of msg.content) {
    switch (block.type) {
      case 'text':
        textPieces.push(block.text)
        break
      case 'tool_use':
        calls.push({
          id: block.id,
          type: 'function',
          function: {
            name: block.name,
            arguments: JSON.stringify(block.input),
          },
        })
        break
      default:
        // Other block kinds (e.g. images) do not occur in assistant output.
        break
    }
  }
  const result: ChatCompletionAssistantMessageParam = {
    role: 'assistant',
    content: textPieces.length > 0 ? textPieces.join('') : null,
  }
  // Only attach tool_calls when at least one call exists.
  if (calls.length > 0) {
    result.tool_calls = calls
  }
  return result
}
// ---------------------------------------------------------------------------
// OpenAI format -> Framework
// ---------------------------------------------------------------------------
/**
 * Convert an OpenAI {@link ChatCompletion} into a framework {@link LLMResponse}.
 *
 * Only the first choice (index 0) is consumed, consistent with the
 * framework's single-output agent design. Missing usage figures default to 0.
 *
 * @throws {Error} When the completion contains no choices at all.
 */
export function fromOpenAICompletion(completion: ChatCompletion): LLMResponse {
  const choice = completion.choices[0]
  if (choice === undefined) {
    throw new Error('OpenAI returned a completion with no choices')
  }
  const { message } = choice
  const content: ContentBlock[] = []
  // A present content string (even empty) becomes a single text block.
  if (message.content !== null && message.content !== undefined) {
    const text: TextBlock = { type: 'text', text: message.content }
    content.push(text)
  }
  // Each tool call becomes a tool_use block with its arguments parsed.
  for (const call of message.tool_calls ?? []) {
    const block: ToolUseBlock = {
      type: 'tool_use',
      id: call.id,
      name: call.function.name,
      input: parseToolArguments(call.function.arguments),
    }
    content.push(block)
  }
  return {
    id: completion.id,
    content,
    model: completion.model,
    stop_reason: normalizeFinishReason(choice.finish_reason ?? 'stop'),
    usage: {
      input_tokens: completion.usage?.prompt_tokens ?? 0,
      output_tokens: completion.usage?.completion_tokens ?? 0,
    },
  }
}

/**
 * Best-effort JSON parse of a tool-call `arguments` string. Returns an empty
 * object when the model emitted malformed JSON or a non-object value.
 */
function parseToolArguments(raw: string): Record<string, unknown> {
  try {
    const parsed: unknown = JSON.parse(raw)
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      return parsed as Record<string, unknown>
    }
  } catch {
    // Malformed arguments from the model — fall through to empty object.
  }
  return {}
}
/**
 * Normalize an OpenAI `finish_reason` string to the framework's canonical
 * stop-reason vocabulary.
 *
 * - `'stop'` -> `'end_turn'`
 * - `'tool_calls'` -> `'tool_use'`
 * - `'length'` -> `'max_tokens'`
 * - `'content_filter'` -> `'content_filter'`
 * - anything else -> passed through unchanged
 */
// Map (rather than a plain object) so arbitrary reason strings can never
// collide with inherited Object.prototype keys.
const FINISH_REASON_MAP = new Map<string, string>([
  ['stop', 'end_turn'],
  ['tool_calls', 'tool_use'],
  ['length', 'max_tokens'],
  ['content_filter', 'content_filter'],
])

export function normalizeFinishReason(reason: string): string {
  return FINISH_REASON_MAP.get(reason) ?? reason
}
// ---------------------------------------------------------------------------
// Message list assembly
// ---------------------------------------------------------------------------
/**
 * Build the full OpenAI message array for a request: an optional leading
 * `system` message (OpenAI models system instructions as a message, not a
 * separate parameter) followed by the converted conversation messages.
 */
export function buildOpenAIMessageList(
  messages: LLMMessage[],
  systemPrompt: string | undefined,
): ChatCompletionMessageParam[] {
  const systemMessages: ChatCompletionMessageParam[] =
    systemPrompt !== undefined && systemPrompt.length > 0
      ? [{ role: 'system', content: systemPrompt }]
      : []
  return [...systemMessages, ...toOpenAIMessages(messages)]
}

View File

@ -4,10 +4,10 @@
* Converts between the framework's internal {@link ContentBlock} types and the
* OpenAI Chat Completions wire format. Key mapping decisions:
*
* - Framework `tool_use` blocks in assistant messages OpenAI `tool_calls`
* - Framework `tool_result` blocks in user messages OpenAI `tool` role messages
* - Framework `image` blocks in user messages OpenAI image content parts
* - System prompt in {@link LLMChatOptions} prepended `system` message
* - Framework `tool_use` blocks in assistant messages -> OpenAI `tool_calls`
* - Framework `tool_result` blocks in user messages -> OpenAI `tool` role messages
* - Framework `image` blocks in user messages -> OpenAI image content parts
* - System prompt in {@link LLMChatOptions} -> prepended `system` message
*
* Because OpenAI and Anthropic use fundamentally different role-based structures
* for tool calling (Anthropic embeds tool results in user-role content arrays;
@ -31,16 +31,7 @@
*/
import OpenAI from 'openai'
import type {
ChatCompletion,
ChatCompletionAssistantMessageParam,
ChatCompletionChunk,
ChatCompletionMessageParam,
ChatCompletionMessageToolCall,
ChatCompletionTool,
ChatCompletionToolMessageParam,
ChatCompletionUserMessageParam,
} from 'openai/resources/chat/completions/index.js'
import type { ChatCompletionChunk } from 'openai/resources/chat/completions/index.js'
import type {
ContentBlock,
@ -55,231 +46,12 @@ import type {
ToolUseBlock,
} from '../types.js'
// ---------------------------------------------------------------------------
// Internal helpers — framework → OpenAI
// ---------------------------------------------------------------------------
/**
* Convert a framework {@link LLMToolDef} to an OpenAI {@link ChatCompletionTool}.
*
* OpenAI wraps the function definition inside a `function` key and a `type`
* discriminant. The `inputSchema` is already a JSON Schema object.
*/
function toOpenAITool(tool: LLMToolDef): ChatCompletionTool {
return {
type: 'function',
function: {
name: tool.name,
description: tool.description,
parameters: tool.inputSchema as Record<string, unknown>,
},
}
}
/**
* Determine whether a framework message contains any `tool_result` content
* blocks, which must be serialised as separate OpenAI `tool`-role messages.
*/
function hasToolResults(msg: LLMMessage): boolean {
return msg.content.some((b) => b.type === 'tool_result')
}
/**
* Convert a single framework {@link LLMMessage} into one or more OpenAI
* {@link ChatCompletionMessageParam} entries.
*
* The expansion is necessary because OpenAI represents tool results as
* top-level messages with role `tool`, whereas in our model they are content
* blocks inside a `user` message.
*
* Expansion rules:
* - A `user` message containing only text/image blocks single user message
* - A `user` message containing `tool_result` blocks one `tool` message per
* tool_result block; any remaining text/image blocks are folded into an
* additional user message prepended to the group
* - An `assistant` message single assistant message with optional tool_calls
*/
function toOpenAIMessages(messages: LLMMessage[]): ChatCompletionMessageParam[] {
const result: ChatCompletionMessageParam[] = []
for (const msg of messages) {
if (msg.role === 'assistant') {
result.push(toOpenAIAssistantMessage(msg))
} else {
// user role
if (!hasToolResults(msg)) {
result.push(toOpenAIUserMessage(msg))
} else {
// Split: text/image blocks become a user message (if any exist), then
// each tool_result block becomes an independent tool message.
const nonToolBlocks = msg.content.filter((b) => b.type !== 'tool_result')
if (nonToolBlocks.length > 0) {
result.push(toOpenAIUserMessage({ role: 'user', content: nonToolBlocks }))
}
for (const block of msg.content) {
if (block.type === 'tool_result') {
const toolMsg: ChatCompletionToolMessageParam = {
role: 'tool',
tool_call_id: block.tool_use_id,
content: block.content,
}
result.push(toolMsg)
}
}
}
}
}
return result
}
/**
* Convert a `user`-role framework message into an OpenAI user message.
* Image blocks are converted to the OpenAI image_url content part format.
*/
function toOpenAIUserMessage(msg: LLMMessage): ChatCompletionUserMessageParam {
// If the entire content is a single text block, use the compact string form
// to keep the request payload smaller.
if (msg.content.length === 1 && msg.content[0]?.type === 'text') {
return { role: 'user', content: msg.content[0].text }
}
type ContentPart = OpenAI.Chat.ChatCompletionContentPartText | OpenAI.Chat.ChatCompletionContentPartImage
const parts: ContentPart[] = []
for (const block of msg.content) {
if (block.type === 'text') {
parts.push({ type: 'text', text: block.text })
} else if (block.type === 'image') {
parts.push({
type: 'image_url',
image_url: {
url: `data:${block.source.media_type};base64,${block.source.data}`,
},
})
}
// tool_result blocks are handled by the caller (toOpenAIMessages); skip here.
}
return { role: 'user', content: parts }
}
/**
* Convert an `assistant`-role framework message into an OpenAI assistant message.
*
* Any `tool_use` blocks become `tool_calls`; `text` blocks become the message content.
*/
function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessageParam {
const toolCalls: ChatCompletionMessageToolCall[] = []
const textParts: string[] = []
for (const block of msg.content) {
if (block.type === 'tool_use') {
toolCalls.push({
id: block.id,
type: 'function',
function: {
name: block.name,
arguments: JSON.stringify(block.input),
},
})
} else if (block.type === 'text') {
textParts.push(block.text)
}
}
const assistantMsg: ChatCompletionAssistantMessageParam = {
role: 'assistant',
content: textParts.length > 0 ? textParts.join('') : null,
}
if (toolCalls.length > 0) {
assistantMsg.tool_calls = toolCalls
}
return assistantMsg
}
// ---------------------------------------------------------------------------
// Internal helpers — OpenAI → framework
// ---------------------------------------------------------------------------
/**
* Convert an OpenAI {@link ChatCompletion} into a framework {@link LLMResponse}.
*
* We take only the first choice (index 0), consistent with how the framework
* is designed for single-output agents.
*/
function fromOpenAICompletion(completion: ChatCompletion): LLMResponse {
const choice = completion.choices[0]
if (choice === undefined) {
throw new Error('OpenAI returned a completion with no choices')
}
const content: ContentBlock[] = []
const message = choice.message
if (message.content !== null && message.content !== undefined) {
const textBlock: TextBlock = { type: 'text', text: message.content }
content.push(textBlock)
}
for (const toolCall of message.tool_calls ?? []) {
let parsedInput: Record<string, unknown> = {}
try {
const parsed: unknown = JSON.parse(toolCall.function.arguments)
if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
parsedInput = parsed as Record<string, unknown>
}
} catch {
// Malformed arguments from the model — surface as empty object.
}
const toolUseBlock: ToolUseBlock = {
type: 'tool_use',
id: toolCall.id,
name: toolCall.function.name,
input: parsedInput,
}
content.push(toolUseBlock)
}
const stopReason = normalizeFinishReason(choice.finish_reason ?? 'stop')
return {
id: completion.id,
content,
model: completion.model,
stop_reason: stopReason,
usage: {
input_tokens: completion.usage?.prompt_tokens ?? 0,
output_tokens: completion.usage?.completion_tokens ?? 0,
},
}
}
/**
* Normalize an OpenAI `finish_reason` string to the framework's canonical
* stop-reason vocabulary so consumers never need to branch on provider-specific
* strings.
*
* Mapping:
* - `'stop'` `'end_turn'`
* - `'tool_calls'` `'tool_use'`
* - `'length'` `'max_tokens'`
* - `'content_filter'` `'content_filter'`
* - anything else passed through unchanged
*/
function normalizeFinishReason(reason: string): string {
switch (reason) {
case 'stop': return 'end_turn'
case 'tool_calls': return 'tool_use'
case 'length': return 'max_tokens'
case 'content_filter': return 'content_filter'
default: return reason
}
}
import {
toOpenAITool,
fromOpenAICompletion,
normalizeFinishReason,
buildOpenAIMessageList,
} from './openai-compat.js'
// ---------------------------------------------------------------------------
// Adapter implementation
@ -308,9 +80,6 @@ export class OpenAIAdapter implements LLMAdapter {
/**
* Send a synchronous (non-streaming) chat request and return the complete
* {@link LLMResponse}.
*
* Throws an `OpenAI.APIError` on non-2xx responses. Callers should catch and
* handle these (e.g. rate limits, context length exceeded).
*/
async chat(messages: LLMMessage[], options: LLMChatOptions): Promise<LLMResponse> {
const openAIMessages = buildOpenAIMessageList(messages, options.systemPrompt)
@ -338,12 +107,6 @@ export class OpenAIAdapter implements LLMAdapter {
/**
* Send a streaming chat request and yield {@link StreamEvent}s incrementally.
*
* Sequence guarantees match {@link AnthropicAdapter.stream}:
* - Zero or more `text` events
* - Zero or more `tool_use` events (emitted once per tool call, after
* arguments have been fully assembled)
* - Exactly one terminal event: `done` or `error`
*/
async *stream(
messages: LLMMessage[],
@ -351,7 +114,6 @@ export class OpenAIAdapter implements LLMAdapter {
): AsyncIterable<StreamEvent> {
const openAIMessages = buildOpenAIMessageList(messages, options.systemPrompt)
// We request usage in the final chunk so we can include it in the `done` event.
const streamResponse = await this.#client.chat.completions.create(
{
model: options.model,
@ -367,20 +129,17 @@ export class OpenAIAdapter implements LLMAdapter {
},
)
// Accumulate state across chunks.
let completionId = ''
let completionModel = ''
let finalFinishReason: string = 'stop'
let inputTokens = 0
let outputTokens = 0
// tool_calls are streamed piecemeal; key = tool call index
const toolCallBuffers = new Map<
number,
{ id: string; name: string; argsJson: string }
>()
// Full text accumulator for the `done` response.
let fullText = ''
try {
@ -388,7 +147,6 @@ export class OpenAIAdapter implements LLMAdapter {
completionId = chunk.id
completionModel = chunk.model
// Usage is only populated in the final chunk when stream_options.include_usage is set.
if (chunk.usage !== null && chunk.usage !== undefined) {
inputTokens = chunk.usage.prompt_tokens
outputTokens = chunk.usage.completion_tokens
@ -399,14 +157,12 @@ export class OpenAIAdapter implements LLMAdapter {
const delta = choice.delta
// --- text delta ---
if (delta.content !== null && delta.content !== undefined) {
fullText += delta.content
const textEvent: StreamEvent = { type: 'text', data: delta.content }
yield textEvent
}
// --- tool call delta ---
for (const toolCallDelta of delta.tool_calls ?? []) {
const idx = toolCallDelta.index
@ -419,7 +175,6 @@ export class OpenAIAdapter implements LLMAdapter {
}
const buf = toolCallBuffers.get(idx)
// buf is guaranteed to exist: we just set it above.
if (buf !== undefined) {
if (toolCallDelta.id) buf.id = toolCallDelta.id
if (toolCallDelta.function?.name) buf.name = toolCallDelta.function.name
@ -434,7 +189,6 @@ export class OpenAIAdapter implements LLMAdapter {
}
}
// Emit accumulated tool_use events after the stream ends.
const finalToolUseBlocks: ToolUseBlock[] = []
for (const buf of toolCallBuffers.values()) {
let parsedInput: Record<string, unknown> = {}
@ -458,7 +212,6 @@ export class OpenAIAdapter implements LLMAdapter {
yield toolUseEvent
}
// Build the complete content array for the done response.
const doneContent: ContentBlock[] = []
if (fullText.length > 0) {
const textBlock: TextBlock = { type: 'text', text: fullText }
@ -484,31 +237,6 @@ export class OpenAIAdapter implements LLMAdapter {
}
}
// ---------------------------------------------------------------------------
// Private utility
// ---------------------------------------------------------------------------
/**
* Prepend a system message when `systemPrompt` is provided, then append the
* converted conversation messages.
*
* OpenAI represents system instructions as a message with `role: 'system'`
* at the top of the array, not as a separate API parameter.
*/
function buildOpenAIMessageList(
messages: LLMMessage[],
systemPrompt: string | undefined,
): ChatCompletionMessageParam[] {
const result: ChatCompletionMessageParam[] = []
if (systemPrompt !== undefined && systemPrompt.length > 0) {
result.push({ role: 'system', content: systemPrompt })
}
result.push(...toOpenAIMessages(messages))
return result
}
// Re-export types that consumers of this module commonly need alongside the adapter.
export type {
ContentBlock,

248
src/llm/vllm.ts Normal file
View File

@ -0,0 +1,248 @@
/**
* @fileoverview vLLM adapter implementing {@link LLMAdapter}.
*
* vLLM exposes an OpenAI-compatible API, so this adapter reuses all shared
* helpers from `openai-compat.ts` and simply points the `openai` client at
* a custom `baseURL`.
*
* @module @vcg/agent-sdk
*/
import OpenAI from 'openai'
import type { ChatCompletionChunk } from 'openai/resources/chat/completions/index.js'
import type {
ContentBlock,
LLMAdapter,
LLMChatOptions,
LLMMessage,
LLMResponse,
LLMStreamOptions,
StreamEvent,
TextBlock,
ToolUseBlock,
VLLMConfig,
} from '../types.js'
import {
toOpenAITool,
fromOpenAICompletion,
normalizeFinishReason,
buildOpenAIMessageList,
} from './openai-compat.js'
// ---------------------------------------------------------------------------
// VLLMAdapter
// ---------------------------------------------------------------------------
/**
 * LLM adapter for vLLM inference servers.
 *
 * vLLM is OpenAI-compatible, so this adapter reuses the same message
 * conversion and response parsing logic as the OpenAI adapter (via
 * `openai-compat.ts`). The key difference is the configurable `baseURL`
 * pointing at a self-hosted vLLM instance.
 *
 * @example
 * ```ts
 * const adapter = new VLLMAdapter({
 *   baseURL: 'http://localhost:8000/v1',
 *   model: 'meta-llama/Llama-3-70b-chat-hf',
 * })
 * const response = await adapter.chat(messages, { model: 'meta-llama/Llama-3-70b-chat-hf' })
 * ```
 */
export class VLLMAdapter implements LLMAdapter {
  readonly name = 'vllm'

  /** OpenAI SDK client pointed at the vLLM server's `baseURL`. */
  readonly #client: OpenAI

  /** Retained config; used for model fallback in chat/stream and by healthCheck(). */
  readonly #config: VLLMConfig

  constructor(config: VLLMConfig) {
    this.#config = config
    // 'dummy' is a placeholder key for unauthenticated vLLM deployments so the
    // client always receives a value; real keys take precedence when provided.
    this.#client = new OpenAI({
      baseURL: config.baseURL,
      apiKey: config.apiKey ?? 'dummy',
      timeout: config.timeout,
      maxRetries: config.maxRetries,
    })
  }

  // -------------------------------------------------------------------------
  // healthCheck()
  // -------------------------------------------------------------------------

  /**
   * Check whether the vLLM server is reachable by hitting `GET {baseURL}/health`.
   *
   * Returns `true` if the server responds with a 2xx status, `false` otherwise
   * (including on network errors and timeouts).
   */
  async healthCheck(): Promise<boolean> {
    try {
      // Strip trailing /v1 if present to hit the root health endpoint
      const base = this.#config.baseURL.replace(/\/v1\/?$/, '')
      const response = await fetch(`${base}/health`, {
        // Reuses the configured request timeout; defaults to 5 seconds.
        signal: AbortSignal.timeout(this.#config.timeout ?? 5000),
      })
      return response.ok
    } catch {
      // Unreachable host, DNS failure, abort — all reported as unhealthy.
      return false
    }
  }

  // -------------------------------------------------------------------------
  // chat()
  // -------------------------------------------------------------------------

  /**
   * Send a synchronous (non-streaming) chat request and return the complete
   * {@link LLMResponse}.
   *
   * Falls back to the configured default model when `options.model` is not
   * provided.
   */
  async chat(messages: LLMMessage[], options: LLMChatOptions): Promise<LLMResponse> {
    const openAIMessages = buildOpenAIMessageList(messages, options.systemPrompt)
    const completion = await this.#client.chat.completions.create(
      {
        model: options.model ?? this.#config.model,
        messages: openAIMessages,
        max_tokens: options.maxTokens,
        temperature: options.temperature,
        tools: options.tools ? options.tools.map(toOpenAITool) : undefined,
        stream: false,
      },
      {
        signal: options.abortSignal,
      },
    )
    return fromOpenAICompletion(completion)
  }

  // -------------------------------------------------------------------------
  // stream()
  // -------------------------------------------------------------------------

  /**
   * Send a streaming chat request and yield {@link StreamEvent}s incrementally:
   * zero or more `text` events, then zero or more `tool_use` events (emitted
   * once per tool call, after its arguments have been fully assembled), then
   * exactly one terminal `done` or `error` event.
   */
  async *stream(
    messages: LLMMessage[],
    options: LLMStreamOptions,
  ): AsyncIterable<StreamEvent> {
    const openAIMessages = buildOpenAIMessageList(messages, options.systemPrompt)
    const streamResponse = await this.#client.chat.completions.create(
      {
        model: options.model ?? this.#config.model,
        messages: openAIMessages,
        max_tokens: options.maxTokens,
        temperature: options.temperature,
        tools: options.tools ? options.tools.map(toOpenAITool) : undefined,
        stream: true,
        // Request usage in the final chunk so the `done` event can include it.
        stream_options: { include_usage: true },
      },
      {
        signal: options.abortSignal,
      },
    )
    // Accumulated state across chunks.
    let completionId = ''
    let completionModel = ''
    let finalFinishReason: string = 'stop'
    let inputTokens = 0
    let outputTokens = 0
    // tool_calls arrive piecemeal; keyed by tool-call index within the choice.
    const toolCallBuffers = new Map<
      number,
      { id: string; name: string; argsJson: string }
    >()
    // Full text accumulator for the final `done` response.
    let fullText = ''
    try {
      for await (const chunk of streamResponse) {
        completionId = chunk.id
        completionModel = chunk.model
        // Usage is only populated on the final chunk (see include_usage above).
        if (chunk.usage !== null && chunk.usage !== undefined) {
          inputTokens = chunk.usage.prompt_tokens
          outputTokens = chunk.usage.completion_tokens
        }
        const choice: ChatCompletionChunk.Choice | undefined = chunk.choices[0]
        if (choice === undefined) continue
        const delta = choice.delta
        // --- text delta: forward immediately and accumulate for `done` ---
        if (delta.content !== null && delta.content !== undefined) {
          fullText += delta.content
          const textEvent: StreamEvent = { type: 'text', data: delta.content }
          yield textEvent
        }
        // --- tool call delta: merge fragments into the per-index buffer ---
        for (const toolCallDelta of delta.tool_calls ?? []) {
          const idx = toolCallDelta.index
          if (!toolCallBuffers.has(idx)) {
            toolCallBuffers.set(idx, {
              id: toolCallDelta.id ?? '',
              name: toolCallDelta.function?.name ?? '',
              argsJson: '',
            })
          }
          const buf = toolCallBuffers.get(idx)
          // buf is guaranteed to exist: we just set it above.
          if (buf !== undefined) {
            if (toolCallDelta.id) buf.id = toolCallDelta.id
            if (toolCallDelta.function?.name) buf.name = toolCallDelta.function.name
            if (toolCallDelta.function?.arguments) {
              buf.argsJson += toolCallDelta.function.arguments
            }
          }
        }
        if (choice.finish_reason !== null && choice.finish_reason !== undefined) {
          finalFinishReason = choice.finish_reason
        }
      }
      // Emit accumulated tool_use events once the stream has ended.
      const finalToolUseBlocks: ToolUseBlock[] = []
      for (const buf of toolCallBuffers.values()) {
        let parsedInput: Record<string, unknown> = {}
        try {
          const parsed: unknown = JSON.parse(buf.argsJson)
          if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
            parsedInput = parsed as Record<string, unknown>
          }
        } catch {
          // Malformed JSON — surface as empty object.
        }
        const toolUseBlock: ToolUseBlock = {
          type: 'tool_use',
          id: buf.id,
          name: buf.name,
          input: parsedInput,
        }
        finalToolUseBlocks.push(toolUseBlock)
        const toolUseEvent: StreamEvent = { type: 'tool_use', data: toolUseBlock }
        yield toolUseEvent
      }
      // Assemble the complete content array for the terminal `done` event.
      const doneContent: ContentBlock[] = []
      if (fullText.length > 0) {
        const textBlock: TextBlock = { type: 'text', text: fullText }
        doneContent.push(textBlock)
      }
      doneContent.push(...finalToolUseBlocks)
      const finalResponse: LLMResponse = {
        id: completionId,
        content: doneContent,
        model: completionModel,
        stop_reason: normalizeFinishReason(finalFinishReason),
        usage: { input_tokens: inputTokens, output_tokens: outputTokens },
      }
      const doneEvent: StreamEvent = { type: 'done', data: finalResponse }
      yield doneEvent
    } catch (err) {
      // Normalize non-Error throwables before surfacing them as an event.
      const error = err instanceof Error ? err : new Error(String(err))
      const errorEvent: StreamEvent = { type: 'error', data: error }
      yield errorEvent
    }
  }
}

View File

@ -186,7 +186,7 @@ export interface ToolDefinition<TInput = Record<string, unknown>> {
export interface AgentConfig {
readonly name: string
readonly model: string
readonly provider?: 'anthropic' | 'openai'
readonly provider?: 'anthropic' | 'openai' | 'vllm'
readonly systemPrompt?: string
/** Names of tools (from the tool registry) available to this agent. */
readonly tools?: readonly string[]
@ -285,10 +285,28 @@ export interface OrchestratorEvent {
export interface OrchestratorConfig {
readonly maxConcurrency?: number
readonly defaultModel?: string
readonly defaultProvider?: 'anthropic' | 'openai'
readonly defaultProvider?: 'anthropic' | 'openai' | 'vllm'
onProgress?: (event: OrchestratorEvent) => void
}
// ---------------------------------------------------------------------------
// vLLM configuration
// ---------------------------------------------------------------------------
/** Configuration for connecting to a vLLM inference server. */
export interface VLLMConfig {
  /** Base URL of the vLLM server (e.g. `'http://localhost:8000/v1'`). */
  readonly baseURL: string
  /** Model name used when a request does not specify one (e.g. `'meta-llama/Llama-3-70b-chat-hf'`). */
  readonly model: string
  /** Optional API key for authenticated vLLM deployments. */
  readonly apiKey?: string
  /** Request timeout in milliseconds, forwarded to the underlying client. */
  readonly timeout?: number
  /** Maximum number of retries on transient errors, forwarded to the underlying client. */
  readonly maxRetries?: number
}
// ---------------------------------------------------------------------------
// Memory
// ---------------------------------------------------------------------------