diff --git a/src/index.ts b/src/index.ts index 8841357..98ca209 100644 --- a/src/index.ts +++ b/src/index.ts @@ -108,6 +108,7 @@ export { export { createAdapter } from './llm/adapter.js' export type { SupportedProvider } from './llm/adapter.js' +export { VLLMAdapter } from './llm/vllm.js' // --------------------------------------------------------------------------- // Memory @@ -166,4 +167,7 @@ export type { // Memory MemoryEntry, MemoryStore, + + // vLLM + VLLMConfig, } from './types.js' diff --git a/src/llm/adapter.ts b/src/llm/adapter.ts index 5b032c5..bd1064d 100644 --- a/src/llm/adapter.ts +++ b/src/llm/adapter.ts @@ -11,6 +11,7 @@ * * const anthropic = createAdapter('anthropic') * const openai = createAdapter('openai', process.env.OPENAI_API_KEY) + * const vllm = createAdapter('vllm', { baseURL: 'http://localhost:8000/v1', model: 'llama3' }) * ``` */ @@ -28,45 +29,59 @@ export type { ToolUseBlock, ToolResultBlock, ImageBlock, + VLLMConfig, } from '../types.js' -import type { LLMAdapter } from '../types.js' +import type { LLMAdapter, VLLMConfig } from '../types.js' /** * The set of LLM providers supported out of the box. * Additional providers can be integrated by implementing {@link LLMAdapter} * directly and bypassing this factory. */ -export type SupportedProvider = 'anthropic' | 'openai' +export type SupportedProvider = 'anthropic' | 'openai' | 'vllm' /** * Instantiate the appropriate {@link LLMAdapter} for the given provider. * - * API keys fall back to the standard environment variables - * (`ANTHROPIC_API_KEY` / `OPENAI_API_KEY`) when not supplied explicitly. + * For `'anthropic'` and `'openai'`, the second argument is an optional API key + * string (falls back to `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` env vars). + * + * For `'vllm'`, the second argument must be a {@link VLLMConfig} object. * * Adapters are imported lazily so that projects using only one provider * are not forced to install the SDK for the other. 
* * @param provider - Which LLM provider to target. - * @param apiKey - Optional API key override; falls back to env var. + * @param config - API key string (for anthropic/openai) or VLLMConfig (for vllm). * @throws {Error} When the provider string is not recognised. */ export async function createAdapter( provider: SupportedProvider, - apiKey?: string, + config?: string | VLLMConfig, ): Promise<LLMAdapter> { switch (provider) { case 'anthropic': { const { AnthropicAdapter } = await import('./anthropic.js') + const apiKey = typeof config === 'string' ? config : undefined return new AnthropicAdapter(apiKey) } case 'openai': { const { OpenAIAdapter } = await import('./openai.js') + const apiKey = typeof config === 'string' ? config : undefined return new OpenAIAdapter(apiKey) } + case 'vllm': { + const { VLLMAdapter } = await import('./vllm.js') + if (typeof config === 'object' && config !== null && 'baseURL' in config) { + return new VLLMAdapter(config as VLLMConfig) + } + throw new Error( + 'createAdapter("vllm") requires a VLLMConfig object as the second argument ' + + '(e.g. { baseURL: "http://localhost:8000/v1", model: "llama3" }).', + ) + } default: { - // The `never` cast here makes TypeScript enforce exhaustiveness. const _exhaustive: never = provider throw new Error(`Unsupported LLM provider: ${String(_exhaustive)}`) } diff --git a/src/llm/openai-compat.ts b/src/llm/openai-compat.ts new file mode 100644 index 0000000..ba3ef9e --- /dev/null +++ b/src/llm/openai-compat.ts @@ -0,0 +1,252 @@ +/** + * @fileoverview Shared OpenAI-format helpers for adapters that speak the + * OpenAI Chat Completions wire format (OpenAI, vLLM, etc.). + * + * Both {@link OpenAIAdapter} and {@link VLLMAdapter} import from this module + * to avoid duplicating conversion logic. 
+ * + * @module @vcg/agent-sdk + */ + +import type OpenAI from 'openai' +import type { + ChatCompletion, + ChatCompletionAssistantMessageParam, + ChatCompletionMessageParam, + ChatCompletionMessageToolCall, + ChatCompletionTool, + ChatCompletionToolMessageParam, + ChatCompletionUserMessageParam, +} from 'openai/resources/chat/completions/index.js' + +import type { + ContentBlock, + LLMMessage, + LLMResponse, + LLMToolDef, + TextBlock, + ToolUseBlock, +} from '../types.js' + +// --------------------------------------------------------------------------- +// Framework -> OpenAI format +// --------------------------------------------------------------------------- + +/** + * Convert a framework {@link LLMToolDef} to an OpenAI {@link ChatCompletionTool}. + */ +export function toOpenAITool(tool: LLMToolDef): ChatCompletionTool { + return { + type: 'function', + function: { + name: tool.name, + description: tool.description, + parameters: tool.inputSchema as Record<string, unknown>, + }, + } +} + +/** + * Determine whether a framework message contains any `tool_result` content + * blocks, which must be serialised as separate OpenAI `tool`-role messages. + */ +export function hasToolResults(msg: LLMMessage): boolean { + return msg.content.some((b) => b.type === 'tool_result') +} + +/** + * Convert framework messages into OpenAI {@link ChatCompletionMessageParam} entries. + * + * Expands `tool_result` blocks into separate `tool`-role messages as required + * by the OpenAI wire format. 
+ */ +export function toOpenAIMessages(messages: LLMMessage[]): ChatCompletionMessageParam[] { + const result: ChatCompletionMessageParam[] = [] + + for (const msg of messages) { + if (msg.role === 'assistant') { + result.push(toOpenAIAssistantMessage(msg)) + } else { + // user role + if (!hasToolResults(msg)) { + result.push(toOpenAIUserMessage(msg)) + } else { + const nonToolBlocks = msg.content.filter((b) => b.type !== 'tool_result') + if (nonToolBlocks.length > 0) { + result.push(toOpenAIUserMessage({ role: 'user', content: nonToolBlocks })) + } + + for (const block of msg.content) { + if (block.type === 'tool_result') { + const toolMsg: ChatCompletionToolMessageParam = { + role: 'tool', + tool_call_id: block.tool_use_id, + content: block.content, + } + result.push(toolMsg) + } + } + } + } + } + + return result +} + +/** + * Convert a `user`-role framework message into an OpenAI user message. + */ +export function toOpenAIUserMessage(msg: LLMMessage): ChatCompletionUserMessageParam { + if (msg.content.length === 1 && msg.content[0]?.type === 'text') { + return { role: 'user', content: msg.content[0].text } + } + + type ContentPart = OpenAI.Chat.ChatCompletionContentPartText | OpenAI.Chat.ChatCompletionContentPartImage + const parts: ContentPart[] = [] + + for (const block of msg.content) { + if (block.type === 'text') { + parts.push({ type: 'text', text: block.text }) + } else if (block.type === 'image') { + parts.push({ + type: 'image_url', + image_url: { + url: `data:${block.source.media_type};base64,${block.source.data}`, + }, + }) + } + } + + return { role: 'user', content: parts } +} + +/** + * Convert an `assistant`-role framework message into an OpenAI assistant message. 
+ */ +export function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessageParam { + const toolCalls: ChatCompletionMessageToolCall[] = [] + const textParts: string[] = [] + + for (const block of msg.content) { + if (block.type === 'tool_use') { + toolCalls.push({ + id: block.id, + type: 'function', + function: { + name: block.name, + arguments: JSON.stringify(block.input), + }, + }) + } else if (block.type === 'text') { + textParts.push(block.text) + } + } + + const assistantMsg: ChatCompletionAssistantMessageParam = { + role: 'assistant', + content: textParts.length > 0 ? textParts.join('') : null, + } + + if (toolCalls.length > 0) { + assistantMsg.tool_calls = toolCalls + } + + return assistantMsg +} + +// --------------------------------------------------------------------------- +// OpenAI format -> Framework +// --------------------------------------------------------------------------- + +/** + * Convert an OpenAI {@link ChatCompletion} into a framework {@link LLMResponse}. + */ +export function fromOpenAICompletion(completion: ChatCompletion): LLMResponse { + const choice = completion.choices[0] + if (choice === undefined) { + throw new Error('OpenAI returned a completion with no choices') + } + + const content: ContentBlock[] = [] + const message = choice.message + + if (message.content !== null && message.content !== undefined) { + const textBlock: TextBlock = { type: 'text', text: message.content } + content.push(textBlock) + } + + for (const toolCall of message.tool_calls ?? []) { + let parsedInput: Record<string, unknown> = {} + try { + const parsed: unknown = JSON.parse(toolCall.function.arguments) + if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) { + parsedInput = parsed as Record<string, unknown> + } + } catch { + // Malformed arguments from the model — surface as empty object. 
+ } + + const toolUseBlock: ToolUseBlock = { + type: 'tool_use', + id: toolCall.id, + name: toolCall.function.name, + input: parsedInput, + } + content.push(toolUseBlock) + } + + const stopReason = normalizeFinishReason(choice.finish_reason ?? 'stop') + + return { + id: completion.id, + content, + model: completion.model, + stop_reason: stopReason, + usage: { + input_tokens: completion.usage?.prompt_tokens ?? 0, + output_tokens: completion.usage?.completion_tokens ?? 0, + }, + } +} + +/** + * Normalize an OpenAI `finish_reason` string to the framework's canonical + * stop-reason vocabulary. + * + * - `'stop'` -> `'end_turn'` + * - `'tool_calls'` -> `'tool_use'` + * - `'length'` -> `'max_tokens'` + * - `'content_filter'` -> `'content_filter'` + * - anything else -> passed through unchanged + */ +export function normalizeFinishReason(reason: string): string { + switch (reason) { + case 'stop': return 'end_turn' + case 'tool_calls': return 'tool_use' + case 'length': return 'max_tokens' + case 'content_filter': return 'content_filter' + default: return reason + } +} + +// --------------------------------------------------------------------------- +// Message list assembly +// --------------------------------------------------------------------------- + +/** + * Prepend a system message when `systemPrompt` is provided, then append the + * converted conversation messages. 
+ */ +export function buildOpenAIMessageList( + messages: LLMMessage[], + systemPrompt: string | undefined, +): ChatCompletionMessageParam[] { + const result: ChatCompletionMessageParam[] = [] + + if (systemPrompt !== undefined && systemPrompt.length > 0) { + result.push({ role: 'system', content: systemPrompt }) + } + + result.push(...toOpenAIMessages(messages)) + return result +} diff --git a/src/llm/openai.ts b/src/llm/openai.ts index a53ab24..568e539 100644 --- a/src/llm/openai.ts +++ b/src/llm/openai.ts @@ -4,10 +4,10 @@ * Converts between the framework's internal {@link ContentBlock} types and the * OpenAI Chat Completions wire format. Key mapping decisions: * - * - Framework `tool_use` blocks in assistant messages → OpenAI `tool_calls` - * - Framework `tool_result` blocks in user messages → OpenAI `tool` role messages - * - Framework `image` blocks in user messages → OpenAI image content parts - * - System prompt in {@link LLMChatOptions} → prepended `system` message + * - Framework `tool_use` blocks in assistant messages -> OpenAI `tool_calls` + * - Framework `tool_result` blocks in user messages -> OpenAI `tool` role messages + * - Framework `image` blocks in user messages -> OpenAI image content parts + * - System prompt in {@link LLMChatOptions} -> prepended `system` message * * Because OpenAI and Anthropic use fundamentally different role-based structures * for tool calling (Anthropic embeds tool results in user-role content arrays; @@ -31,16 +31,7 @@ */ import OpenAI from 'openai' -import type { - ChatCompletion, - ChatCompletionAssistantMessageParam, - ChatCompletionChunk, - ChatCompletionMessageParam, - ChatCompletionMessageToolCall, - ChatCompletionTool, - ChatCompletionToolMessageParam, - ChatCompletionUserMessageParam, -} from 'openai/resources/chat/completions/index.js' +import type { ChatCompletionChunk } from 'openai/resources/chat/completions/index.js' import type { ContentBlock, @@ -55,231 +46,12 @@ import type { ToolUseBlock, } from 
'../types.js' -// --------------------------------------------------------------------------- -// Internal helpers — framework → OpenAI -// --------------------------------------------------------------------------- - -/** - * Convert a framework {@link LLMToolDef} to an OpenAI {@link ChatCompletionTool}. - * - * OpenAI wraps the function definition inside a `function` key and a `type` - * discriminant. The `inputSchema` is already a JSON Schema object. - */ -function toOpenAITool(tool: LLMToolDef): ChatCompletionTool { - return { - type: 'function', - function: { - name: tool.name, - description: tool.description, - parameters: tool.inputSchema as Record<string, unknown>, - }, - } -} - -/** - * Determine whether a framework message contains any `tool_result` content - * blocks, which must be serialised as separate OpenAI `tool`-role messages. - */ -function hasToolResults(msg: LLMMessage): boolean { - return msg.content.some((b) => b.type === 'tool_result') -} - -/** - * Convert a single framework {@link LLMMessage} into one or more OpenAI - * {@link ChatCompletionMessageParam} entries. - * - * The expansion is necessary because OpenAI represents tool results as - * top-level messages with role `tool`, whereas in our model they are content - * blocks inside a `user` message. 
- * - * Expansion rules: - * - A `user` message containing only text/image blocks → single user message - * - A `user` message containing `tool_result` blocks → one `tool` message per - * tool_result block; any remaining text/image blocks are folded into an - * additional user message prepended to the group - * - An `assistant` message → single assistant message with optional tool_calls - */ -function toOpenAIMessages(messages: LLMMessage[]): ChatCompletionMessageParam[] { - const result: ChatCompletionMessageParam[] = [] - - for (const msg of messages) { - if (msg.role === 'assistant') { - result.push(toOpenAIAssistantMessage(msg)) - } else { - // user role - if (!hasToolResults(msg)) { - result.push(toOpenAIUserMessage(msg)) - } else { - // Split: text/image blocks become a user message (if any exist), then - // each tool_result block becomes an independent tool message. - const nonToolBlocks = msg.content.filter((b) => b.type !== 'tool_result') - if (nonToolBlocks.length > 0) { - result.push(toOpenAIUserMessage({ role: 'user', content: nonToolBlocks })) - } - - for (const block of msg.content) { - if (block.type === 'tool_result') { - const toolMsg: ChatCompletionToolMessageParam = { - role: 'tool', - tool_call_id: block.tool_use_id, - content: block.content, - } - result.push(toolMsg) - } - } - } - } - } - - return result -} - -/** - * Convert a `user`-role framework message into an OpenAI user message. - * Image blocks are converted to the OpenAI image_url content part format. - */ -function toOpenAIUserMessage(msg: LLMMessage): ChatCompletionUserMessageParam { - // If the entire content is a single text block, use the compact string form - // to keep the request payload smaller. 
- if (msg.content.length === 1 && msg.content[0]?.type === 'text') { - return { role: 'user', content: msg.content[0].text } - } - - type ContentPart = OpenAI.Chat.ChatCompletionContentPartText | OpenAI.Chat.ChatCompletionContentPartImage - const parts: ContentPart[] = [] - - for (const block of msg.content) { - if (block.type === 'text') { - parts.push({ type: 'text', text: block.text }) - } else if (block.type === 'image') { - parts.push({ - type: 'image_url', - image_url: { - url: `data:${block.source.media_type};base64,${block.source.data}`, - }, - }) - } - // tool_result blocks are handled by the caller (toOpenAIMessages); skip here. - } - - return { role: 'user', content: parts } -} - -/** - * Convert an `assistant`-role framework message into an OpenAI assistant message. - * - * Any `tool_use` blocks become `tool_calls`; `text` blocks become the message content. - */ -function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessageParam { - const toolCalls: ChatCompletionMessageToolCall[] = [] - const textParts: string[] = [] - - for (const block of msg.content) { - if (block.type === 'tool_use') { - toolCalls.push({ - id: block.id, - type: 'function', - function: { - name: block.name, - arguments: JSON.stringify(block.input), - }, - }) - } else if (block.type === 'text') { - textParts.push(block.text) - } - } - - const assistantMsg: ChatCompletionAssistantMessageParam = { - role: 'assistant', - content: textParts.length > 0 ? textParts.join('') : null, - } - - if (toolCalls.length > 0) { - assistantMsg.tool_calls = toolCalls - } - - return assistantMsg -} - -// --------------------------------------------------------------------------- -// Internal helpers — OpenAI → framework -// --------------------------------------------------------------------------- - -/** - * Convert an OpenAI {@link ChatCompletion} into a framework {@link LLMResponse}. 
- * - * We take only the first choice (index 0), consistent with how the framework - * is designed for single-output agents. - */ -function fromOpenAICompletion(completion: ChatCompletion): LLMResponse { - const choice = completion.choices[0] - if (choice === undefined) { - throw new Error('OpenAI returned a completion with no choices') - } - - const content: ContentBlock[] = [] - const message = choice.message - - if (message.content !== null && message.content !== undefined) { - const textBlock: TextBlock = { type: 'text', text: message.content } - content.push(textBlock) - } - - for (const toolCall of message.tool_calls ?? []) { - let parsedInput: Record<string, unknown> = {} - try { - const parsed: unknown = JSON.parse(toolCall.function.arguments) - if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) { - parsedInput = parsed as Record<string, unknown> - } - } catch { - // Malformed arguments from the model — surface as empty object. - } - - const toolUseBlock: ToolUseBlock = { - type: 'tool_use', - id: toolCall.id, - name: toolCall.function.name, - input: parsedInput, - } - content.push(toolUseBlock) - } - - const stopReason = normalizeFinishReason(choice.finish_reason ?? 'stop') - - return { - id: completion.id, - content, - model: completion.model, - stop_reason: stopReason, - usage: { - input_tokens: completion.usage?.prompt_tokens ?? 0, - output_tokens: completion.usage?.completion_tokens ?? 0, - }, - } -} - -/** - * Normalize an OpenAI `finish_reason` string to the framework's canonical - * stop-reason vocabulary so consumers never need to branch on provider-specific - * strings. 
- * - * Mapping: - * - `'stop'` → `'end_turn'` - * - `'tool_calls'` → `'tool_use'` - * - `'length'` → `'max_tokens'` - * - `'content_filter'` → `'content_filter'` - * - anything else → passed through unchanged - */ -function normalizeFinishReason(reason: string): string { - switch (reason) { - case 'stop': return 'end_turn' - case 'tool_calls': return 'tool_use' - case 'length': return 'max_tokens' - case 'content_filter': return 'content_filter' - default: return reason - } -} +import { + toOpenAITool, + fromOpenAICompletion, + normalizeFinishReason, + buildOpenAIMessageList, +} from './openai-compat.js' // --------------------------------------------------------------------------- // Adapter implementation @@ -308,9 +80,6 @@ export class OpenAIAdapter implements LLMAdapter { /** * Send a synchronous (non-streaming) chat request and return the complete * {@link LLMResponse}. - * - * Throws an `OpenAI.APIError` on non-2xx responses. Callers should catch and - * handle these (e.g. rate limits, context length exceeded). */ async chat(messages: LLMMessage[], options: LLMChatOptions): Promise { const openAIMessages = buildOpenAIMessageList(messages, options.systemPrompt) @@ -338,12 +107,6 @@ export class OpenAIAdapter implements LLMAdapter { /** * Send a streaming chat request and yield {@link StreamEvent}s incrementally. - * - * Sequence guarantees match {@link AnthropicAdapter.stream}: - * - Zero or more `text` events - * - Zero or more `tool_use` events (emitted once per tool call, after - * arguments have been fully assembled) - * - Exactly one terminal event: `done` or `error` */ async *stream( messages: LLMMessage[], @@ -351,7 +114,6 @@ export class OpenAIAdapter implements LLMAdapter { ): AsyncIterable { const openAIMessages = buildOpenAIMessageList(messages, options.systemPrompt) - // We request usage in the final chunk so we can include it in the `done` event. 
const streamResponse = await this.#client.chat.completions.create( { model: options.model, @@ -367,20 +129,17 @@ export class OpenAIAdapter implements LLMAdapter { }, ) - // Accumulate state across chunks. let completionId = '' let completionModel = '' let finalFinishReason: string = 'stop' let inputTokens = 0 let outputTokens = 0 - // tool_calls are streamed piecemeal; key = tool call index const toolCallBuffers = new Map< number, { id: string; name: string; argsJson: string } >() - // Full text accumulator for the `done` response. let fullText = '' try { @@ -388,7 +147,6 @@ export class OpenAIAdapter implements LLMAdapter { completionId = chunk.id completionModel = chunk.model - // Usage is only populated in the final chunk when stream_options.include_usage is set. if (chunk.usage !== null && chunk.usage !== undefined) { inputTokens = chunk.usage.prompt_tokens outputTokens = chunk.usage.completion_tokens @@ -399,14 +157,12 @@ export class OpenAIAdapter implements LLMAdapter { const delta = choice.delta - // --- text delta --- if (delta.content !== null && delta.content !== undefined) { fullText += delta.content const textEvent: StreamEvent = { type: 'text', data: delta.content } yield textEvent } - // --- tool call delta --- for (const toolCallDelta of delta.tool_calls ?? []) { const idx = toolCallDelta.index @@ -419,7 +175,6 @@ export class OpenAIAdapter implements LLMAdapter { } const buf = toolCallBuffers.get(idx) - // buf is guaranteed to exist: we just set it above. if (buf !== undefined) { if (toolCallDelta.id) buf.id = toolCallDelta.id if (toolCallDelta.function?.name) buf.name = toolCallDelta.function.name @@ -434,7 +189,6 @@ export class OpenAIAdapter implements LLMAdapter { } } - // Emit accumulated tool_use events after the stream ends. 
const finalToolUseBlocks: ToolUseBlock[] = [] for (const buf of toolCallBuffers.values()) { let parsedInput: Record<string, unknown> = {} @@ -458,7 +212,6 @@ export class OpenAIAdapter implements LLMAdapter { yield toolUseEvent } - // Build the complete content array for the done response. const doneContent: ContentBlock[] = [] if (fullText.length > 0) { const textBlock: TextBlock = { type: 'text', text: fullText } doneContent.push(textBlock) } @@ -484,31 +237,6 @@ export class OpenAIAdapter implements LLMAdapter { } } -// --------------------------------------------------------------------------- -// Private utility -// --------------------------------------------------------------------------- - -/** - * Prepend a system message when `systemPrompt` is provided, then append the - * converted conversation messages. - * - * OpenAI represents system instructions as a message with `role: 'system'` - * at the top of the array, not as a separate API parameter. - */ -function buildOpenAIMessageList( - messages: LLMMessage[], - systemPrompt: string | undefined, -): ChatCompletionMessageParam[] { - const result: ChatCompletionMessageParam[] = [] - - if (systemPrompt !== undefined && systemPrompt.length > 0) { - result.push({ role: 'system', content: systemPrompt }) - } - - result.push(...toOpenAIMessages(messages)) - return result -} - -// Re-export types that consumers of this module commonly need alongside the adapter. export type { ContentBlock, diff --git a/src/llm/vllm.ts b/src/llm/vllm.ts new file mode 100644 index 0000000..7f848ec --- /dev/null +++ b/src/llm/vllm.ts @@ -0,0 +1,248 @@ +/** + * @fileoverview vLLM adapter implementing {@link LLMAdapter}. + * + * vLLM exposes an OpenAI-compatible API, so this adapter reuses all shared + * helpers from `openai-compat.ts` and simply points the `openai` client at + * a custom `baseURL`. 
+ * + * @module @vcg/agent-sdk + */ + +import OpenAI from 'openai' +import type { ChatCompletionChunk } from 'openai/resources/chat/completions/index.js' + +import type { + ContentBlock, + LLMAdapter, + LLMChatOptions, + LLMMessage, + LLMResponse, + LLMStreamOptions, + StreamEvent, + TextBlock, + ToolUseBlock, + VLLMConfig, +} from '../types.js' + +import { + toOpenAITool, + fromOpenAICompletion, + normalizeFinishReason, + buildOpenAIMessageList, +} from './openai-compat.js' + +// --------------------------------------------------------------------------- +// VLLMAdapter +// --------------------------------------------------------------------------- + +/** + * LLM adapter for vLLM inference servers. + * + * vLLM is OpenAI-compatible, so this adapter reuses the same message + * conversion and response parsing logic as the OpenAI adapter. The key + * difference is the configurable `baseURL` pointing at a self-hosted + * vLLM instance. + * + * @example + * ```ts + * const adapter = new VLLMAdapter({ + * baseURL: 'http://localhost:8000/v1', + * model: 'meta-llama/Llama-3-70b-chat-hf', + * }) + * const response = await adapter.chat(messages, { model: 'meta-llama/Llama-3-70b-chat-hf' }) + * ``` + */ +export class VLLMAdapter implements LLMAdapter { + readonly name = 'vllm' + + readonly #client: OpenAI + readonly #config: VLLMConfig + + constructor(config: VLLMConfig) { + this.#config = config + this.#client = new OpenAI({ + baseURL: config.baseURL, + apiKey: config.apiKey ?? 'dummy', + timeout: config.timeout, + maxRetries: config.maxRetries, + }) + } + + // ------------------------------------------------------------------------- + // healthCheck() + // ------------------------------------------------------------------------- + + /** + * Check whether the vLLM server is reachable by hitting `GET {baseURL}/health`. + * + * Returns `true` if the server responds with a 2xx status, `false` otherwise. 
*/ + async healthCheck(): Promise<boolean> { + try { + // Strip trailing /v1 if present to hit the root health endpoint + const base = this.#config.baseURL.replace(/\/v1\/?$/, '') + const response = await fetch(`${base}/health`, { + signal: AbortSignal.timeout(this.#config.timeout ?? 5000), + }) + return response.ok + } catch { + return false + } + } + + // ------------------------------------------------------------------------- + // chat() + // ------------------------------------------------------------------------- + + async chat(messages: LLMMessage[], options: LLMChatOptions): Promise<LLMResponse> { + const openAIMessages = buildOpenAIMessageList(messages, options.systemPrompt) + + const completion = await this.#client.chat.completions.create( + { + model: options.model ?? this.#config.model, + messages: openAIMessages, + max_tokens: options.maxTokens, + temperature: options.temperature, + tools: options.tools ? options.tools.map(toOpenAITool) : undefined, + stream: false, + }, + { + signal: options.abortSignal, + }, + ) + + return fromOpenAICompletion(completion) + } + + // ------------------------------------------------------------------------- + // stream() + // ------------------------------------------------------------------------- + + async *stream( + messages: LLMMessage[], + options: LLMStreamOptions, + ): AsyncIterable<StreamEvent> { + const openAIMessages = buildOpenAIMessageList(messages, options.systemPrompt) + + const streamResponse = await this.#client.chat.completions.create( + { + model: options.model ?? this.#config.model, + messages: openAIMessages, + max_tokens: options.maxTokens, + temperature: options.temperature, + tools: options.tools ? 
options.tools.map(toOpenAITool) : undefined, + stream: true, + stream_options: { include_usage: true }, + }, + { + signal: options.abortSignal, + }, + ) + + let completionId = '' + let completionModel = '' + let finalFinishReason: string = 'stop' + let inputTokens = 0 + let outputTokens = 0 + + const toolCallBuffers = new Map< + number, + { id: string; name: string; argsJson: string } + >() + + let fullText = '' + + try { + for await (const chunk of streamResponse) { + completionId = chunk.id + completionModel = chunk.model + + if (chunk.usage !== null && chunk.usage !== undefined) { + inputTokens = chunk.usage.prompt_tokens + outputTokens = chunk.usage.completion_tokens + } + + const choice: ChatCompletionChunk.Choice | undefined = chunk.choices[0] + if (choice === undefined) continue + + const delta = choice.delta + + if (delta.content !== null && delta.content !== undefined) { + fullText += delta.content + const textEvent: StreamEvent = { type: 'text', data: delta.content } + yield textEvent + } + + for (const toolCallDelta of delta.tool_calls ?? []) { + const idx = toolCallDelta.index + + if (!toolCallBuffers.has(idx)) { + toolCallBuffers.set(idx, { + id: toolCallDelta.id ?? '', + name: toolCallDelta.function?.name ?? 
'', + argsJson: '', + }) + } + + const buf = toolCallBuffers.get(idx) + if (buf !== undefined) { + if (toolCallDelta.id) buf.id = toolCallDelta.id + if (toolCallDelta.function?.name) buf.name = toolCallDelta.function.name + if (toolCallDelta.function?.arguments) { + buf.argsJson += toolCallDelta.function.arguments + } + } + } + + if (choice.finish_reason !== null && choice.finish_reason !== undefined) { + finalFinishReason = choice.finish_reason + } + } + + const finalToolUseBlocks: ToolUseBlock[] = [] + for (const buf of toolCallBuffers.values()) { + let parsedInput: Record<string, unknown> = {} + try { + const parsed: unknown = JSON.parse(buf.argsJson) + if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) { + parsedInput = parsed as Record<string, unknown> + } + } catch { + // Malformed JSON — surface as empty object. + } + + const toolUseBlock: ToolUseBlock = { + type: 'tool_use', + id: buf.id, + name: buf.name, + input: parsedInput, + } + finalToolUseBlocks.push(toolUseBlock) + const toolUseEvent: StreamEvent = { type: 'tool_use', data: toolUseBlock } + yield toolUseEvent + } + + const doneContent: ContentBlock[] = [] + if (fullText.length > 0) { + const textBlock: TextBlock = { type: 'text', text: fullText } + doneContent.push(textBlock) + } + doneContent.push(...finalToolUseBlocks) + + const finalResponse: LLMResponse = { + id: completionId, + content: doneContent, + model: completionModel, + stop_reason: normalizeFinishReason(finalFinishReason), + usage: { input_tokens: inputTokens, output_tokens: outputTokens }, + } + + const doneEvent: StreamEvent = { type: 'done', data: finalResponse } + yield doneEvent + } catch (err) { + const error = err instanceof Error ? 
err : new Error(String(err)) + const errorEvent: StreamEvent = { type: 'error', data: error } + yield errorEvent + } + } +} diff --git a/src/types.ts b/src/types.ts index f980c68..8b150b6 100644 --- a/src/types.ts +++ b/src/types.ts @@ -186,7 +186,7 @@ export interface ToolDefinition> { export interface AgentConfig { readonly name: string readonly model: string - readonly provider?: 'anthropic' | 'openai' + readonly provider?: 'anthropic' | 'openai' | 'vllm' readonly systemPrompt?: string /** Names of tools (from the tool registry) available to this agent. */ readonly tools?: readonly string[] @@ -285,10 +285,28 @@ export interface OrchestratorEvent { export interface OrchestratorConfig { readonly maxConcurrency?: number readonly defaultModel?: string - readonly defaultProvider?: 'anthropic' | 'openai' + readonly defaultProvider?: 'anthropic' | 'openai' | 'vllm' onProgress?: (event: OrchestratorEvent) => void } +// --------------------------------------------------------------------------- +// vLLM configuration +// --------------------------------------------------------------------------- + +/** Configuration for connecting to a vLLM inference server. */ +export interface VLLMConfig { + /** Base URL of the vLLM server (e.g. `'http://localhost:8000/v1'`). */ + readonly baseURL: string + /** Model name to use for requests (e.g. `'meta-llama/Llama-3-70b-chat-hf'`). */ + readonly model: string + /** Optional API key for authenticated vLLM deployments. */ + readonly apiKey?: string + /** Request timeout in milliseconds. */ + readonly timeout?: number + /** Maximum number of retries on transient errors. */ + readonly maxRetries?: number +} + // --------------------------------------------------------------------------- // Memory // ---------------------------------------------------------------------------