feat(llm): add fallback tool-call extraction for local models (#15)

Local models (Ollama, vLLM) sometimes return tool calls as text instead
of using the native tool_calls wire format. This adds a safety-net
extractor that parses tool calls from model text output when native
tool_calls is empty.

- Add text-tool-extractor with support for bare JSON, code fences,
  and Hermes <tool_call> tags
- Wire fallback into OpenAI adapter chat() and stream() paths
- Add onWarning callback when model ignores configured tools
- Add timeoutMs on AgentConfig for per-run abort (local models can
  be slow)
- Add 26 tests for extractor and fallback behavior
- Document local model compatibility in README
JackChen 2026-04-05 03:20:20 +08:00
parent 9a81a13982
commit bc31008f4e
11 changed files with 691 additions and 8 deletions

View File

@@ -197,6 +197,33 @@ Verified local models with tool-calling: **Gemma 4** (see [example 08](examples/
Any OpenAI-compatible API should work via `provider: 'openai'` + `baseURL` (DeepSeek, Groq, Mistral, Qwen, MiniMax, etc.). **Grok now has first-class support** via `provider: 'grok'`.
### Local Model Tool-Calling
The framework supports tool-calling with local models served by Ollama, vLLM, LM Studio, or llama.cpp. Tool-calling is handled natively by these servers via the OpenAI-compatible API.
**Verified models:** Gemma 4, Llama 3.1, Qwen 3, Mistral, Phi-4. See the full list at [ollama.com/search?c=tools](https://ollama.com/search?c=tools).
**Fallback extraction:** If a local model returns tool calls as text instead of using the `tool_calls` wire format (common with thinking models or misconfigured servers), the framework automatically extracts them from the text output.
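The extractor recognizes bare JSON tool calls, JSON inside markdown code fences, and Hermes-style `<tool_call>` tags, and it only accepts objects whose `name` matches a configured tool. A minimal sketch of what it picks up (illustrative only; the fallback runs automatically inside the adapter, and the import path below is hypothetical):
```typescript
// Illustrative sketch: the fallback runs automatically; direct use is not required.
// The import path is hypothetical; in this repo the extractor lives at
// src/tool/text-tool-extractor.ts.
import { extractToolCallsFromText } from './src/tool/text-tool-extractor.js'

// Text a local model might emit instead of native tool_calls:
const text =
  'I will check the directory first.\n' +
  '{"name": "bash", "arguments": {"command": "ls -la"}}'

// Only JSON whose "name" matches a known tool is treated as a tool call.
const calls = extractToolCallsFromText(text, ['bash', 'file_read'])
// -> one tool_use block: { name: 'bash', input: { command: 'ls -la' }, id: 'extracted_call_...' }
```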
**Timeout:** Local inference can be slow. Use `timeoutMs` on `AgentConfig` to prevent indefinite hangs:
```typescript
const localAgent: AgentConfig = {
  name: 'local',
  model: 'llama3.1',
  provider: 'openai',
  baseURL: 'http://localhost:11434/v1',
  apiKey: 'ollama',
  tools: ['bash', 'file_read'],
  timeoutMs: 120_000, // abort after 2 minutes
}
```
**Troubleshooting:**
- Model not calling tools? Ensure it appears in Ollama's [Tools category](https://ollama.com/search?c=tools). Not all models support tool-calling.
- Using Ollama? Update to the latest version (`ollama update`) — older versions have known tool-calling bugs.
- Proxy interfering? Use `no_proxy=localhost` when running against local servers.
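To surface the "model ignored tools" case programmatically, hook the new `onWarning` callback. A minimal sketch, assuming run options (including `onWarning`) are forwarded from the agent to the runner as in this release's wiring; the exact `agent.run()` call shape here is illustrative:
```typescript
// Sketch only: assumes an agent created from the localAgent config above and
// that caller run options are passed through to the runner.
const result = await agent.run('List the files in /tmp', {
  onWarning: (message) => {
    // Fired when tools were configured but the model returned no tool calls
    // on its first turn, which usually means the local model lacks tool support.
    console.warn(`[agent] ${message}`)
  },
})
```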
### LLM Configuration Examples
```typescript

View File

@@ -64,6 +64,7 @@ Your review MUST include these sections:
Be specific and constructive. Reference line numbers or function names when possible.`,
tools: ['file_read'],
maxTurns: 4,
timeoutMs: 120_000, // 2 min — local models can be slow
}
// ---------------------------------------------------------------------------

View File

@@ -293,10 +293,16 @@ export class Agent {
}
// Auto-generate runId when onTrace is provided but runId is missing
const needsRunId = callerOptions?.onTrace && !callerOptions.runId
// Create a fresh timeout signal per run (not per runner) so that
// each run() / prompt() call gets its own timeout window.
const timeoutSignal = this.config.timeoutMs !== undefined && this.config.timeoutMs > 0
? AbortSignal.timeout(this.config.timeoutMs)
: undefined
const runOptions: RunOptions = {
...callerOptions,
onMessage: internalOnMessage,
...(needsRunId ? { runId: generateRunId() } : undefined),
...(timeoutSignal ? { abortSignal: timeoutSignal } : undefined),
}
const result = await runner.run(messages, runOptions)
@@ -466,8 +472,12 @@
}
const runner = await this.getRunner()
// Fresh timeout per stream call, same as executeRun.
const timeoutSignal = this.config.timeoutMs !== undefined && this.config.timeoutMs > 0
? AbortSignal.timeout(this.config.timeoutMs)
: undefined
for await (const event of runner.stream(messages, timeoutSignal ? { abortSignal: timeoutSignal } : {})) {
if (event.type === 'done') {
const result = event.data as import('./runner.js').RunResult
this.state.tokenUsage = addUsage(this.state.tokenUsage, result.tokenUsage)

View File

@@ -78,6 +78,11 @@ export interface RunOptions {
readonly onToolResult?: (name: string, result: ToolResult) => void
/** Fired after each complete {@link LLMMessage} is appended. */
readonly onMessage?: (message: LLMMessage) => void
/**
* Fired when the runner detects a potential configuration issue.
* For example, when a model appears to ignore tool definitions.
*/
readonly onWarning?: (message: string) => void
/** Trace callback for observability spans. Async callbacks are safe. */
readonly onTrace?: (event: TraceEvent) => void | Promise<void>
/** Run ID for trace correlation. */
@@ -86,6 +91,11 @@ export interface RunOptions {
readonly taskId?: string
/** Agent name for trace correlation (overrides RunnerOptions.agentName). */
readonly traceAgent?: string
/**
* Per-call abort signal. When set, takes precedence over the static
* {@link RunnerOptions.abortSignal}. Useful for per-run timeouts.
*/
readonly abortSignal?: AbortSignal
}
/** The aggregated result returned when a full run completes. */
@@ -235,13 +245,16 @@ export class AgentRunner {
? allDefs.filter(d => this.options.allowedTools!.includes(d.name))
: allDefs
// Per-call abortSignal takes precedence over the static one.
const effectiveAbortSignal = options.abortSignal ?? this.options.abortSignal
const baseChatOptions: LLMChatOptions = {
model: this.options.model,
tools: toolDefs.length > 0 ? toolDefs : undefined,
maxTokens: this.options.maxTokens,
temperature: this.options.temperature,
systemPrompt: this.options.systemPrompt,
abortSignal: effectiveAbortSignal,
}
try {
@@ -250,7 +263,7 @@ export class AgentRunner {
// -----------------------------------------------------------------
while (true) {
// Respect abort before each LLM call.
if (effectiveAbortSignal?.aborted) {
break
}
@@ -311,6 +324,15 @@ export class AgentRunner {
// Step 3: Decide whether to continue looping.
// ------------------------------------------------------------------
if (toolUseBlocks.length === 0) {
// Warn on first turn if tools were provided but model didn't use them.
if (turns === 1 && toolDefs.length > 0 && options.onWarning) {
const agentName = this.options.agentName ?? 'unknown'
options.onWarning(
`Agent "${agentName}" has ${toolDefs.length} tool(s) available but the model ` +
`returned no tool calls. If using a local model, verify it supports tool calling ` +
`(see https://ollama.com/search?c=tools).`,
)
}
// No tools requested — this is the terminal assistant turn.
finalOutput = turnText
break

View File

@@ -313,7 +313,8 @@ export class CopilotAdapter implements LLMAdapter {
},
)
const toolNames = options.tools?.map(t => t.name)
return fromOpenAICompletion(completion, toolNames)
}
// -------------------------------------------------------------------------

View File

@@ -25,6 +25,7 @@ import type {
TextBlock,
ToolUseBlock,
} from '../types.js'
import { extractToolCallsFromText } from '../tool/text-tool-extractor.js'
// ---------------------------------------------------------------------------
// Framework → OpenAI
@@ -166,8 +167,18 @@ function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessa
*
* Takes only the first choice (index 0), consistent with how the framework
* is designed for single-output agents.
*
* @param completion - The raw OpenAI completion.
* @param knownToolNames - Optional whitelist of tool names. When the model
* returns no `tool_calls` but the text contains JSON
* that looks like a tool call, the fallback extractor
* uses this list to validate matches. Pass the names
* of tools sent in the request for best results.
*/
export function fromOpenAICompletion(
completion: ChatCompletion,
knownToolNames?: string[],
): LLMResponse {
const choice = completion.choices[0]
if (choice === undefined) {
throw new Error('OpenAI returned a completion with no choices')
@@ -201,7 +212,35 @@ export function fromOpenAICompletion(completion: ChatCompletion): LLMResponse {
content.push(toolUseBlock)
}
// ---------------------------------------------------------------------------
// Fallback: extract tool calls from text when native tool_calls is empty.
//
// Some local models (Ollama thinking models, misconfigured vLLM) return tool
// calls as plain text instead of using the tool_calls wire format. When we
// have text but no tool_calls, try to extract them from the text.
// ---------------------------------------------------------------------------
const hasNativeToolCalls = (message.tool_calls ?? []).length > 0
if (
!hasNativeToolCalls &&
knownToolNames !== undefined &&
knownToolNames.length > 0 &&
message.content !== null &&
message.content !== undefined &&
message.content.length > 0
) {
const extracted = extractToolCallsFromText(message.content, knownToolNames)
if (extracted.length > 0) {
content.push(...extracted)
}
}
const hasToolUseBlocks = content.some(b => b.type === 'tool_use')
const rawStopReason = choice.finish_reason ?? 'stop'
// If we extracted tool calls from text but the finish_reason was 'stop',
// correct it to 'tool_use' so the agent runner continues the loop.
const stopReason = hasToolUseBlocks && rawStopReason === 'stop'
? 'tool_use'
: normalizeFinishReason(rawStopReason)
return {
id: completion.id,

View File

@@ -54,6 +54,7 @@ import {
normalizeFinishReason,
buildOpenAIMessageList,
} from './openai-common.js'
import { extractToolCallsFromText } from '../tool/text-tool-extractor.js'
// ---------------------------------------------------------------------------
// Adapter implementation
@@ -104,7 +105,8 @@ export class OpenAIAdapter implements LLMAdapter {
},
)
const toolNames = options.tools?.map(t => t.name)
return fromOpenAICompletion(completion, toolNames)
}
// -------------------------------------------------------------------------
@@ -241,11 +243,29 @@
}
doneContent.push(...finalToolUseBlocks)
// Fallback: extract tool calls from text when streaming produced no
// native tool_calls (same logic as fromOpenAICompletion).
if (finalToolUseBlocks.length === 0 && fullText.length > 0 && options.tools) {
const toolNames = options.tools.map(t => t.name)
const extracted = extractToolCallsFromText(fullText, toolNames)
if (extracted.length > 0) {
doneContent.push(...extracted)
for (const block of extracted) {
yield { type: 'tool_use', data: block } satisfies StreamEvent
}
}
}
const hasToolUseBlocks = doneContent.some(b => b.type === 'tool_use')
const resolvedStopReason = hasToolUseBlocks && finalFinishReason === 'stop'
? 'tool_use'
: normalizeFinishReason(finalFinishReason)
const finalResponse: LLMResponse = {
id: completionId,
content: doneContent,
model: completionModel,
stop_reason: resolvedStopReason,
usage: { input_tokens: inputTokens, output_tokens: outputTokens },
}

View File

@@ -0,0 +1,228 @@
/**
* @fileoverview Fallback tool-call extractor for local models.
*
* When a local model (Ollama, vLLM, LM Studio) returns tool calls as plain
* text instead of using the OpenAI `tool_calls` wire format, this module
* attempts to extract them from the text output.
*
* Common scenarios:
* - Ollama thinking-model bug: tool call JSON ends up inside unclosed `<think>` tags
* - Model outputs raw JSON tool calls without the server parsing them
* - Model wraps tool calls in markdown code fences
* - Hermes-format `<tool_call>` tags
*
* This is a **safety net**, not the primary path. Native `tool_calls` from
* the server are always preferred.
*/
import type { ToolUseBlock } from '../types.js'
// ---------------------------------------------------------------------------
// ID generation
// ---------------------------------------------------------------------------
let callCounter = 0
/** Generate a unique tool-call ID for extracted calls. */
function generateToolCallId(): string {
return `extracted_call_${Date.now()}_${++callCounter}`
}
// ---------------------------------------------------------------------------
// Internal parsers
// ---------------------------------------------------------------------------
/**
* Try to parse a single JSON object as a tool call.
*
* Accepted shapes:
* ```json
* { "name": "bash", "arguments": { "command": "ls" } }
* { "name": "bash", "parameters": { "command": "ls" } }
* { "function": { "name": "bash", "arguments": { "command": "ls" } } }
* ```
*/
function parseToolCallJSON(
json: unknown,
knownToolNames: ReadonlySet<string>,
): ToolUseBlock | null {
if (json === null || typeof json !== 'object' || Array.isArray(json)) {
return null
}
const obj = json as Record<string, unknown>
// Shape: { function: { name, arguments } }
if (typeof obj['function'] === 'object' && obj['function'] !== null) {
const fn = obj['function'] as Record<string, unknown>
return parseFlat(fn, knownToolNames)
}
// Shape: { name, arguments|parameters }
return parseFlat(obj, knownToolNames)
}
function parseFlat(
obj: Record<string, unknown>,
knownToolNames: ReadonlySet<string>,
): ToolUseBlock | null {
const name = obj['name']
if (typeof name !== 'string' || name.length === 0) return null
// Whitelist check — don't treat arbitrary JSON as a tool call
if (knownToolNames.size > 0 && !knownToolNames.has(name)) return null
let input: Record<string, unknown> = {}
const args = obj['arguments'] ?? obj['parameters'] ?? obj['input']
if (args !== null && args !== undefined) {
if (typeof args === 'string') {
try {
const parsed = JSON.parse(args)
if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
input = parsed as Record<string, unknown>
}
} catch {
// Malformed — use empty input
}
} else if (typeof args === 'object' && !Array.isArray(args)) {
input = args as Record<string, unknown>
}
}
return {
type: 'tool_use',
id: generateToolCallId(),
name,
input,
}
}
// ---------------------------------------------------------------------------
// JSON extraction from text
// ---------------------------------------------------------------------------
/**
* Find all top-level JSON objects in a string by tracking brace depth.
* Returns the parsed objects (not sub-objects).
*/
function extractJSONObjects(text: string): unknown[] {
const results: unknown[] = []
let depth = 0
let start = -1
let inString = false
let escape = false
for (let i = 0; i < text.length; i++) {
const ch = text[i]!
if (escape) {
escape = false
continue
}
if (ch === '\\' && inString) {
escape = true
continue
}
if (ch === '"') {
inString = !inString
continue
}
if (inString) continue
if (ch === '{') {
if (depth === 0) start = i
depth++
} else if (ch === '}') {
depth--
if (depth === 0 && start !== -1) {
const candidate = text.slice(start, i + 1)
try {
results.push(JSON.parse(candidate))
} catch {
// Not valid JSON — skip
}
start = -1
}
}
}
return results
}
// ---------------------------------------------------------------------------
// Hermes format: <tool_call>...</tool_call>
// ---------------------------------------------------------------------------
function extractHermesToolCalls(
text: string,
knownToolNames: ReadonlySet<string>,
): ToolUseBlock[] {
const results: ToolUseBlock[] = []
for (const match of text.matchAll(/<tool_call>\s*([\s\S]*?)\s*<\/tool_call>/g)) {
const inner = match[1]!.trim()
try {
const parsed: unknown = JSON.parse(inner)
const block = parseToolCallJSON(parsed, knownToolNames)
if (block !== null) results.push(block)
} catch {
// Malformed hermes content — skip
}
}
return results
}
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/**
* Attempt to extract tool calls from a model's text output.
*
* Tries multiple strategies in order:
* 1. Hermes `<tool_call>` tags
* 2. JSON objects in text (bare or inside code fences)
*
* @param text - The model's text output.
* @param knownToolNames - Whitelist of registered tool names. When non-empty,
* only JSON objects whose `name` matches a known tool
* are treated as tool calls.
* @returns Extracted {@link ToolUseBlock}s, or an empty array if none found.
*/
export function extractToolCallsFromText(
text: string,
knownToolNames: string[],
): ToolUseBlock[] {
if (text.length === 0) return []
const nameSet = new Set(knownToolNames)
// Strategy 1: Hermes format
const hermesResults = extractHermesToolCalls(text, nameSet)
if (hermesResults.length > 0) return hermesResults
// Strategy 2: Strip code fences, then extract JSON objects
const stripped = text.replace(/```(?:json)?\s*\n?([\s\S]*?)\n?\s*```/g, '$1')
const jsonObjects = extractJSONObjects(stripped)
const results: ToolUseBlock[] = []
for (const obj of jsonObjects) {
// Handle array of tool calls
if (Array.isArray(obj)) {
for (const item of obj) {
const block = parseToolCallJSON(item, nameSet)
if (block !== null) results.push(block)
}
continue
}
const block = parseToolCallJSON(obj, nameSet)
if (block !== null) results.push(block)
}
return results
}

View File

@@ -209,6 +209,12 @@ export interface AgentConfig {
readonly maxTurns?: number
readonly maxTokens?: number
readonly temperature?: number
/**
* Maximum wall-clock time (in milliseconds) for the entire agent run.
* When exceeded, the run is aborted via `AbortSignal.timeout()`.
* Useful for local models where inference can be unpredictably slow.
*/
readonly timeoutMs?: number
/**
* Optional Zod schema for structured output. When set, the agent's final
* output is parsed as JSON and validated against this schema. A single

View File

@@ -0,0 +1,159 @@
import { describe, it, expect } from 'vitest'
import { fromOpenAICompletion } from '../src/llm/openai-common.js'
import type { ChatCompletion } from 'openai/resources/chat/completions/index.js'
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
function makeCompletion(overrides: {
content?: string | null
tool_calls?: ChatCompletion.Choice['message']['tool_calls']
finish_reason?: string
}): ChatCompletion {
return {
id: 'chatcmpl-test',
object: 'chat.completion',
created: Date.now(),
model: 'test-model',
choices: [
{
index: 0,
message: {
role: 'assistant',
content: overrides.content ?? null,
tool_calls: overrides.tool_calls,
refusal: null,
},
finish_reason: (overrides.finish_reason ?? 'stop') as 'stop' | 'tool_calls',
logprobs: null,
},
],
usage: {
prompt_tokens: 10,
completion_tokens: 20,
total_tokens: 30,
},
}
}
const TOOL_NAMES = ['bash', 'file_read', 'file_write']
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
describe('fromOpenAICompletion fallback extraction', () => {
it('returns normal tool_calls when present (no fallback)', () => {
const completion = makeCompletion({
content: 'Let me run a command.',
tool_calls: [
{
id: 'call_123',
type: 'function',
function: {
name: 'bash',
arguments: '{"command": "ls"}',
},
},
],
finish_reason: 'tool_calls',
})
const response = fromOpenAICompletion(completion, TOOL_NAMES)
const toolBlocks = response.content.filter(b => b.type === 'tool_use')
expect(toolBlocks).toHaveLength(1)
expect(toolBlocks[0]!.type === 'tool_use' && toolBlocks[0]!.name).toBe('bash')
expect(toolBlocks[0]!.type === 'tool_use' && toolBlocks[0]!.id).toBe('call_123')
expect(response.stop_reason).toBe('tool_use')
})
it('extracts tool calls from text when tool_calls is absent', () => {
const completion = makeCompletion({
content: 'I will run this:\n{"name": "bash", "arguments": {"command": "pwd"}}',
finish_reason: 'stop',
})
const response = fromOpenAICompletion(completion, TOOL_NAMES)
const toolBlocks = response.content.filter(b => b.type === 'tool_use')
expect(toolBlocks).toHaveLength(1)
expect(toolBlocks[0]!.type === 'tool_use' && toolBlocks[0]!.name).toBe('bash')
expect(toolBlocks[0]!.type === 'tool_use' && toolBlocks[0]!.input).toEqual({ command: 'pwd' })
// stop_reason should be corrected to tool_use
expect(response.stop_reason).toBe('tool_use')
})
it('does not fallback when knownToolNames is not provided', () => {
const completion = makeCompletion({
content: '{"name": "bash", "arguments": {"command": "ls"}}',
finish_reason: 'stop',
})
const response = fromOpenAICompletion(completion)
const toolBlocks = response.content.filter(b => b.type === 'tool_use')
expect(toolBlocks).toHaveLength(0)
expect(response.stop_reason).toBe('end_turn')
})
it('does not fallback when knownToolNames is empty', () => {
const completion = makeCompletion({
content: '{"name": "bash", "arguments": {"command": "ls"}}',
finish_reason: 'stop',
})
const response = fromOpenAICompletion(completion, [])
const toolBlocks = response.content.filter(b => b.type === 'tool_use')
expect(toolBlocks).toHaveLength(0)
expect(response.stop_reason).toBe('end_turn')
})
it('returns plain text when no tool calls found in text', () => {
const completion = makeCompletion({
content: 'Hello! How can I help you today?',
finish_reason: 'stop',
})
const response = fromOpenAICompletion(completion, TOOL_NAMES)
const toolBlocks = response.content.filter(b => b.type === 'tool_use')
expect(toolBlocks).toHaveLength(0)
expect(response.stop_reason).toBe('end_turn')
})
it('preserves text block alongside extracted tool blocks', () => {
const completion = makeCompletion({
content: 'Let me check:\n{"name": "file_read", "arguments": {"path": "/tmp/x"}}',
finish_reason: 'stop',
})
const response = fromOpenAICompletion(completion, TOOL_NAMES)
const textBlocks = response.content.filter(b => b.type === 'text')
const toolBlocks = response.content.filter(b => b.type === 'tool_use')
expect(textBlocks).toHaveLength(1)
expect(toolBlocks).toHaveLength(1)
})
it('does not double-extract when native tool_calls already present', () => {
// Text also contains a tool call JSON, but native tool_calls is populated.
// The fallback should NOT run.
const completion = makeCompletion({
content: '{"name": "file_read", "arguments": {"path": "/tmp/y"}}',
tool_calls: [
{
id: 'call_native',
type: 'function',
function: {
name: 'bash',
arguments: '{"command": "ls"}',
},
},
],
finish_reason: 'tool_calls',
})
const response = fromOpenAICompletion(completion, TOOL_NAMES)
const toolBlocks = response.content.filter(b => b.type === 'tool_use')
// Should only have the native one, not the text-extracted one
expect(toolBlocks).toHaveLength(1)
expect(toolBlocks[0]!.type === 'tool_use' && toolBlocks[0]!.id).toBe('call_native')
})
})

View File

@@ -0,0 +1,170 @@
import { describe, it, expect } from 'vitest'
import { extractToolCallsFromText } from '../src/tool/text-tool-extractor.js'
const TOOLS = ['bash', 'file_read', 'file_write']
describe('extractToolCallsFromText', () => {
// -------------------------------------------------------------------------
// No tool calls
// -------------------------------------------------------------------------
it('returns empty array for empty text', () => {
expect(extractToolCallsFromText('', TOOLS)).toEqual([])
})
it('returns empty array for plain text with no JSON', () => {
expect(extractToolCallsFromText('Hello, I am a helpful assistant.', TOOLS)).toEqual([])
})
it('returns empty array for JSON that does not match any known tool', () => {
const text = '{"name": "unknown_tool", "arguments": {"x": 1}}'
expect(extractToolCallsFromText(text, TOOLS)).toEqual([])
})
// -------------------------------------------------------------------------
// Bare JSON
// -------------------------------------------------------------------------
it('extracts a bare JSON tool call with "arguments"', () => {
const text = 'I will run this command:\n{"name": "bash", "arguments": {"command": "ls -la"}}'
const result = extractToolCallsFromText(text, TOOLS)
expect(result).toHaveLength(1)
expect(result[0]!.type).toBe('tool_use')
expect(result[0]!.name).toBe('bash')
expect(result[0]!.input).toEqual({ command: 'ls -la' })
expect(result[0]!.id).toMatch(/^extracted_call_/)
})
it('extracts a bare JSON tool call with "parameters"', () => {
const text = '{"name": "file_read", "parameters": {"path": "/tmp/test.txt"}}'
const result = extractToolCallsFromText(text, TOOLS)
expect(result).toHaveLength(1)
expect(result[0]!.name).toBe('file_read')
expect(result[0]!.input).toEqual({ path: '/tmp/test.txt' })
})
it('extracts a bare JSON tool call with "input"', () => {
const text = '{"name": "bash", "input": {"command": "pwd"}}'
const result = extractToolCallsFromText(text, TOOLS)
expect(result).toHaveLength(1)
expect(result[0]!.name).toBe('bash')
expect(result[0]!.input).toEqual({ command: 'pwd' })
})
it('extracts { function: { name, arguments } } shape', () => {
const text = '{"function": {"name": "bash", "arguments": {"command": "echo hi"}}}'
const result = extractToolCallsFromText(text, TOOLS)
expect(result).toHaveLength(1)
expect(result[0]!.name).toBe('bash')
expect(result[0]!.input).toEqual({ command: 'echo hi' })
})
it('handles string-encoded arguments', () => {
const text = '{"name": "bash", "arguments": "{\\"command\\": \\"ls\\"}"}'
const result = extractToolCallsFromText(text, TOOLS)
expect(result).toHaveLength(1)
expect(result[0]!.input).toEqual({ command: 'ls' })
})
// -------------------------------------------------------------------------
// Multiple tool calls
// -------------------------------------------------------------------------
it('extracts multiple tool calls from text', () => {
const text = `Let me do two things:
{"name": "bash", "arguments": {"command": "ls"}}
And then:
{"name": "file_read", "arguments": {"path": "/tmp/x"}}`
const result = extractToolCallsFromText(text, TOOLS)
expect(result).toHaveLength(2)
expect(result[0]!.name).toBe('bash')
expect(result[1]!.name).toBe('file_read')
})
// -------------------------------------------------------------------------
// Code fence wrapped
// -------------------------------------------------------------------------
it('extracts tool call from markdown code fence', () => {
const text = 'Here is the tool call:\n```json\n{"name": "bash", "arguments": {"command": "whoami"}}\n```'
const result = extractToolCallsFromText(text, TOOLS)
expect(result).toHaveLength(1)
expect(result[0]!.name).toBe('bash')
expect(result[0]!.input).toEqual({ command: 'whoami' })
})
it('extracts tool call from code fence without language tag', () => {
const text = '```\n{"name": "file_write", "arguments": {"path": "/tmp/a.txt", "content": "hi"}}\n```'
const result = extractToolCallsFromText(text, TOOLS)
expect(result).toHaveLength(1)
expect(result[0]!.name).toBe('file_write')
})
// -------------------------------------------------------------------------
// Hermes format
// -------------------------------------------------------------------------
it('extracts tool call from <tool_call> tags', () => {
const text = '<tool_call>\n{"name": "bash", "arguments": {"command": "date"}}\n</tool_call>'
const result = extractToolCallsFromText(text, TOOLS)
expect(result).toHaveLength(1)
expect(result[0]!.name).toBe('bash')
expect(result[0]!.input).toEqual({ command: 'date' })
})
it('extracts multiple hermes tool calls', () => {
const text = `<tool_call>{"name": "bash", "arguments": {"command": "ls"}}</tool_call>
Some text in between
<tool_call>{"name": "file_read", "arguments": {"path": "/tmp/x"}}</tool_call>`
const result = extractToolCallsFromText(text, TOOLS)
expect(result).toHaveLength(2)
expect(result[0]!.name).toBe('bash')
expect(result[1]!.name).toBe('file_read')
})
// -------------------------------------------------------------------------
// Edge cases
// -------------------------------------------------------------------------
it('skips malformed JSON gracefully', () => {
const text = '{"name": "bash", "arguments": {invalid json}}'
const result = extractToolCallsFromText(text, TOOLS)
expect(result).toEqual([])
})
it('skips JSON objects without a name field', () => {
const text = '{"command": "ls", "arguments": {"x": 1}}'
const result = extractToolCallsFromText(text, TOOLS)
expect(result).toEqual([])
})
it('works with empty knownToolNames (no whitelist filtering)', () => {
const text = '{"name": "anything", "arguments": {"x": 1}}'
const result = extractToolCallsFromText(text, [])
expect(result).toHaveLength(1)
expect(result[0]!.name).toBe('anything')
})
it('generates unique IDs for each extracted call', () => {
const text = `{"name": "bash", "arguments": {"command": "a"}}
{"name": "bash", "arguments": {"command": "b"}}`
const result = extractToolCallsFromText(text, TOOLS)
expect(result).toHaveLength(2)
expect(result[0]!.id).not.toBe(result[1]!.id)
})
it('handles tool call with no arguments', () => {
const text = '{"name": "bash"}'
const result = extractToolCallsFromText(text, TOOLS)
expect(result).toHaveLength(1)
expect(result[0]!.input).toEqual({})
})
it('handles text with nested JSON objects that are not tool calls', () => {
const text = `Here is some config: {"port": 3000, "host": "localhost"}
And a tool call: {"name": "bash", "arguments": {"command": "ls"}}`
const result = extractToolCallsFromText(text, TOOLS)
expect(result).toHaveLength(1)
expect(result[0]!.name).toBe('bash')
})
})