diff --git a/README.md b/README.md index 103bdb9..b762ae6 100644 --- a/README.md +++ b/README.md @@ -197,6 +197,33 @@ Verified local models with tool-calling: **Gemma 4** (see [example 08](examples/ Any OpenAI-compatible API should work via `provider: 'openai'` + `baseURL` (DeepSeek, Groq, Mistral, Qwen, MiniMax, etc.). **Grok now has first-class support** via `provider: 'grok'`. +### Local Model Tool-Calling + +The framework supports tool-calling with local models served by Ollama, vLLM, LM Studio, or llama.cpp. Tool-calling is handled natively by these servers via the OpenAI-compatible API. + +**Verified models:** Gemma 4, Llama 3.1, Qwen 3, Mistral, Phi-4. See the full list at [ollama.com/search?c=tools](https://ollama.com/search?c=tools). + +**Fallback extraction:** If a local model returns tool calls as text instead of using the `tool_calls` wire format (common with thinking models or misconfigured servers), the framework automatically extracts them from the text output. + +**Timeout:** Local inference can be slow. Use `timeoutMs` on `AgentConfig` to prevent indefinite hangs: + +```typescript +const localAgent: AgentConfig = { + name: 'local', + model: 'llama3.1', + provider: 'openai', + baseURL: 'http://localhost:11434/v1', + apiKey: 'ollama', + tools: ['bash', 'file_read'], + timeoutMs: 120_000, // abort after 2 minutes +} +``` + +**Troubleshooting:** +- Model not calling tools? Ensure it appears in Ollama's [Tools category](https://ollama.com/search?c=tools). Not all models support tool-calling. +- Using Ollama? Update to the latest version (`ollama update`) — older versions have known tool-calling bugs. +- Proxy interfering? Use `no_proxy=localhost` when running against local servers. 
+ ### LLM Configuration Examples ```typescript diff --git a/examples/06-local-model.ts b/examples/06-local-model.ts index d7cf292..977950b 100644 --- a/examples/06-local-model.ts +++ b/examples/06-local-model.ts @@ -64,6 +64,7 @@ Your review MUST include these sections: Be specific and constructive. Reference line numbers or function names when possible.`, tools: ['file_read'], maxTurns: 4, + timeoutMs: 120_000, // 2 min — local models can be slow } // --------------------------------------------------------------------------- diff --git a/src/agent/agent.ts b/src/agent/agent.ts index caf5a9c..3290347 100644 --- a/src/agent/agent.ts +++ b/src/agent/agent.ts @@ -50,6 +50,19 @@ import { const ZERO_USAGE: TokenUsage = { input_tokens: 0, output_tokens: 0 } +/** + * Combine two {@link AbortSignal}s so that aborting either one cancels the + * returned signal. Works on Node 18+ (no `AbortSignal.any` required). + */ +function mergeAbortSignals(a: AbortSignal, b: AbortSignal): AbortSignal { + const controller = new AbortController() + if (a.aborted || b.aborted) { controller.abort(); return controller.signal } + const abort = () => controller.abort() + a.addEventListener('abort', abort, { once: true }) + b.addEventListener('abort', abort, { once: true }) + return controller.signal +} + function addUsage(a: TokenUsage, b: TokenUsage): TokenUsage { return { input_tokens: a.input_tokens + b.input_tokens, @@ -293,10 +306,22 @@ export class Agent { } // Auto-generate runId when onTrace is provided but runId is missing const needsRunId = callerOptions?.onTrace && !callerOptions.runId + // Create a fresh timeout signal per run (not per runner) so that + // each run() / prompt() call gets its own timeout window. + const timeoutSignal = this.config.timeoutMs !== undefined && this.config.timeoutMs > 0 + ? AbortSignal.timeout(this.config.timeoutMs) + : undefined + // Merge caller-provided abortSignal with the timeout signal so that + // either cancellation source is respected. 
+ const callerAbort = callerOptions?.abortSignal + const effectiveAbort = timeoutSignal && callerAbort + ? mergeAbortSignals(timeoutSignal, callerAbort) + : timeoutSignal ?? callerAbort const runOptions: RunOptions = { ...callerOptions, onMessage: internalOnMessage, ...(needsRunId ? { runId: generateRunId() } : undefined), + ...(effectiveAbort ? { abortSignal: effectiveAbort } : undefined), } const result = await runner.run(messages, runOptions) @@ -466,8 +491,12 @@ export class Agent { } const runner = await this.getRunner() + // Fresh timeout per stream call, same as executeRun. + const timeoutSignal = this.config.timeoutMs !== undefined && this.config.timeoutMs > 0 + ? AbortSignal.timeout(this.config.timeoutMs) + : undefined - for await (const event of runner.stream(messages)) { + for await (const event of runner.stream(messages, timeoutSignal ? { abortSignal: timeoutSignal } : {})) { if (event.type === 'done') { const result = event.data as import('./runner.js').RunResult this.state.tokenUsage = addUsage(this.state.tokenUsage, result.tokenUsage) diff --git a/src/agent/runner.ts b/src/agent/runner.ts index 113f93c..5e74254 100644 --- a/src/agent/runner.ts +++ b/src/agent/runner.ts @@ -78,6 +78,11 @@ export interface RunOptions { readonly onToolResult?: (name: string, result: ToolResult) => void /** Fired after each complete {@link LLMMessage} is appended. */ readonly onMessage?: (message: LLMMessage) => void + /** + * Fired when the runner detects a potential configuration issue. + * For example, when a model appears to ignore tool definitions. + */ + readonly onWarning?: (message: string) => void /** Trace callback for observability spans. Async callbacks are safe. */ readonly onTrace?: (event: TraceEvent) => void | Promise /** Run ID for trace correlation. */ @@ -86,6 +91,11 @@ export interface RunOptions { readonly taskId?: string /** Agent name for trace correlation (overrides RunnerOptions.agentName). 
*/ readonly traceAgent?: string + /** + * Per-call abort signal. When set, takes precedence over the static + * {@link RunnerOptions.abortSignal}. Useful for per-run timeouts. + */ + readonly abortSignal?: AbortSignal } /** The aggregated result returned when a full run completes. */ @@ -235,13 +245,16 @@ export class AgentRunner { ? allDefs.filter(d => this.options.allowedTools!.includes(d.name)) : allDefs + // Per-call abortSignal takes precedence over the static one. + const effectiveAbortSignal = options.abortSignal ?? this.options.abortSignal + const baseChatOptions: LLMChatOptions = { model: this.options.model, tools: toolDefs.length > 0 ? toolDefs : undefined, maxTokens: this.options.maxTokens, temperature: this.options.temperature, systemPrompt: this.options.systemPrompt, - abortSignal: this.options.abortSignal, + abortSignal: effectiveAbortSignal, } try { @@ -250,7 +263,7 @@ export class AgentRunner { // ----------------------------------------------------------------- while (true) { // Respect abort before each LLM call. - if (this.options.abortSignal?.aborted) { + if (effectiveAbortSignal?.aborted) { break } @@ -311,6 +324,15 @@ export class AgentRunner { // Step 3: Decide whether to continue looping. // ------------------------------------------------------------------ if (toolUseBlocks.length === 0) { + // Warn on first turn if tools were provided but model didn't use them. + if (turns === 1 && toolDefs.length > 0 && options.onWarning) { + const agentName = this.options.agentName ?? 'unknown' + options.onWarning( + `Agent "${agentName}" has ${toolDefs.length} tool(s) available but the model ` + + `returned no tool calls. If using a local model, verify it supports tool calling ` + + `(see https://ollama.com/search?c=tools).`, + ) + } // No tools requested — this is the terminal assistant turn. 
finalOutput = turnText break diff --git a/src/llm/copilot.ts b/src/llm/copilot.ts index 7e829fe..44349f8 100644 --- a/src/llm/copilot.ts +++ b/src/llm/copilot.ts @@ -313,7 +313,8 @@ export class CopilotAdapter implements LLMAdapter { }, ) - return fromOpenAICompletion(completion) + const toolNames = options.tools?.map(t => t.name) + return fromOpenAICompletion(completion, toolNames) } // ------------------------------------------------------------------------- diff --git a/src/llm/openai-common.ts b/src/llm/openai-common.ts index 46fc67a..cdb16a0 100644 --- a/src/llm/openai-common.ts +++ b/src/llm/openai-common.ts @@ -25,6 +25,7 @@ import type { TextBlock, ToolUseBlock, } from '../types.js' +import { extractToolCallsFromText } from '../tool/text-tool-extractor.js' // --------------------------------------------------------------------------- // Framework → OpenAI @@ -166,8 +167,18 @@ function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessa * * Takes only the first choice (index 0), consistent with how the framework * is designed for single-output agents. + * + * @param completion - The raw OpenAI completion. + * @param knownToolNames - Optional whitelist of tool names. When the model + * returns no `tool_calls` but the text contains JSON + * that looks like a tool call, the fallback extractor + * uses this list to validate matches. Pass the names + * of tools sent in the request for best results. */ -export function fromOpenAICompletion(completion: ChatCompletion): LLMResponse { +export function fromOpenAICompletion( + completion: ChatCompletion, + knownToolNames?: string[], +): LLMResponse { const choice = completion.choices[0] if (choice === undefined) { throw new Error('OpenAI returned a completion with no choices') @@ -201,7 +212,35 @@ export function fromOpenAICompletion(completion: ChatCompletion): LLMResponse { content.push(toolUseBlock) } - const stopReason = normalizeFinishReason(choice.finish_reason ?? 
'stop') + // --------------------------------------------------------------------------- + // Fallback: extract tool calls from text when native tool_calls is empty. + // + // Some local models (Ollama thinking models, misconfigured vLLM) return tool + // calls as plain text instead of using the tool_calls wire format. When we + // have text but no tool_calls, try to extract them from the text. + // --------------------------------------------------------------------------- + const hasNativeToolCalls = (message.tool_calls ?? []).length > 0 + if ( + !hasNativeToolCalls && + knownToolNames !== undefined && + knownToolNames.length > 0 && + message.content !== null && + message.content !== undefined && + message.content.length > 0 + ) { + const extracted = extractToolCallsFromText(message.content, knownToolNames) + if (extracted.length > 0) { + content.push(...extracted) + } + } + + const hasToolUseBlocks = content.some(b => b.type === 'tool_use') + const rawStopReason = choice.finish_reason ?? 'stop' + // If we extracted tool calls from text but the finish_reason was 'stop', + // correct it to 'tool_use' so the agent runner continues the loop. + const stopReason = hasToolUseBlocks && rawStopReason === 'stop' + ? 
'tool_use' + : normalizeFinishReason(rawStopReason) return { id: completion.id, diff --git a/src/llm/openai.ts b/src/llm/openai.ts index e3f166f..cd48086 100644 --- a/src/llm/openai.ts +++ b/src/llm/openai.ts @@ -54,6 +54,7 @@ import { normalizeFinishReason, buildOpenAIMessageList, } from './openai-common.js' +import { extractToolCallsFromText } from '../tool/text-tool-extractor.js' // --------------------------------------------------------------------------- // Adapter implementation @@ -104,7 +105,8 @@ export class OpenAIAdapter implements LLMAdapter { }, ) - return fromOpenAICompletion(completion) + const toolNames = options.tools?.map(t => t.name) + return fromOpenAICompletion(completion, toolNames) } // ------------------------------------------------------------------------- @@ -241,11 +243,29 @@ export class OpenAIAdapter implements LLMAdapter { } doneContent.push(...finalToolUseBlocks) + // Fallback: extract tool calls from text when streaming produced no + // native tool_calls (same logic as fromOpenAICompletion). + if (finalToolUseBlocks.length === 0 && fullText.length > 0 && options.tools) { + const toolNames = options.tools.map(t => t.name) + const extracted = extractToolCallsFromText(fullText, toolNames) + if (extracted.length > 0) { + doneContent.push(...extracted) + for (const block of extracted) { + yield { type: 'tool_use', data: block } satisfies StreamEvent + } + } + } + + const hasToolUseBlocks = doneContent.some(b => b.type === 'tool_use') + const resolvedStopReason = hasToolUseBlocks && finalFinishReason === 'stop' + ? 
'tool_use' + : normalizeFinishReason(finalFinishReason) + + const finalResponse: LLMResponse = { id: completionId, content: doneContent, model: completionModel, - stop_reason: normalizeFinishReason(finalFinishReason), + stop_reason: resolvedStopReason, usage: { input_tokens: inputTokens, output_tokens: outputTokens }, } diff --git a/src/tool/text-tool-extractor.ts b/src/tool/text-tool-extractor.ts new file mode 100644 index 0000000..8c64d1d --- /dev/null +++ b/src/tool/text-tool-extractor.ts @@ -0,0 +1,219 @@ +/** + * @fileoverview Fallback tool-call extractor for local models. + * + * When a local model (Ollama, vLLM, LM Studio) returns tool calls as plain + * text instead of using the OpenAI `tool_calls` wire format, this module + * attempts to extract them from the text output. + * + * Common scenarios: + * - Ollama thinking-model bug: tool call JSON ends up inside unclosed `<think>` tags + * - Model outputs raw JSON tool calls without the server parsing them + * - Model wraps tool calls in markdown code fences + * - Hermes-format `<tool_call>` tags + * + * This is a **safety net**, not the primary path. Native `tool_calls` from + * the server are always preferred. + */ + +import type { ToolUseBlock } from '../types.js' + +// --------------------------------------------------------------------------- +// ID generation +// --------------------------------------------------------------------------- + +let callCounter = 0 + +/** Generate a unique tool-call ID for extracted calls. */ +function generateToolCallId(): string { + return `extracted_call_${Date.now()}_${++callCounter}` +} + +// --------------------------------------------------------------------------- +// Internal parsers +// --------------------------------------------------------------------------- + +/** + * Try to parse a single JSON object as a tool call. 
+ * + * Accepted shapes: + * ```json + * { "name": "bash", "arguments": { "command": "ls" } } + * { "name": "bash", "parameters": { "command": "ls" } } + * { "function": { "name": "bash", "arguments": { "command": "ls" } } } + * ``` + */ +function parseToolCallJSON( + json: unknown, + knownToolNames: ReadonlySet<string>, +): ToolUseBlock | null { + if (json === null || typeof json !== 'object' || Array.isArray(json)) { + return null + } + + const obj = json as Record<string, unknown> + + // Shape: { function: { name, arguments } } + if (typeof obj['function'] === 'object' && obj['function'] !== null) { + const fn = obj['function'] as Record<string, unknown> + return parseFlat(fn, knownToolNames) + } + + // Shape: { name, arguments|parameters } + return parseFlat(obj, knownToolNames) +} + +function parseFlat( + obj: Record<string, unknown>, + knownToolNames: ReadonlySet<string>, +): ToolUseBlock | null { + const name = obj['name'] + if (typeof name !== 'string' || name.length === 0) return null + + // Whitelist check — don't treat arbitrary JSON as a tool call + if (knownToolNames.size > 0 && !knownToolNames.has(name)) return null + + let input: Record<string, unknown> = {} + const args = obj['arguments'] ?? obj['parameters'] ?? obj['input'] + if (args !== null && args !== undefined) { + if (typeof args === 'string') { + try { + const parsed = JSON.parse(args) + if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) { + input = parsed as Record<string, unknown> + } + } catch { + // Malformed — use empty input + } + } else if (typeof args === 'object' && !Array.isArray(args)) { + input = args as Record<string, unknown> + } + } + + return { + type: 'tool_use', + id: generateToolCallId(), + name, + input, + } +} + +// --------------------------------------------------------------------------- +// JSON extraction from text +// --------------------------------------------------------------------------- + +/** + * Find all top-level JSON objects in a string by tracking brace depth. + * Returns the parsed objects (not sub-objects). 
+ */ +function extractJSONObjects(text: string): unknown[] { + const results: unknown[] = [] + let depth = 0 + let start = -1 + let inString = false + let escape = false + + for (let i = 0; i < text.length; i++) { + const ch = text[i]! + + if (escape) { + escape = false + continue + } + + if (ch === '\\' && inString) { + escape = true + continue + } + + if (ch === '"') { + inString = !inString + continue + } + + if (inString) continue + + if (ch === '{') { + if (depth === 0) start = i + depth++ + } else if (ch === '}') { + depth-- + if (depth === 0 && start !== -1) { + const candidate = text.slice(start, i + 1) + try { + results.push(JSON.parse(candidate)) + } catch { + // Not valid JSON — skip + } + start = -1 + } + } + } + + return results +} + +// --------------------------------------------------------------------------- +// Hermes format: <tool_call>...</tool_call> +// --------------------------------------------------------------------------- + +function extractHermesToolCalls( + text: string, + knownToolNames: ReadonlySet<string>, +): ToolUseBlock[] { + const results: ToolUseBlock[] = [] + + for (const match of text.matchAll(/<tool_call>\s*([\s\S]*?)\s*<\/tool_call>/g)) { + const inner = match[1]!.trim() + try { + const parsed: unknown = JSON.parse(inner) + const block = parseToolCallJSON(parsed, knownToolNames) + if (block !== null) results.push(block) + } catch { + // Malformed hermes content — skip + } + } + + return results +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +/** + * Attempt to extract tool calls from a model's text output. + * + * Tries multiple strategies in order: + * 1. Hermes `<tool_call>` tags + * 2. JSON objects in text (bare or inside code fences) + * + * @param text - The model's text output. + * @param knownToolNames - Whitelist of registered tool names. 
When non-empty, + * only JSON objects whose `name` matches a known tool + * are treated as tool calls. + * @returns Extracted {@link ToolUseBlock}s, or an empty array if none found. + */ +export function extractToolCallsFromText( + text: string, + knownToolNames: string[], +): ToolUseBlock[] { + if (text.length === 0) return [] + + const nameSet = new Set(knownToolNames) + + // Strategy 1: Hermes format + const hermesResults = extractHermesToolCalls(text, nameSet) + if (hermesResults.length > 0) return hermesResults + + // Strategy 2: Strip code fences, then extract JSON objects + const stripped = text.replace(/```(?:json)?\s*\n?([\s\S]*?)\n?\s*```/g, '$1') + const jsonObjects = extractJSONObjects(stripped) + + const results: ToolUseBlock[] = [] + for (const obj of jsonObjects) { + const block = parseToolCallJSON(obj, nameSet) + if (block !== null) results.push(block) + } + + return results +} diff --git a/src/types.ts b/src/types.ts index 2887b6c..e7e1b6b 100644 --- a/src/types.ts +++ b/src/types.ts @@ -209,6 +209,12 @@ export interface AgentConfig { readonly maxTurns?: number readonly maxTokens?: number readonly temperature?: number + /** + * Maximum wall-clock time (in milliseconds) for the entire agent run. + * When exceeded, the run is aborted via `AbortSignal.timeout()`. + * Useful for local models where inference can be unpredictably slow. + */ + readonly timeoutMs?: number /** * Optional Zod schema for structured output. When set, the agent's final * output is parsed as JSON and validated against this schema. 
A single diff --git a/tests/openai-fallback.test.ts b/tests/openai-fallback.test.ts new file mode 100644 index 0000000..6200146 --- /dev/null +++ b/tests/openai-fallback.test.ts @@ -0,0 +1,159 @@ +import { describe, it, expect } from 'vitest' +import { fromOpenAICompletion } from '../src/llm/openai-common.js' +import type { ChatCompletion } from 'openai/resources/chat/completions/index.js' + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function makeCompletion(overrides: { + content?: string | null + tool_calls?: ChatCompletion.Choice['message']['tool_calls'] + finish_reason?: string +}): ChatCompletion { + return { + id: 'chatcmpl-test', + object: 'chat.completion', + created: Date.now(), + model: 'test-model', + choices: [ + { + index: 0, + message: { + role: 'assistant', + content: overrides.content ?? null, + tool_calls: overrides.tool_calls, + refusal: null, + }, + finish_reason: (overrides.finish_reason ?? 
'stop') as 'stop' | 'tool_calls', + logprobs: null, + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 20, + total_tokens: 30, + }, + } +} + +const TOOL_NAMES = ['bash', 'file_read', 'file_write'] + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('fromOpenAICompletion fallback extraction', () => { + it('returns normal tool_calls when present (no fallback)', () => { + const completion = makeCompletion({ + content: 'Let me run a command.', + tool_calls: [ + { + id: 'call_123', + type: 'function', + function: { + name: 'bash', + arguments: '{"command": "ls"}', + }, + }, + ], + finish_reason: 'tool_calls', + }) + + const response = fromOpenAICompletion(completion, TOOL_NAMES) + const toolBlocks = response.content.filter(b => b.type === 'tool_use') + expect(toolBlocks).toHaveLength(1) + expect(toolBlocks[0]!.type === 'tool_use' && toolBlocks[0]!.name).toBe('bash') + expect(toolBlocks[0]!.type === 'tool_use' && toolBlocks[0]!.id).toBe('call_123') + expect(response.stop_reason).toBe('tool_use') + }) + + it('extracts tool calls from text when tool_calls is absent', () => { + const completion = makeCompletion({ + content: 'I will run this:\n{"name": "bash", "arguments": {"command": "pwd"}}', + finish_reason: 'stop', + }) + + const response = fromOpenAICompletion(completion, TOOL_NAMES) + const toolBlocks = response.content.filter(b => b.type === 'tool_use') + expect(toolBlocks).toHaveLength(1) + expect(toolBlocks[0]!.type === 'tool_use' && toolBlocks[0]!.name).toBe('bash') + expect(toolBlocks[0]!.type === 'tool_use' && toolBlocks[0]!.input).toEqual({ command: 'pwd' }) + // stop_reason should be corrected to tool_use + expect(response.stop_reason).toBe('tool_use') + }) + + it('does not fallback when knownToolNames is not provided', () => { + const completion = makeCompletion({ + content: '{"name": "bash", "arguments": {"command": 
"ls"}}', + finish_reason: 'stop', + }) + + const response = fromOpenAICompletion(completion) + const toolBlocks = response.content.filter(b => b.type === 'tool_use') + expect(toolBlocks).toHaveLength(0) + expect(response.stop_reason).toBe('end_turn') + }) + + it('does not fallback when knownToolNames is empty', () => { + const completion = makeCompletion({ + content: '{"name": "bash", "arguments": {"command": "ls"}}', + finish_reason: 'stop', + }) + + const response = fromOpenAICompletion(completion, []) + const toolBlocks = response.content.filter(b => b.type === 'tool_use') + expect(toolBlocks).toHaveLength(0) + expect(response.stop_reason).toBe('end_turn') + }) + + it('returns plain text when no tool calls found in text', () => { + const completion = makeCompletion({ + content: 'Hello! How can I help you today?', + finish_reason: 'stop', + }) + + const response = fromOpenAICompletion(completion, TOOL_NAMES) + const toolBlocks = response.content.filter(b => b.type === 'tool_use') + expect(toolBlocks).toHaveLength(0) + expect(response.stop_reason).toBe('end_turn') + }) + + it('preserves text block alongside extracted tool blocks', () => { + const completion = makeCompletion({ + content: 'Let me check:\n{"name": "file_read", "arguments": {"path": "/tmp/x"}}', + finish_reason: 'stop', + }) + + const response = fromOpenAICompletion(completion, TOOL_NAMES) + const textBlocks = response.content.filter(b => b.type === 'text') + const toolBlocks = response.content.filter(b => b.type === 'tool_use') + expect(textBlocks).toHaveLength(1) + expect(toolBlocks).toHaveLength(1) + }) + + it('does not double-extract when native tool_calls already present', () => { + // Text also contains a tool call JSON, but native tool_calls is populated. + // The fallback should NOT run. 
+ const completion = makeCompletion({ + content: '{"name": "file_read", "arguments": {"path": "/tmp/y"}}', + tool_calls: [ + { + id: 'call_native', + type: 'function', + function: { + name: 'bash', + arguments: '{"command": "ls"}', + }, + }, + ], + finish_reason: 'tool_calls', + }) + + const response = fromOpenAICompletion(completion, TOOL_NAMES) + const toolBlocks = response.content.filter(b => b.type === 'tool_use') + // Should only have the native one, not the text-extracted one + expect(toolBlocks).toHaveLength(1) + expect(toolBlocks[0]!.type === 'tool_use' && toolBlocks[0]!.id).toBe('call_native') + }) +}) diff --git a/tests/text-tool-extractor.test.ts b/tests/text-tool-extractor.test.ts new file mode 100644 index 0000000..dba185e --- /dev/null +++ b/tests/text-tool-extractor.test.ts @@ -0,0 +1,170 @@ +import { describe, it, expect } from 'vitest' +import { extractToolCallsFromText } from '../src/tool/text-tool-extractor.js' + +const TOOLS = ['bash', 'file_read', 'file_write'] + +describe('extractToolCallsFromText', () => { + // ------------------------------------------------------------------------- + // No tool calls + // ------------------------------------------------------------------------- + + it('returns empty array for empty text', () => { + expect(extractToolCallsFromText('', TOOLS)).toEqual([]) + }) + + it('returns empty array for plain text with no JSON', () => { + expect(extractToolCallsFromText('Hello, I am a helpful assistant.', TOOLS)).toEqual([]) + }) + + it('returns empty array for JSON that does not match any known tool', () => { + const text = '{"name": "unknown_tool", "arguments": {"x": 1}}' + expect(extractToolCallsFromText(text, TOOLS)).toEqual([]) + }) + + // ------------------------------------------------------------------------- + // Bare JSON + // ------------------------------------------------------------------------- + + it('extracts a bare JSON tool call with "arguments"', () => { + const text = 'I will run this 
command:\n{"name": "bash", "arguments": {"command": "ls -la"}}' + const result = extractToolCallsFromText(text, TOOLS) + expect(result).toHaveLength(1) + expect(result[0]!.type).toBe('tool_use') + expect(result[0]!.name).toBe('bash') + expect(result[0]!.input).toEqual({ command: 'ls -la' }) + expect(result[0]!.id).toMatch(/^extracted_call_/) + }) + + it('extracts a bare JSON tool call with "parameters"', () => { + const text = '{"name": "file_read", "parameters": {"path": "/tmp/test.txt"}}' + const result = extractToolCallsFromText(text, TOOLS) + expect(result).toHaveLength(1) + expect(result[0]!.name).toBe('file_read') + expect(result[0]!.input).toEqual({ path: '/tmp/test.txt' }) + }) + + it('extracts a bare JSON tool call with "input"', () => { + const text = '{"name": "bash", "input": {"command": "pwd"}}' + const result = extractToolCallsFromText(text, TOOLS) + expect(result).toHaveLength(1) + expect(result[0]!.name).toBe('bash') + expect(result[0]!.input).toEqual({ command: 'pwd' }) + }) + + it('extracts { function: { name, arguments } } shape', () => { + const text = '{"function": {"name": "bash", "arguments": {"command": "echo hi"}}}' + const result = extractToolCallsFromText(text, TOOLS) + expect(result).toHaveLength(1) + expect(result[0]!.name).toBe('bash') + expect(result[0]!.input).toEqual({ command: 'echo hi' }) + }) + + it('handles string-encoded arguments', () => { + const text = '{"name": "bash", "arguments": "{\\"command\\": \\"ls\\"}"}' + const result = extractToolCallsFromText(text, TOOLS) + expect(result).toHaveLength(1) + expect(result[0]!.input).toEqual({ command: 'ls' }) + }) + + // ------------------------------------------------------------------------- + // Multiple tool calls + // ------------------------------------------------------------------------- + + it('extracts multiple tool calls from text', () => { + const text = `Let me do two things: +{"name": "bash", "arguments": {"command": "ls"}} +And then: +{"name": "file_read", 
"arguments": {"path": "/tmp/x"}}` + const result = extractToolCallsFromText(text, TOOLS) + expect(result).toHaveLength(2) + expect(result[0]!.name).toBe('bash') + expect(result[1]!.name).toBe('file_read') + }) + + // ------------------------------------------------------------------------- + // Code fence wrapped + // ------------------------------------------------------------------------- + + it('extracts tool call from markdown code fence', () => { + const text = 'Here is the tool call:\n```json\n{"name": "bash", "arguments": {"command": "whoami"}}\n```' + const result = extractToolCallsFromText(text, TOOLS) + expect(result).toHaveLength(1) + expect(result[0]!.name).toBe('bash') + expect(result[0]!.input).toEqual({ command: 'whoami' }) + }) + + it('extracts tool call from code fence without language tag', () => { + const text = '```\n{"name": "file_write", "arguments": {"path": "/tmp/a.txt", "content": "hi"}}\n```' + const result = extractToolCallsFromText(text, TOOLS) + expect(result).toHaveLength(1) + expect(result[0]!.name).toBe('file_write') + }) + + // ------------------------------------------------------------------------- + // Hermes format + // ------------------------------------------------------------------------- + + it('extracts tool call from <tool_call> tags', () => { + const text = '<tool_call>\n{"name": "bash", "arguments": {"command": "date"}}\n</tool_call>' + const result = extractToolCallsFromText(text, TOOLS) + expect(result).toHaveLength(1) + expect(result[0]!.name).toBe('bash') + expect(result[0]!.input).toEqual({ command: 'date' }) + }) + + it('extracts multiple hermes tool calls', () => { + const text = `<tool_call>{"name": "bash", "arguments": {"command": "ls"}}</tool_call> +Some text in between +<tool_call>{"name": "file_read", "arguments": {"path": "/tmp/x"}}</tool_call>` + const result = extractToolCallsFromText(text, TOOLS) + expect(result).toHaveLength(2) + expect(result[0]!.name).toBe('bash') + expect(result[1]!.name).toBe('file_read') + }) + + // 
------------------------------------------------------------------------- + // Edge cases + // ------------------------------------------------------------------------- + + it('skips malformed JSON gracefully', () => { + const text = '{"name": "bash", "arguments": {invalid json}}' + const result = extractToolCallsFromText(text, TOOLS) + expect(result).toEqual([]) + }) + + it('skips JSON objects without a name field', () => { + const text = '{"command": "ls", "arguments": {"x": 1}}' + const result = extractToolCallsFromText(text, TOOLS) + expect(result).toEqual([]) + }) + + it('works with empty knownToolNames (no whitelist filtering)', () => { + const text = '{"name": "anything", "arguments": {"x": 1}}' + const result = extractToolCallsFromText(text, []) + expect(result).toHaveLength(1) + expect(result[0]!.name).toBe('anything') + }) + + it('generates unique IDs for each extracted call', () => { + const text = `{"name": "bash", "arguments": {"command": "a"}} +{"name": "bash", "arguments": {"command": "b"}}` + const result = extractToolCallsFromText(text, TOOLS) + expect(result).toHaveLength(2) + expect(result[0]!.id).not.toBe(result[1]!.id) + }) + + it('handles tool call with no arguments', () => { + const text = '{"name": "bash"}' + const result = extractToolCallsFromText(text, TOOLS) + expect(result).toHaveLength(1) + expect(result[0]!.input).toEqual({}) + }) + + it('handles text with nested JSON objects that are not tool calls', () => { + const text = `Here is some config: {"port": 3000, "host": "localhost"} +And a tool call: {"name": "bash", "arguments": {"command": "ls"}}` + const result = extractToolCallsFromText(text, TOOLS) + expect(result).toHaveLength(1) + expect(result[0]!.name).toBe('bash') + }) +})