feat(agent): add smart loop detection for stuck agents (#16)
Detect when agents repeat the same tool calls or text outputs in a sliding window. Three modes: warn (inject nudge, terminate on 2nd hit), terminate (immediate stop), or custom callback. Fully opt-in via `loopDetection` on AgentConfig — zero overhead when unconfigured.
This commit is contained in:
parent
9f5afb10f5
commit
cc957b3148
|
|
@ -136,6 +136,7 @@ export class Agent {
|
|||
allowedTools: this.config.tools,
|
||||
agentName: this.name,
|
||||
agentRole: this.config.systemPrompt?.slice(0, 50) ?? 'assistant',
|
||||
loopDetection: this.config.loopDetection,
|
||||
}
|
||||
|
||||
this.runner = new AgentRunner(
|
||||
|
|
@ -567,6 +568,7 @@ export class Agent {
|
|||
tokenUsage: result.tokenUsage,
|
||||
toolCalls: result.toolCalls,
|
||||
structured,
|
||||
...(result.loopDetected ? { loopDetected: true } : {}),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,137 @@
|
|||
/**
|
||||
* @fileoverview Sliding-window loop detector for the agent conversation loop.
|
||||
*
|
||||
* Tracks tool-call signatures and text outputs across turns to detect when an
|
||||
* agent is stuck repeating the same actions. Used by {@link AgentRunner} when
|
||||
* {@link LoopDetectionConfig} is provided.
|
||||
*/
|
||||
|
||||
import type { LoopDetectionConfig, LoopDetectionInfo } from '../types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Recursively rebuild `value` with object keys in sorted order so that
 * `{b:1, a:2}` and `{a:2, b:1}` produce the same JSON string.
 *
 * - Primitives and `null` are returned unchanged.
 * - Arrays keep their element order; each element is normalised.
 * - Plain objects are copied with keys in ascending code-unit order.
 *
 * The copy is built with `Object.fromEntries`, which defines every key as an
 * own data property. A plain `sorted[key] = …` assignment would route a key
 * named `__proto__` (which `JSON.parse` can produce) through the prototype
 * setter and silently drop it from the serialised result.
 *
 * @param value - Any JSON-like value.
 * @returns A structurally equal copy with deterministic key order.
 */
function sortKeys(value: unknown): unknown {
  if (value === null || typeof value !== 'object') return value
  if (Array.isArray(value)) return value.map(sortKeys)

  const record = value as Record<string, unknown>
  return Object.fromEntries(
    Object.keys(record)
      .sort()
      .map((key): [string, unknown] => [key, sortKeys(record[key])]),
  )
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// LoopDetector
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Sliding-window detector for agents that keep repeating themselves.
 *
 * Two independent histories are kept: one of per-turn tool-call signatures
 * and one of whitespace-normalised text outputs. Each `record*` call appends
 * to its history, trims it to the window size, and reports a loop when the
 * newest entry has occurred `maxRepeats` or more times in a row.
 */
export class LoopDetector {
  /** Consecutive identical entries required before a loop is reported. */
  private readonly maxRepeats: number
  /** Maximum number of recent entries retained in each history buffer. */
  private readonly windowSize: number

  private readonly toolSignatures: string[] = []
  private readonly textOutputs: string[] = []

  /**
   * @param config - Optional thresholds. `maxRepeatedToolCalls` defaults to
   *   `3` and `loopDetectionWindow` to `4`. Non-finite values (`NaN`,
   *   `±Infinity`) fall back to those defaults: a `NaN` threshold would never
   *   trigger, and a `NaN`/infinite window would let the buffers grow without
   *   bound.
   */
  constructor(config: LoopDetectionConfig = {}) {
    this.maxRepeats = LoopDetector.finiteOr(config.maxRepeatedToolCalls, 3)
    const requestedWindow = LoopDetector.finiteOr(config.loopDetectionWindow, 4)
    // Window must be >= threshold, otherwise detection can never trigger.
    this.windowSize = Math.max(requestedWindow, this.maxRepeats)
  }

  /**
   * Record a turn's tool calls.
   *
   * A turn with no tool calls is ignored entirely (nothing is recorded, so it
   * does not break an ongoing streak).
   *
   * @param blocks - The tool calls issued in a single turn.
   * @returns Detection info when the same set of calls has now repeated at
   *   least `maxRepeats` times consecutively, otherwise `null`.
   */
  recordToolCalls(
    blocks: ReadonlyArray<{ name: string; input: Record<string, unknown> }>,
  ): LoopDetectionInfo | null {
    if (blocks.length === 0) return null

    const signature = this.computeToolSignature(blocks)
    this.push(this.toolSignatures, signature)

    const count = this.consecutiveRepeats(this.toolSignatures)
    if (count >= this.maxRepeats) {
      const names = blocks.map(b => b.name).join(', ')
      return {
        kind: 'tool_repetition',
        repetitions: count,
        detail:
          `Tool call "${names}" with identical arguments has repeated ` +
          `${count} times consecutively. The agent appears to be stuck in a loop.`,
      }
    }
    return null
  }

  /**
   * Record a turn's text output.
   *
   * The text is trimmed and runs of whitespace are collapsed to one space
   * before comparison. Empty or whitespace-only text is ignored.
   *
   * @param text - Raw text produced in a single turn.
   * @returns Detection info when the same normalised text has now repeated at
   *   least `maxRepeats` times consecutively, otherwise `null`.
   */
  recordText(text: string): LoopDetectionInfo | null {
    const normalised = text.trim().replace(/\s+/g, ' ')
    if (normalised.length === 0) return null

    this.push(this.textOutputs, normalised)

    const count = this.consecutiveRepeats(this.textOutputs)
    if (count >= this.maxRepeats) {
      return {
        kind: 'text_repetition',
        repetitions: count,
        detail:
          `The agent has produced the same text response ${count} times ` +
          `consecutively. It appears to be stuck in a loop.`,
      }
    }
    return null
  }

  // -------------------------------------------------------------------------
  // Private
  // -------------------------------------------------------------------------

  /** Return `value` when it is a finite number, otherwise `fallback`. */
  private static finiteOr(value: number | undefined, fallback: number): number {
    return typeof value === 'number' && Number.isFinite(value) ? value : fallback
  }

  /**
   * Deterministic signature for the set of tool calls made in one turn.
   *
   * Each call is serialised exactly once (name plus key-sorted input) and the
   * serialised forms are ordered by UTF-16 code units, so the result does not
   * depend on the order of calls within the turn, on key order inside the
   * inputs, or on the runtime's locale collation rules.
   */
  private computeToolSignature(
    blocks: ReadonlyArray<{ name: string; input: Record<string, unknown> }>,
  ): string {
    const serialised = blocks.map(b =>
      JSON.stringify({ name: b.name, input: sortKeys(b.input) }),
    )
    // Default sort compares code units: total, locale-independent ordering.
    serialised.sort()
    return `[${serialised.join(',')}]`
  }

  /** Push an entry and trim the buffer to `windowSize`. */
  private push(buffer: string[], entry: string): void {
    buffer.push(entry)
    while (buffer.length > this.windowSize) {
      buffer.shift()
    }
  }

  /**
   * Count how many consecutive identical entries exist at the tail of `buffer`.
   * Returns 0 for an empty buffer and 1 when the last entry is unique.
   */
  private consecutiveRepeats(buffer: string[]): number {
    if (buffer.length === 0) return 0
    const last = buffer[buffer.length - 1]
    let count = 0
    for (let i = buffer.length - 1; i >= 0; i--) {
      if (buffer[i] === last) count++
      else break
    }
    return count
  }
}
|
||||
|
|
@ -26,7 +26,10 @@ import type {
|
|||
LLMAdapter,
|
||||
LLMChatOptions,
|
||||
TraceEvent,
|
||||
LoopDetectionConfig,
|
||||
LoopDetectionInfo,
|
||||
} from '../types.js'
|
||||
import { LoopDetector } from './loop-detector.js'
|
||||
import { emitTrace } from '../utils/trace.js'
|
||||
import type { ToolRegistry } from '../tool/framework.js'
|
||||
import type { ToolExecutor } from '../tool/executor.js'
|
||||
|
|
@ -65,6 +68,8 @@ export interface RunnerOptions {
|
|||
readonly agentName?: string
|
||||
/** Short role description of the agent (used in tool context). */
|
||||
readonly agentRole?: string
|
||||
/** Loop detection configuration. When set, detects stuck agent loops. */
|
||||
readonly loopDetection?: LoopDetectionConfig
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -86,6 +91,11 @@ export interface RunOptions {
|
|||
readonly taskId?: string
|
||||
/** Agent name for trace correlation (overrides RunnerOptions.agentName). */
|
||||
readonly traceAgent?: string
|
||||
/**
|
||||
* Fired when the runner detects a potential issue (e.g. loop detection,
|
||||
* model ignoring tool definitions).
|
||||
*/
|
||||
readonly onWarning?: (message: string) => void
|
||||
}
|
||||
|
||||
/** The aggregated result returned when a full run completes. */
|
||||
|
|
@ -100,6 +110,8 @@ export interface RunResult {
|
|||
readonly tokenUsage: TokenUsage
|
||||
/** Total number of LLM turns (including tool-call follow-ups). */
|
||||
readonly turns: number
|
||||
/** True when the run was terminated by loop detection; a warning alone does not set this flag. */
|
||||
readonly loopDetected?: boolean
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -176,13 +188,7 @@ export class AgentRunner {
|
|||
options: RunOptions = {},
|
||||
): Promise<RunResult> {
|
||||
// Collect everything yielded by the internal streaming loop.
|
||||
const accumulated: {
|
||||
messages: LLMMessage[]
|
||||
output: string
|
||||
toolCalls: ToolCallRecord[]
|
||||
tokenUsage: TokenUsage
|
||||
turns: number
|
||||
} = {
|
||||
const accumulated: RunResult = {
|
||||
messages: [],
|
||||
output: '',
|
||||
toolCalls: [],
|
||||
|
|
@ -192,12 +198,7 @@ export class AgentRunner {
|
|||
|
||||
for await (const event of this.stream(messages, options)) {
|
||||
if (event.type === 'done') {
|
||||
const result = event.data as RunResult
|
||||
accumulated.messages = result.messages
|
||||
accumulated.output = result.output
|
||||
accumulated.toolCalls = result.toolCalls
|
||||
accumulated.tokenUsage = result.tokenUsage
|
||||
accumulated.turns = result.turns
|
||||
Object.assign(accumulated, event.data)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -244,6 +245,14 @@ export class AgentRunner {
|
|||
abortSignal: this.options.abortSignal,
|
||||
}
|
||||
|
||||
// Loop detection state — only allocated when configured.
|
||||
const detector = this.options.loopDetection
|
||||
? new LoopDetector(this.options.loopDetection)
|
||||
: null
|
||||
let loopDetected = false
|
||||
let loopWarned = false
|
||||
const loopAction = this.options.loopDetection?.onLoopDetected ?? 'warn'
|
||||
|
||||
try {
|
||||
// -----------------------------------------------------------------
|
||||
// Main agentic loop — `while (true)` until end_turn or maxTurns
|
||||
|
|
@ -307,6 +316,44 @@ export class AgentRunner {
|
|||
yield { type: 'tool_use', data: block } satisfies StreamEvent
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Step 2.5: Loop detection — check before executing tools.
|
||||
// ------------------------------------------------------------------
|
||||
let injectWarning = false
|
||||
let injectWarningKind: 'tool_repetition' | 'text_repetition' = 'tool_repetition'
|
||||
if (detector && toolUseBlocks.length > 0) {
|
||||
const toolInfo = detector.recordToolCalls(toolUseBlocks)
|
||||
const textInfo = turnText.length > 0 ? detector.recordText(turnText) : null
|
||||
const info = toolInfo ?? textInfo
|
||||
|
||||
if (info) {
|
||||
yield { type: 'loop_detected', data: info } satisfies StreamEvent
|
||||
options.onWarning?.(info.detail)
|
||||
|
||||
const action = typeof loopAction === 'function'
|
||||
? loopAction(info)
|
||||
: loopAction
|
||||
|
||||
if (action === 'terminate') {
|
||||
loopDetected = true
|
||||
finalOutput = turnText
|
||||
break
|
||||
} else if (action === 'warn' || action === 'inject') {
|
||||
if (loopWarned) {
|
||||
// Second detection after a warning — force terminate.
|
||||
loopDetected = true
|
||||
finalOutput = turnText
|
||||
break
|
||||
}
|
||||
loopWarned = true
|
||||
injectWarning = true
|
||||
injectWarningKind = info.kind
|
||||
// Fall through to execute tools, then inject warning.
|
||||
}
|
||||
// 'continue' — do nothing, let the loop proceed normally.
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Step 3: Decide whether to continue looping.
|
||||
// ------------------------------------------------------------------
|
||||
|
|
@ -395,6 +442,20 @@ export class AgentRunner {
|
|||
yield { type: 'tool_result', data: resultBlock } satisfies StreamEvent
|
||||
}
|
||||
|
||||
// Inject a loop-detection warning into the tool-result message so
|
||||
// the LLM sees it alongside the results (avoids two consecutive user
|
||||
// messages which violates the alternating-role constraint).
|
||||
if (injectWarning) {
|
||||
const warningText = injectWarningKind === 'text_repetition'
|
||||
? 'WARNING: You appear to be generating the same response repeatedly. ' +
|
||||
'This suggests you are stuck in a loop. Please try a different approach ' +
|
||||
'or provide new information.'
|
||||
: 'WARNING: You appear to be repeating the same tool calls with identical arguments. ' +
|
||||
'This suggests you are stuck in a loop. Please try a different approach, use different ' +
|
||||
'parameters, or explain what you are trying to accomplish.'
|
||||
toolResultBlocks.push({ type: 'text' as const, text: warningText })
|
||||
}
|
||||
|
||||
const toolResultMessage: LLMMessage = {
|
||||
role: 'user',
|
||||
content: toolResultBlocks,
|
||||
|
|
@ -428,6 +489,7 @@ export class AgentRunner {
|
|||
toolCalls: allToolCalls,
|
||||
tokenUsage: totalUsage,
|
||||
turns,
|
||||
...(loopDetected ? { loopDetected: true } : {}),
|
||||
}
|
||||
|
||||
yield { type: 'done', data: runResult } satisfies StreamEvent
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ export type { SchedulingStrategy } from './orchestrator/scheduler.js'
|
|||
// ---------------------------------------------------------------------------
|
||||
|
||||
export { Agent } from './agent/agent.js'
|
||||
export { LoopDetector } from './agent/loop-detector.js'
|
||||
export { buildStructuredOutputInstruction, extractJSON, validateOutput } from './agent/structured-output.js'
|
||||
export { AgentPool, Semaphore } from './agent/pool.js'
|
||||
export type { PoolStatus } from './agent/pool.js'
|
||||
|
|
@ -149,6 +150,8 @@ export type {
|
|||
AgentRunResult,
|
||||
BeforeRunHookContext,
|
||||
ToolCallRecord,
|
||||
LoopDetectionConfig,
|
||||
LoopDetectionInfo,
|
||||
|
||||
// Team
|
||||
TeamConfig,
|
||||
|
|
|
|||
44
src/types.ts
44
src/types.ts
|
|
@ -94,7 +94,7 @@ export interface LLMResponse {
|
|||
* - `error` — an unrecoverable error occurred; `data` is an `Error`
|
||||
*/
|
||||
export interface StreamEvent {
|
||||
readonly type: 'text' | 'tool_use' | 'tool_result' | 'done' | 'error'
|
||||
readonly type: 'text' | 'tool_use' | 'tool_result' | 'loop_detected' | 'done' | 'error'
|
||||
readonly data: unknown
|
||||
}
|
||||
|
||||
|
|
@ -209,6 +209,11 @@ export interface AgentConfig {
|
|||
readonly maxTurns?: number
|
||||
readonly maxTokens?: number
|
||||
readonly temperature?: number
|
||||
/**
|
||||
* Loop detection configuration. When set, the agent tracks repeated tool
|
||||
* calls and text outputs to detect stuck loops before `maxTurns` is reached.
|
||||
*/
|
||||
readonly loopDetection?: LoopDetectionConfig
|
||||
/**
|
||||
* Optional Zod schema for structured output. When set, the agent's final
|
||||
* output is parsed as JSON and validated against this schema. A single
|
||||
|
|
@ -229,6 +234,41 @@ export interface AgentConfig {
|
|||
readonly afterRun?: (result: AgentRunResult) => Promise<AgentRunResult> | AgentRunResult
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Loop detection
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Configuration for agent loop detection. */
|
||||
export interface LoopDetectionConfig {
|
||||
/**
|
||||
* Maximum consecutive times the same tool call (name + args) or the same text output can repeat
|
||||
* before detection triggers. Default: `3`.
|
||||
*/
|
||||
readonly maxRepeatedToolCalls?: number
|
||||
/**
|
||||
* Number of recent turns to track for repetition analysis; values below `maxRepeatedToolCalls` are raised to it. Default: `4`.
|
||||
*/
|
||||
readonly loopDetectionWindow?: number
|
||||
/**
|
||||
* Action to take when a loop is detected.
|
||||
* - `'warn'` — inject a "you appear stuck" message, give the LLM one
|
||||
* more chance; terminate if the loop persists (default)
|
||||
* - `'terminate'` — stop the run immediately
|
||||
* - `function` — custom callback; return `'continue'`, `'inject'`, or
|
||||
* `'terminate'` to control the outcome
|
||||
*/
|
||||
readonly onLoopDetected?: 'warn' | 'terminate' | ((info: LoopDetectionInfo) => 'continue' | 'inject' | 'terminate')
|
||||
}
|
||||
|
||||
/** Diagnostic payload emitted when a loop is detected. */
|
||||
export interface LoopDetectionInfo {
|
||||
readonly kind: 'tool_repetition' | 'text_repetition'
|
||||
/** Number of consecutive identical occurrences observed. */
|
||||
readonly repetitions: number
|
||||
/** Human-readable description of the detected loop. */
|
||||
readonly detail: string
|
||||
}
|
||||
|
||||
/** Lifecycle state tracked during an agent run. */
|
||||
export interface AgentState {
|
||||
status: 'idle' | 'running' | 'completed' | 'error'
|
||||
|
|
@ -259,6 +299,8 @@ export interface AgentRunResult {
|
|||
* failed after retry.
|
||||
*/
|
||||
readonly structured?: unknown
|
||||
/** True when the run was terminated by loop detection; a warning alone does not set this flag. */
|
||||
readonly loopDetected?: boolean
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -0,0 +1,375 @@
|
|||
import { describe, it, expect, vi } from 'vitest'
|
||||
import { z } from 'zod'
|
||||
import { LoopDetector } from '../src/agent/loop-detector.js'
|
||||
import { AgentRunner } from '../src/agent/runner.js'
|
||||
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
|
||||
import { ToolExecutor } from '../src/tool/executor.js'
|
||||
import type { LLMAdapter, LLMResponse, StreamEvent } from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build an `LLMAdapter` stub whose `chat()` replays `responses` in order,
 * one per call.
 *
 * Running past the end of the script throws a descriptive error instead of
 * resolving to `undefined`, so a test that makes more LLM calls than it
 * scripted fails at the point of the extra call rather than somewhere
 * downstream.
 *
 * @param responses - Scripted responses, consumed front to back.
 * @returns An adapter named `'mock'`; its `stream()` yields nothing.
 */
function mockAdapter(responses: LLMResponse[]): LLMAdapter {
  let callIndex = 0
  return {
    name: 'mock',
    async chat() {
      const next = responses[callIndex]
      if (next === undefined) {
        throw new Error(
          `mockAdapter: no scripted response for chat() call #${callIndex + 1} ` +
          `(only ${responses.length} provided)`,
        )
      }
      callIndex++
      return next
    },
    async *stream() {
      /* unused */
    },
  }
}
|
||||
|
||||
/**
 * Scripted LLM reply that carries a single text block and ends the turn.
 *
 * @param text - Text placed in the sole content block.
 * @returns A response with a random `resp-…` id, model `'mock-model'`,
 *   `stop_reason` `'end_turn'` and fixed token usage (10 in / 20 out).
 */
function textResponse(text: string): LLMResponse {
  const id = `resp-${Math.random().toString(36).slice(2)}`
  const content = [{ type: 'text' as const, text }]
  const usage = { input_tokens: 10, output_tokens: 20 }
  return { id, content, model: 'mock-model', stop_reason: 'end_turn', usage }
}
|
||||
|
||||
/**
 * Scripted LLM reply that requests exactly one tool call.
 *
 * @param toolName - Name of the tool the model "wants" to invoke.
 * @param input - Arguments passed through verbatim on the tool-use block.
 * @returns A response with random `resp-…` / `tu-…` ids, model
 *   `'mock-model'`, `stop_reason` `'tool_use'` and fixed token usage
 *   (15 in / 25 out).
 */
function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse {
  const randomSuffix = () => Math.random().toString(36).slice(2)
  const responseId = `resp-${randomSuffix()}`
  const toolUseBlock = {
    type: 'tool_use' as const,
    id: `tu-${randomSuffix()}`,
    name: toolName,
    input,
  }
  return {
    id: responseId,
    content: [toolUseBlock],
    model: 'mock-model',
    stop_reason: 'tool_use',
    usage: { input_tokens: 15, output_tokens: 25 },
  }
}
|
||||
|
||||
/** Minimal tool used by the runner tests: returns its `message` argument as the result data. */
const echoTool = defineTool({
  name: 'echo',
  description: 'Echoes input',
  inputSchema: z.object({ message: z.string() }),
  execute: async (args) => ({ data: args.message }),
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Unit tests — LoopDetector class
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Unit tests for the LoopDetector class in isolation (no runner involved).
describe('LoopDetector', () => {
  describe('tool call repetition', () => {
    it('returns null for non-repeating tool calls', () => {
      const detector = new LoopDetector()
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])).toBeNull()
      expect(detector.recordToolCalls([{ name: 'b', input: { x: 2 } }])).toBeNull()
      expect(detector.recordToolCalls([{ name: 'c', input: { x: 3 } }])).toBeNull()
    })

    it('detects 3 identical tool calls (default threshold)', () => {
      const detector = new LoopDetector()
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])).toBeNull()
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])).toBeNull()
      const info = detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])
      expect(info).not.toBeNull()
      expect(info!.kind).toBe('tool_repetition')
      expect(info!.repetitions).toBe(3)
    })

    it('does not trigger when args differ', () => {
      const detector = new LoopDetector()
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])).toBeNull()
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 2 } }])).toBeNull()
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 3 } }])).toBeNull()
    })

    it('resets count when a different call intervenes', () => {
      const detector = new LoopDetector()
      detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])
      detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])
      // Different call breaks the streak
      detector.recordToolCalls([{ name: 'b', input: { x: 1 } }])
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])).toBeNull()
    })

    it('handles multi-tool turns with order-independent signatures', () => {
      const detector = new LoopDetector()
      const toolsA = [
        { name: 'read', input: { file: 'a.ts' } },
        { name: 'read', input: { file: 'b.ts' } },
      ]
      // Same tools in different order
      const toolsB = [
        { name: 'read', input: { file: 'b.ts' } },
        { name: 'read', input: { file: 'a.ts' } },
      ]
      expect(detector.recordToolCalls(toolsA)).toBeNull()
      expect(detector.recordToolCalls(toolsB)).toBeNull()
      const info = detector.recordToolCalls(toolsA)
      expect(info).not.toBeNull()
      expect(info!.kind).toBe('tool_repetition')
    })

    it('respects custom threshold', () => {
      const detector = new LoopDetector({ maxRepeatedToolCalls: 2 })
      expect(detector.recordToolCalls([{ name: 'a', input: {} }])).toBeNull()
      const info = detector.recordToolCalls([{ name: 'a', input: {} }])
      expect(info).not.toBeNull()
      expect(info!.repetitions).toBe(2)
    })

    it('returns null for empty blocks', () => {
      const detector = new LoopDetector()
      expect(detector.recordToolCalls([])).toBeNull()
    })

    it('produces deterministic signatures regardless of key order', () => {
      const detector = new LoopDetector()
      detector.recordToolCalls([{ name: 'a', input: { b: 2, a: 1 } }])
      detector.recordToolCalls([{ name: 'a', input: { a: 1, b: 2 } }])
      const info = detector.recordToolCalls([{ name: 'a', input: { b: 2, a: 1 } }])
      expect(info).not.toBeNull()
    })
  })

  describe('text repetition', () => {
    it('returns null for non-repeating text', () => {
      const detector = new LoopDetector()
      expect(detector.recordText('hello')).toBeNull()
      expect(detector.recordText('world')).toBeNull()
      expect(detector.recordText('foo')).toBeNull()
    })

    it('detects 3 identical texts (default threshold)', () => {
      const detector = new LoopDetector()
      expect(detector.recordText('stuck')).toBeNull()
      expect(detector.recordText('stuck')).toBeNull()
      const info = detector.recordText('stuck')
      expect(info).not.toBeNull()
      expect(info!.kind).toBe('text_repetition')
      expect(info!.repetitions).toBe(3)
    })

    it('ignores empty or whitespace-only text', () => {
      const detector = new LoopDetector()
      expect(detector.recordText('')).toBeNull()
      expect(detector.recordText(' ')).toBeNull()
      expect(detector.recordText('\n\t')).toBeNull()
    })

    it('normalises whitespace before comparison', () => {
      const detector = new LoopDetector()
      // The three inputs differ only in whitespace; identical strings would
      // pass even if normalisation were removed.
      detector.recordText('hello world')
      detector.recordText('  hello   world  ')
      const info = detector.recordText('hello\n\tworld')
      expect(info).not.toBeNull()
      expect(info!.kind).toBe('text_repetition')
      expect(info!.repetitions).toBe(3)
    })
  })

  describe('window size', () => {
    it('clamps windowSize to at least maxRepeats', () => {
      // Window of 2 with threshold 3 is auto-clamped to 3.
      const detector = new LoopDetector({ loopDetectionWindow: 2, maxRepeatedToolCalls: 3 })
      detector.recordToolCalls([{ name: 'a', input: {} }])
      detector.recordToolCalls([{ name: 'a', input: {} }])
      // Third call triggers because window was clamped to 3
      const info = detector.recordToolCalls([{ name: 'a', input: {} }])
      expect(info).not.toBeNull()
      expect(info!.repetitions).toBe(3)
    })

    it('works correctly when window >= threshold', () => {
      const detector = new LoopDetector({ loopDetectionWindow: 4, maxRepeatedToolCalls: 3 })
      detector.recordToolCalls([{ name: 'a', input: {} }])
      detector.recordToolCalls([{ name: 'a', input: {} }])
      const info = detector.recordToolCalls([{ name: 'a', input: {} }])
      expect(info).not.toBeNull()
    })
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Integration tests — AgentRunner with loop detection
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// End-to-end checks of loop detection through AgentRunner, driven by a
// scripted mock adapter and the `echo` tool.
describe('AgentRunner loop detection', () => {
  type LoopConfig = import('../src/types.js').LoopDetectionConfig
  type LoopInfo = import('../src/types.js').LoopDetectionInfo

  /** Fresh one-message conversation for each run (never shared between tests). */
  const prompt = () => [
    { role: 'user' as const, content: [{ type: 'text' as const, text: 'go' }] },
  ]

  /** `stuckTurns` identical echo calls followed by a closing text reply. */
  const stuckScript = (stuckTurns: number): LLMResponse[] => {
    const script: LLMResponse[] = []
    for (let i = 0; i < stuckTurns; i++) {
      script.push(toolUseResponse('echo', { message: 'hi' }))
    }
    script.push(textResponse('done'))
    return script
  }

  /**
   * Runner wired to the mock adapter and the echo tool. The `loopDetection`
   * option is only set when a config is supplied.
   */
  function buildRunner(responses: LLMResponse[], loopDetection?: LoopConfig) {
    const adapter = mockAdapter(responses)
    const registry = new ToolRegistry()
    registry.register(echoTool)
    const executor = new ToolExecutor(registry)
    return new AgentRunner(adapter, registry, executor, {
      model: 'mock-model',
      maxTurns: 10,
      allowedTools: ['echo'],
      agentName: 'test-agent',
      ...(loopDetection ? { loopDetection } : {}),
    })
  }

  it('terminates early in terminate mode', async () => {
    // 5 identical tool calls, then a text response (should never reach it)
    const runner = buildRunner(stuckScript(5), {
      maxRepeatedToolCalls: 3,
      onLoopDetected: 'terminate',
    })

    const result = await runner.run(prompt())

    expect(result.loopDetected).toBe(true)
    expect(result.turns).toBe(3)
  })

  it('emits loop_detected stream event in terminate mode', async () => {
    const runner = buildRunner(stuckScript(5), {
      maxRepeatedToolCalls: 3,
      onLoopDetected: 'terminate',
    })

    const events: StreamEvent[] = []
    for await (const event of runner.stream(prompt())) {
      events.push(event)
    }

    const loopEvents = events.filter(e => e.type === 'loop_detected')
    expect(loopEvents).toHaveLength(1)
    const info = loopEvents[0]!.data as LoopInfo
    expect(info.kind).toBe('tool_repetition')
    expect(info.repetitions).toBe(3)
  })

  it('calls onWarning in terminate mode', async () => {
    const runner = buildRunner(stuckScript(5), {
      maxRepeatedToolCalls: 3,
      onLoopDetected: 'terminate',
    })

    const warnings: string[] = []
    await runner.run(prompt(), { onWarning: (msg) => warnings.push(msg) })

    expect(warnings).toHaveLength(1)
    expect(warnings[0]).toContain('loop')
  })

  it('injects warning message in warn mode and terminates on second detection', async () => {
    // 6 identical tool calls: the first detection warns, the next one terminates.
    const runner = buildRunner(stuckScript(6), {
      maxRepeatedToolCalls: 3,
      onLoopDetected: 'warn',
    })

    const result = await runner.run(prompt())

    // Must stop on the second detection rather than play out the whole script.
    expect(result.loopDetected).toBe(true)
    expect(result.turns).toBeLessThanOrEqual(5)
  })

  it('supports custom callback returning terminate', async () => {
    const callback = vi.fn().mockReturnValue('terminate')
    const runner = buildRunner(stuckScript(5), {
      maxRepeatedToolCalls: 3,
      onLoopDetected: callback,
    })

    const result = await runner.run(prompt())

    expect(callback).toHaveBeenCalledOnce()
    expect(result.loopDetected).toBe(true)
    expect(result.turns).toBe(3)
  })

  it('supports custom callback returning inject', async () => {
    // 'inject' behaves like 'warn': injects warning, terminates on second detection
    const callback = vi.fn().mockReturnValue('inject')
    const runner = buildRunner(stuckScript(6), {
      maxRepeatedToolCalls: 3,
      onLoopDetected: callback,
    })

    const result = await runner.run(prompt())

    expect(callback).toHaveBeenCalledTimes(2) // first triggers inject, second forces terminate
    expect(result.loopDetected).toBe(true)
    expect(result.turns).toBeLessThanOrEqual(5)
  })

  it('supports custom callback returning continue', async () => {
    const callback = vi.fn().mockReturnValue('continue')
    const runner = buildRunner(stuckScript(5), {
      maxRepeatedToolCalls: 3,
      onLoopDetected: callback,
    })

    const result = await runner.run(prompt())

    // continue means no termination — runs until maxTurns or text response
    // callback fires at turn 3, 4, 5 (all repeating)
    expect(callback).toHaveBeenCalledTimes(3)
    expect(result.loopDetected).toBeUndefined()
  })

  it('does not interfere when loopDetection is not configured', async () => {
    // No config supplied, so the runner gets no `loopDetection` option at all.
    const runner = buildRunner(stuckScript(5))

    const result = await runner.run(prompt())

    // All 5 tool turns + 1 text turn = 6
    expect(result.turns).toBe(6)
    expect(result.loopDetected).toBeUndefined()
  })
})
|
||||
Loading…
Reference in New Issue