feat: add glob tool

This commit is contained in:
Ibrahim Kazimov 2026-04-12 16:59:20 +03:00
parent ced1d90a93
commit dc88232885
7 changed files with 349 additions and 97 deletions

View File

@ -98,6 +98,7 @@ export {
fileReadTool, fileReadTool,
fileWriteTool, fileWriteTool,
fileEditTool, fileEditTool,
globTool,
grepTool, grepTool,
} from './tool/built-in/index.js' } from './tool/built-in/index.js'

View File

@ -0,0 +1,97 @@
/**
* Shared recursive directory walk for built-in file tools.
*
* Used by {@link grepTool} and {@link globTool} so glob filtering and skip
* rules stay consistent.
*/
import { readdir, stat } from 'fs/promises'
import { join } from 'path'
/**
 * Directory names the recursive walk refuses to descend into.
 *
 * Matching is by exact entry name at any depth, not by path.
 */
export const SKIP_DIRS = new Set([
  // Version-control metadata
  '.git', '.svn', '.hg',
  // Installed dependencies
  'node_modules',
  // Typically generated output
  '.next', 'dist', 'build',
])
/** Optional limits accepted by {@link collectFiles}. */
export interface CollectFilesOptions {
/**
* When set, stop collecting once this many paths are gathered.
* When omitted, the walk applies no cap of its own.
*/
readonly maxFiles?: number
}
/**
 * Gather the paths of the regular files found beneath `dir`.
 *
 * Directories named in {@link SKIP_DIRS} are never entered, and directories
 * that cannot be read are silently passed over.
 *
 * @param dir - Directory at which the traversal starts.
 * @param glob - Filename pattern to filter on; `undefined` keeps every file.
 * @param signal - Once aborted, traversal stops and the paths gathered so far
 *   are returned.
 * @param options - Optional limits; see {@link CollectFilesOptions}.
 * @returns File paths built by joining `dir` with each descendant's name, in
 *   traversal order.
 */
export async function collectFiles(
  dir: string,
  glob: string | undefined,
  signal: AbortSignal | undefined,
  options?: CollectFilesOptions,
): Promise<string[]> {
  const limit = options?.maxFiles
  const found: string[] = []
  // `walk` appends into `found` in place.
  await walk(dir, glob, found, signal, limit)
  return found
}
/** Entry names of `dir`, or `undefined` when the directory cannot be read. */
async function readEntryNames(dir: string): Promise<string[] | undefined> {
  try {
    return await readdir(dir, { encoding: 'utf8' })
  } catch {
    return undefined
  }
}

/** `stat` result for `target`, or `undefined` when the call fails. */
async function statOrUndefined(target: string) {
  try {
    return await stat(target)
  } catch {
    return undefined
  }
}

/**
 * Depth-first traversal behind {@link collectFiles}.
 *
 * Appends every matching file path to `results` in place. Traversal ends
 * early when `signal` is aborted or `results` has reached `maxFiles`.
 * Unreadable directories and entries whose `stat` fails are skipped without
 * raising.
 *
 * @param dir - Directory currently being listed.
 * @param glob - Filename pattern to filter on; `undefined` keeps every file.
 * @param results - Accumulator shared across the whole recursion.
 * @param signal - Optional cancellation signal.
 * @param maxFiles - Optional cap on `results.length`.
 */
async function walk(
  dir: string,
  glob: string | undefined,
  results: string[],
  signal: AbortSignal | undefined,
  maxFiles: number | undefined,
): Promise<void> {
  // Single stop predicate: cancellation first, then the size cap.
  const shouldStop = (): boolean =>
    signal?.aborted === true ||
    (maxFiles !== undefined && results.length >= maxFiles)

  if (shouldStop()) return

  const names = await readEntryNames(dir)
  if (names === undefined) return

  for (const name of names) {
    // Re-checked per entry so a nested walk that filled `results` stops us too.
    if (shouldStop()) return

    const target = join(dir, name)
    const info = await statOrUndefined(target)
    if (info === undefined) continue

    if (info.isDirectory()) {
      if (SKIP_DIRS.has(name)) continue
      await walk(target, glob, results, signal, maxFiles)
      continue
    }

    // Anything that is neither a directory nor a regular file is ignored.
    if (!info.isFile()) continue

    if (glob === undefined || matchesGlob(name, glob)) {
      results.push(target)
    }
  }
}
// Single-entry memo: a directory walk calls `matchesGlob` once per file with
// the same pattern, so remembering the last compiled pattern avoids rebuilding
// an identical RegExp for every entry.
let lastCompiledGlob: string | undefined
let lastCompiledGlobRegex: RegExp | undefined

/** Translate a minimal filename glob into an anchored, case-insensitive RegExp. */
function compileGlob(glob: string): RegExp {
  // Drop every leading `**/` segment: callers already recurse into all
  // directories, so these prefixes carry no information for a bare file name.
  const pattern = glob.replace(/^(?:\*\*\/)+/, '')
  const regexSource = pattern
    .replace(/[.+^${}()|[\]\\]/g, '\\$&') // escape regex metacharacters first
    .replace(/\*/g, '.*') // * -> any run of characters
    .replace(/\?/g, '.') // ? -> exactly one character
  return new RegExp(`^${regexSource}$`, 'i')
}

/**
 * Minimal glob match for a bare file name.
 *
 * Supports `*` (any run of characters) and `?` (exactly one character),
 * optionally preceded by any number of leading `**` directory segments, which
 * are ignored. Every other character is matched literally, so brace and
 * bracket expressions are not expanded. Matching is case-insensitive and must
 * cover the whole name.
 *
 * @param filename - File name without any directory part.
 * @param glob - Pattern such as `*.ts` or a `**`-prefixed form of it.
 * @returns `true` when the whole of `filename` matches `glob`.
 */
export function matchesGlob(filename: string, glob: string): boolean {
  let re = lastCompiledGlobRegex
  if (re === undefined || lastCompiledGlob !== glob) {
    re = compileGlob(glob)
    lastCompiledGlob = glob
    lastCompiledGlobRegex = re
  }
  return re.test(filename)
}

99
src/tool/built-in/glob.ts Normal file
View File

@ -0,0 +1,99 @@
/**
* Built-in glob tool.
*
* Lists file paths under a directory matching an optional filename glob.
* Does not read file contents — use {@link grepTool} to search inside files.
*/
import { stat } from 'fs/promises'
import { basename, relative } from 'path'
import { z } from 'zod'
import type { ToolResult } from '../../types.js'
import { collectFiles, matchesGlob } from './fs-walk.js'
import { defineTool } from '../framework.js'
/** Cap applied to the listing when the caller does not supply `maxFiles`. */
const DEFAULT_MAX_FILES = 500

/**
 * `glob` tool: lists file paths without opening any file.
 *
 * - When `path` is a regular file, the result is that single path, provided it
 *   matches `pattern` (if one is given).
 * - Otherwise the tree under `path` is walked via `collectFiles`.
 *
 * Paths are reported relative to the process working directory and sorted
 * with `localeCompare`. A trailing note is appended when the listing was cut
 * at `maxFiles`.
 */
export const globTool = defineTool({
  name: 'glob',
  description:
    'List file paths under a directory that match an optional filename glob. ' +
    'Does not read file contents — use `grep` to search inside files. ' +
    'Skips common bulky directories (node_modules, .git, dist, etc.). ' +
    'Paths in the result are relative to the process working directory. ' +
    'Results are capped by `maxFiles`.',

  inputSchema: z.object({
    path: z
      .string()
      .optional()
      .describe(
        'Directory to list files under. Defaults to the current working directory.',
      ),
    pattern: z
      .string()
      .optional()
      .describe(
        'Filename glob (e.g. "*.ts", "**/*.json"). When omitted, every file ' +
          'under the directory is listed (subject to maxFiles and skipped dirs).',
      ),
    maxFiles: z
      .number()
      .int()
      .positive()
      .optional()
      .describe(
        `Maximum number of file paths to return. Defaults to ${DEFAULT_MAX_FILES}.`,
      ),
  }),

  execute: async (input, context): Promise<ToolResult> => {
    const root = input.path ?? process.cwd()
    const limit = input.maxFiles ?? DEFAULT_MAX_FILES
    const signal = context.abortSignal

    const noMatch: ToolResult = { data: 'No files matched.', isError: false }
    // `relative` yields '' when the path is the working directory itself;
    // fall back to the untouched path in that case.
    const display = (p: string): string => relative(process.cwd(), p) || p

    let listed: string[]
    let overflow = false

    try {
      const rootInfo = await stat(root)

      if (rootInfo.isFile()) {
        const fileName = basename(root)
        const excluded =
          input.pattern !== undefined && !matchesGlob(fileName, input.pattern)
        if (excluded) return noMatch
        listed = [display(root)]
      } else {
        // Ask for one path beyond the cap so truncation can be detected.
        const found = await collectFiles(root, input.pattern, signal, {
          maxFiles: limit + 1,
        })
        overflow = found.length > limit
        listed = found.slice(0, limit).map((p) => display(p))
      }
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Unknown error'
      return {
        data: `Cannot access path "${root}": ${message}`,
        isError: true,
      }
    }

    if (listed.length === 0) return noMatch

    const body = [...listed].sort((a, b) => a.localeCompare(b)).join('\n')
    if (!overflow) {
      return { data: body, isError: false }
    }
    return {
      data: `${body}\n\n(listing capped at ${limit} paths; raise maxFiles for more)`,
      isError: false,
    }
  },
})

View File

@ -8,28 +8,18 @@
*/ */
import { spawn } from 'child_process' import { spawn } from 'child_process'
import { readdir, readFile, stat } from 'fs/promises' import { readFile, stat } from 'fs/promises'
// Note: readdir is used with { encoding: 'utf8' } to return string[] directly. import { relative } from 'path'
import { join, relative } from 'path'
import { z } from 'zod' import { z } from 'zod'
import type { ToolResult } from '../../types.js' import type { ToolResult } from '../../types.js'
import { defineTool } from '../framework.js' import { defineTool } from '../framework.js'
import { collectFiles } from './fs-walk.js'
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Constants // Constants
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
const DEFAULT_MAX_RESULTS = 100 const DEFAULT_MAX_RESULTS = 100
// Directories that are almost never useful to search inside
const SKIP_DIRS = new Set([
'.git',
'.svn',
'.hg',
'node_modules',
'.next',
'dist',
'build',
])
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Tool definition // Tool definition
@ -42,6 +32,7 @@ export const grepTool = defineTool({
'Returns matching lines with their file paths and 1-based line numbers. ' + 'Returns matching lines with their file paths and 1-based line numbers. ' +
'Use the `glob` parameter to restrict the search to specific file types ' + 'Use the `glob` parameter to restrict the search to specific file types ' +
'(e.g. "*.ts"). ' + '(e.g. "*.ts"). ' +
'To list matching file paths without reading contents, use the `glob` tool. ' +
'Results are capped by `maxResults` to keep the response manageable.', 'Results are capped by `maxResults` to keep the response manageable.',
inputSchema: z.object({ inputSchema: z.object({
@ -270,79 +261,6 @@ async function runNodeSearch(
} }
} }
// ---------------------------------------------------------------------------
// File collection with glob filtering
// ---------------------------------------------------------------------------
/**
* Recursively walk `dir` and return file paths, honouring `SKIP_DIRS` and an
* optional glob pattern.
*/
async function collectFiles(
dir: string,
glob: string | undefined,
signal: AbortSignal | undefined,
): Promise<string[]> {
const results: string[] = []
await walk(dir, glob, results, signal)
return results
}
async function walk(
dir: string,
glob: string | undefined,
results: string[],
signal: AbortSignal | undefined,
): Promise<void> {
if (signal?.aborted === true) return
let entryNames: string[]
try {
// Read as plain strings so we don't have to deal with Buffer Dirent variants.
entryNames = await readdir(dir, { encoding: 'utf8' })
} catch {
return
}
for (const entryName of entryNames) {
if (signal !== undefined && signal.aborted) return
const fullPath = join(dir, entryName)
let entryInfo: Awaited<ReturnType<typeof stat>>
try {
entryInfo = await stat(fullPath)
} catch {
continue
}
if (entryInfo.isDirectory()) {
if (!SKIP_DIRS.has(entryName)) {
await walk(fullPath, glob, results, signal)
}
} else if (entryInfo.isFile()) {
if (glob === undefined || matchesGlob(entryName, glob)) {
results.push(fullPath)
}
}
}
}
/**
* Minimal glob match supporting `*.ext` and `**\/<pattern>` forms.
*/
function matchesGlob(filename: string, glob: string): boolean {
// Strip leading **/ prefix — we already recurse into all directories
const pattern = glob.startsWith('**/') ? glob.slice(3) : glob
// Convert shell glob characters to regex equivalents
const regexSource = pattern
.replace(/[.+^${}()|[\]\\]/g, '\\$&') // escape special regex chars first
.replace(/\*/g, '.*') // * -> .*
.replace(/\?/g, '.') // ? -> .
const re = new RegExp(`^${regexSource}$`, 'i')
return re.test(filename)
}
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// ripgrep availability check (cached per process) // ripgrep availability check (cached per process)
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------

View File

@ -11,9 +11,10 @@ import { bashTool } from './bash.js'
import { fileEditTool } from './file-edit.js' import { fileEditTool } from './file-edit.js'
import { fileReadTool } from './file-read.js' import { fileReadTool } from './file-read.js'
import { fileWriteTool } from './file-write.js' import { fileWriteTool } from './file-write.js'
import { globTool } from './glob.js'
import { grepTool } from './grep.js' import { grepTool } from './grep.js'
export { bashTool, fileEditTool, fileReadTool, fileWriteTool, grepTool } export { bashTool, fileEditTool, fileReadTool, fileWriteTool, globTool, grepTool }
/** /**
* The ordered list of all built-in tools. Import this when you need to * The ordered list of all built-in tools. Import this when you need to
@ -29,6 +30,7 @@ export const BUILT_IN_TOOLS: ToolDefinition<any>[] = [
fileWriteTool, fileWriteTool,
fileEditTool, fileEditTool,
grepTool, grepTool,
globTool,
] ]
/** /**

View File

@ -6,6 +6,7 @@ import { fileReadTool } from '../src/tool/built-in/file-read.js'
import { fileWriteTool } from '../src/tool/built-in/file-write.js' import { fileWriteTool } from '../src/tool/built-in/file-write.js'
import { fileEditTool } from '../src/tool/built-in/file-edit.js' import { fileEditTool } from '../src/tool/built-in/file-edit.js'
import { bashTool } from '../src/tool/built-in/bash.js' import { bashTool } from '../src/tool/built-in/bash.js'
import { globTool } from '../src/tool/built-in/glob.js'
import { grepTool } from '../src/tool/built-in/grep.js' import { grepTool } from '../src/tool/built-in/grep.js'
import { registerBuiltInTools, BUILT_IN_TOOLS } from '../src/tool/built-in/index.js' import { registerBuiltInTools, BUILT_IN_TOOLS } from '../src/tool/built-in/index.js'
import { ToolRegistry } from '../src/tool/framework.js' import { ToolRegistry } from '../src/tool/framework.js'
@ -34,7 +35,7 @@ afterEach(async () => {
// =========================================================================== // ===========================================================================
describe('registerBuiltInTools', () => { describe('registerBuiltInTools', () => {
it('registers all 5 built-in tools', () => { it('registers all 6 built-in tools', () => {
const registry = new ToolRegistry() const registry = new ToolRegistry()
registerBuiltInTools(registry) registerBuiltInTools(registry)
@ -43,10 +44,11 @@ describe('registerBuiltInTools', () => {
expect(registry.get('file_write')).toBeDefined() expect(registry.get('file_write')).toBeDefined()
expect(registry.get('file_edit')).toBeDefined() expect(registry.get('file_edit')).toBeDefined()
expect(registry.get('grep')).toBeDefined() expect(registry.get('grep')).toBeDefined()
expect(registry.get('glob')).toBeDefined()
}) })
it('BUILT_IN_TOOLS has correct length', () => { it('BUILT_IN_TOOLS has correct length', () => {
expect(BUILT_IN_TOOLS).toHaveLength(5) expect(BUILT_IN_TOOLS).toHaveLength(6)
}) })
}) })
@ -305,6 +307,102 @@ describe('bash', () => {
}) })
}) })
// ===========================================================================
// glob
// ===========================================================================
// Exercises globTool.execute directly against files created under tmpDir.
describe('glob', () => {
// Sentinel file bodies let the assertions show that only paths, never
// contents, appear in the result.
it('lists files matching a pattern without reading contents', async () => {
await writeFile(join(tmpDir, 'a.ts'), 'SECRET_CONTENT_SHOULD_NOT_APPEAR')
await writeFile(join(tmpDir, 'b.md'), 'also secret')
const result = await globTool.execute(
{ path: tmpDir, pattern: '*.ts' },
defaultContext,
)
expect(result.isError).toBe(false)
expect(result.data).toContain('.ts')
expect(result.data).not.toContain('SECRET')
expect(result.data).not.toContain('b.md')
})
// No pattern: every file under the directory is expected in the listing.
it('lists all files when pattern is omitted', async () => {
await writeFile(join(tmpDir, 'x.txt'), 'x')
await writeFile(join(tmpDir, 'y.txt'), 'y')
const result = await globTool.execute({ path: tmpDir }, defaultContext)
expect(result.isError).toBe(false)
expect(result.data).toContain('x.txt')
expect(result.data).toContain('y.txt')
})
// `path` may point at a file rather than a directory.
it('lists a single file when path is a file', async () => {
const filePath = join(tmpDir, 'only.ts')
await writeFile(filePath, 'body')
const result = await globTool.execute({ path: filePath }, defaultContext)
expect(result.isError).toBe(false)
expect(result.data).toContain('only.ts')
})
// A non-matching single file is a normal "no match" outcome, not an error.
it('returns no match when single file does not match pattern', async () => {
const filePath = join(tmpDir, 'readme.md')
await writeFile(filePath, '# doc')
const result = await globTool.execute(
{ path: filePath, pattern: '*.ts' },
defaultContext,
)
expect(result.isError).toBe(false)
expect(result.data).toContain('No files matched')
})
// deep.ts lives one level below tmpDir, so finding it shows the walk descends.
it('recurses into subdirectories', async () => {
const sub = join(tmpDir, 'nested')
const { mkdir } = await import('fs/promises')
await mkdir(sub, { recursive: true })
await writeFile(join(sub, 'deep.ts'), '')
const result = await globTool.execute(
{ path: tmpDir, pattern: '*.ts' },
defaultContext,
)
expect(result.isError).toBe(false)
expect(result.data).toContain('deep.ts')
})
// A path that cannot be stat-ed is reported through isError.
it('errors on inaccessible path', async () => {
const result = await globTool.execute(
{ path: '/nonexistent/path/xyz' },
defaultContext,
)
expect(result.isError).toBe(true)
expect(result.data).toContain('Cannot access path')
})
// Five candidates with maxFiles: 3 should yield three paths plus the cap note.
it('notes truncation when maxFiles is exceeded', async () => {
for (let i = 0; i < 5; i++) {
await writeFile(join(tmpDir, `f${i}.txt`), '')
}
const result = await globTool.execute(
{ path: tmpDir, pattern: '*.txt', maxFiles: 3 },
defaultContext,
)
expect(result.isError).toBe(false)
// Keep only lines ending in .txt so the trailing truncation note is not counted.
const lines = (result.data as string).split('\n').filter((l) => l.endsWith('.txt'))
expect(lines).toHaveLength(3)
expect(result.data).toContain('capped at 3')
})
})
// =========================================================================== // ===========================================================================
// grep (Node.js fallback — tests do not depend on ripgrep availability) // grep (Node.js fallback — tests do not depend on ripgrep availability)
// =========================================================================== // ===========================================================================

View File

@ -61,6 +61,13 @@ function createTestTools() {
execute: async () => ({ data: 'matches', isError: false }), execute: async () => ({ data: 'matches', isError: false }),
})) }))
registry.register(defineTool({
name: 'glob',
description: 'List paths',
inputSchema: z.object({ path: z.string().optional() }),
execute: async () => ({ data: 'paths', isError: false }),
}))
registry.register(defineTool({ registry.register(defineTool({
name: 'bash', name: 'bash',
description: 'Run shell command', description: 'Run shell command',
@ -110,7 +117,15 @@ describe('Tool filtering', () => {
const tools = (runner as any).resolveTools() as LLMToolDef[] const tools = (runner as any).resolveTools() as LLMToolDef[]
const toolNames = tools.map((t: LLMToolDef) => t.name).sort() const toolNames = tools.map((t: LLMToolDef) => t.name).sort()
expect(toolNames).toEqual(['bash', 'custom_tool', 'file_edit', 'file_read', 'file_write', 'grep']) expect(toolNames).toEqual([
'bash',
'custom_tool',
'file_edit',
'file_read',
'file_write',
'glob',
'grep',
])
}) })
}) })
@ -124,7 +139,7 @@ describe('Tool filtering', () => {
const tools = (runner as any).resolveTools() as LLMToolDef[] const tools = (runner as any).resolveTools() as LLMToolDef[]
const toolNames = tools.map((t: LLMToolDef) => t.name).sort() const toolNames = tools.map((t: LLMToolDef) => t.name).sort()
expect(toolNames).toEqual(['custom_tool', 'file_read', 'grep']) expect(toolNames).toEqual(['custom_tool', 'file_read', 'glob', 'grep'])
}) })
it('readwrite preset filters correctly', () => { it('readwrite preset filters correctly', () => {
@ -136,7 +151,14 @@ describe('Tool filtering', () => {
const tools = (runner as any).resolveTools() as LLMToolDef[] const tools = (runner as any).resolveTools() as LLMToolDef[]
const toolNames = tools.map((t: LLMToolDef) => t.name).sort() const toolNames = tools.map((t: LLMToolDef) => t.name).sort()
expect(toolNames).toEqual(['custom_tool', 'file_edit', 'file_read', 'file_write', 'grep']) expect(toolNames).toEqual([
'custom_tool',
'file_edit',
'file_read',
'file_write',
'glob',
'grep',
])
}) })
it('full preset filters correctly', () => { it('full preset filters correctly', () => {
@ -148,7 +170,15 @@ describe('Tool filtering', () => {
const tools = (runner as any).resolveTools() as LLMToolDef[] const tools = (runner as any).resolveTools() as LLMToolDef[]
const toolNames = tools.map((t: LLMToolDef) => t.name).sort() const toolNames = tools.map((t: LLMToolDef) => t.name).sort()
expect(toolNames).toEqual(['bash', 'custom_tool', 'file_edit', 'file_read', 'file_write', 'grep']) expect(toolNames).toEqual([
'bash',
'custom_tool',
'file_edit',
'file_read',
'file_write',
'glob',
'grep',
])
}) })
}) })
@ -186,7 +216,14 @@ describe('Tool filtering', () => {
const tools = (runner as any).resolveTools() as LLMToolDef[] const tools = (runner as any).resolveTools() as LLMToolDef[]
const toolNames = tools.map((t: LLMToolDef) => t.name).sort() const toolNames = tools.map((t: LLMToolDef) => t.name).sort()
expect(toolNames).toEqual(['custom_tool', 'file_edit', 'file_read', 'file_write', 'grep']) expect(toolNames).toEqual([
'custom_tool',
'file_edit',
'file_read',
'file_write',
'glob',
'grep',
])
}) })
it('empty denylist returns all tools', () => { it('empty denylist returns all tools', () => {
@ -196,13 +233,13 @@ describe('Tool filtering', () => {
}) })
const tools = (runner as any).resolveTools() const tools = (runner as any).resolveTools()
expect(tools).toHaveLength(6) // All registered tools expect(tools).toHaveLength(7) // All registered tools
}) })
}) })
describe('resolveTools - combined filtering (preset + allowlist + denylist)', () => { describe('resolveTools - combined filtering (preset + allowlist + denylist)', () => {
it('preset + allowlist + denylist work together', () => { it('preset + allowlist + denylist work together', () => {
// Start with readwrite preset: ['file_read', 'file_write', 'file_edit', 'grep'] // Start with readwrite preset: ['file_read', 'file_write', 'file_edit', 'grep', 'glob']
// Then allowlist: intersect with ['file_read', 'file_write', 'grep'] = ['file_read', 'file_write', 'grep'] // Then allowlist: intersect with ['file_read', 'file_write', 'grep'] = ['file_read', 'file_write', 'grep']
// Then denylist: subtract ['file_write'] = ['file_read', 'grep'] // Then denylist: subtract ['file_write'] = ['file_read', 'grep']
const runner = new AgentRunner(mockAdapter, registry, executor, { const runner = new AgentRunner(mockAdapter, registry, executor, {
@ -219,7 +256,7 @@ describe('Tool filtering', () => {
}) })
it('preset filters first, then allowlist intersects, then denylist subtracts', () => { it('preset filters first, then allowlist intersects, then denylist subtracts', () => {
// Start with readonly preset: ['file_read', 'grep'] // Start with readonly preset: ['file_read', 'grep', 'glob']
// Allowlist intersect with ['file_read', 'bash']: ['file_read'] // Allowlist intersect with ['file_read', 'bash']: ['file_read']
// Denylist subtract ['file_read']: [] // Denylist subtract ['file_read']: []
const runner = new AgentRunner(mockAdapter, registry, executor, { const runner = new AgentRunner(mockAdapter, registry, executor, {