cherry-studio/src/main/mcpServers/filesystem/tools/glob.ts
LiuVaayne 1d5dafa325
refactor: rewrite filesystem MCP server with improved tool set (#11937)
* refactor: rewrite filesystem MCP server with new tool set

- Replace existing filesystem MCP with modular architecture
- Implement 6 new tools: glob, ls, grep, read, write, delete
- Add comprehensive TypeScript types and Zod schemas
- Maintain security with path validation and allowed directories
- Improve error handling and user feedback
- Add result limits for performance (100 files/matches max)
- Format output with clear, helpful messages
- Keep backward compatibility with existing import patterns

BREAKING CHANGE: Tools renamed from snake_case to lowercase
- read_file → read
- write_file → write
- list_directory → ls
- search_files → glob
- New tools: grep, delete
- Removed: edit_file, create_directory, directory_tree, move_file, get_file_info

* 🐛 fix: remove filesystem allowed directories restriction

* 🐛 fix: relax binary detection for text files

* ✨ feat: add edit tool with fuzzy matching to filesystem MCP server

- Add edit tool with 9 fallback replacers from opencode for robust
  string replacement (SimpleReplacer, LineTrimmedReplacer,
  BlockAnchorReplacer, WhitespaceNormalizedReplacer, etc.)
- Add Levenshtein distance algorithm for similarity matching
- Improve descriptions for all tools (read, write, glob, grep, ls, delete)
  following opencode patterns for better LLM guidance
- Register edit tool in server and export from tools index

* ♻️ refactor: replace allowedDirectories with baseDir in filesystem MCP server

- Change server to use single baseDir (from WORKSPACE_ROOT env or userData/workspace default)
- Remove list_allowed_directories tool as restriction mechanism is removed
- Add ripgrep integration for faster grep searches with JS fallback
- Simplify validatePath() by removing allowlist checks
- Display paths relative to baseDir in tool outputs

* 📝 docs: standardize filesystem MCP server tool descriptions

- Unify description format to bullet-point style across all tools
- Add absolute path requirement to ls, glob, grep schemas and descriptions
- Update glob and grep to output absolute paths instead of relative paths
- Add missing error case documentation for edit tool (old_string === new_string)
- Standardize optional path parameter descriptions

* ♻️ refactor: use ripgrep for glob tool and extract shared utilities

- Extract shared ripgrep utilities (runRipgrep, getRipgrepAddonPath) to types.ts
- Rewrite glob tool to use `rg --files --glob` for reliable file matching
- Update grep tool to import shared ripgrep utilities

* 🐛 fix: handle ripgrep exit code 2 with valid results in glob tool

- Process ripgrep stdout when content exists, regardless of exit code
- Exit code 2 can indicate partial errors while still returning valid results
- Remove fallback directory listing (had buggy regex for root-level files)
- Update tool description to clarify patterns without "/" match at any depth

* 🔥 chore: remove filesystem.ts.backup file

Remove unnecessary backup file from mcpServers directory

* 🐛 fix: use correct default workspace path in filesystem MCP server

Change default baseDir from userData/workspace to userData/Data/Workspace
to match the app's data storage convention (Data/Files, Data/Notes, etc.)

Addresses PR #11937 review feedback.

* 🐛 fix: pass WORKSPACE_ROOT to FileSystemServer constructor

The envs object passed to createInMemoryMCPServer was not being used
for the filesystem server. Now WORKSPACE_ROOT is passed as a constructor
parameter, following the same pattern as other MCP servers.

* feat: add link to documentation for MCP server configuration requirement

Wrap the configuration requirement tag in a link to the documentation for better user guidance on MCP server settings.

---------

Co-authored-by: kangfenmao <kangfenmao@qq.com>
2025-12-17 23:08:42 +08:00

150 lines
4.7 KiB
TypeScript

import fs from 'fs/promises'
import path from 'path'
import * as z from 'zod'
import type { FileInfo } from '../types'
import { logger, MAX_FILES_LIMIT, runRipgrep, validatePath } from '../types'
/**
 * Zod schema for the arguments accepted by the `glob` tool.
 *
 * - `pattern` (required string): the glob pattern to match files against.
 * - `path` (optional string): the directory to search in.
 *
 * The schema only checks that `path` is a string; it does not itself verify
 * that the value is an absolute path, even though the description asks for one.
 */
export const GlobToolSchema = z.object({
pattern: z.string().describe('The glob pattern to match files against'),
path: z
.string()
.optional()
.describe('The directory to search in (must be absolute path). Defaults to the base directory')
})
/**
 * Definition of the `glob` tool: its name, the description text, and the
 * JSON Schema for its input, generated from `GlobToolSchema` with `z.toJSONSchema`.
 *
 * NOTE(review): the description hard-codes "limited to 100 files", while the
 * handler caps results with `MAX_FILES_LIMIT` — confirm the two stay in sync.
 */
export const globToolDefinition = {
name: 'glob',
description: `Fast file pattern matching tool that works with any codebase size.
- Supports glob patterns like "**/*.js" or "src/**/*.ts"
- Returns matching absolute file paths sorted by modification time (newest first)
- Use this when you need to find files by name patterns
- Patterns without "/" (e.g., "*.txt") match files at ANY depth in the directory tree
- Patterns with "/" (e.g., "src/*.ts") match relative to the search path
- Pattern syntax: * (any chars), ** (any path), {a,b} (alternatives), ? (single char)
- Results are limited to 100 files
- The path parameter must be an absolute path if specified
- If path is not specified, defaults to the base directory
- IMPORTANT: Omit the path field for the default directory (don't use "undefined" or "null")`,
inputSchema: z.toJSONSchema(GlobToolSchema)
}
/**
 * Handles a `glob` tool call: lists files under a directory that match a glob pattern.
 *
 * The arguments are validated against `GlobToolSchema`, the search directory is
 * resolved through `validatePath` and checked to be an existing directory, and the
 * file listing itself is delegated to ripgrep (`rg --files --glob=<pattern>`).
 * At most `MAX_FILES_LIMIT` files are collected, in the order ripgrep reports them,
 * and only then sorted by modification time (newest first).
 *
 * @param args - Raw tool arguments; expected to match `GlobToolSchema`.
 * @param baseDir - Directory searched when `path` is omitted or empty; also passed to `validatePath`.
 * @returns A result with a single text item: one absolute path per line (plus a
 *   truncation notice when the limit was hit), or a "No files found" message.
 * @throws Error when the arguments fail schema validation, when the pattern is blank,
 *   or when the search path does not exist or is not a directory. Other errors from
 *   `validatePath` or `fs.stat` on the search path propagate unchanged.
 */
export async function handleGlobTool(args: unknown, baseDir: string) {
  const parsed = GlobToolSchema.safeParse(args)
  if (!parsed.success) {
    throw new Error(`Invalid arguments for glob: ${parsed.error}`)
  }

  // `||` rather than `??`: an empty-string path also falls back to the base directory
  const searchPath = parsed.data.path || baseDir
  const validPath = await validatePath(searchPath, baseDir)

  // Verify the search directory exists
  try {
    const stats = await fs.stat(validPath)
    if (!stats.isDirectory()) {
      throw new Error(`Path is not a directory: ${validPath}`)
    }
  } catch (error: unknown) {
    if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') {
      throw new Error(`Directory not found: ${validPath}`)
    }
    // Everything else, including the "not a directory" error thrown above, is rethrown as-is
    throw error
  }

  // Validate pattern
  const pattern = parsed.data.pattern.trim()
  if (!pattern) {
    throw new Error('Pattern cannot be empty')
  }

  const files: FileInfo[] = []
  let truncated = false

  // Build ripgrep arguments for file listing using --glob=pattern format.
  // NOTE(review): ripgrep globs follow .gitignore rules, so the slash-containing
  // exclusions below are presumably anchored to the search root and may not
  // exclude nested directories (e.g. packages/x/node_modules) — confirm.
  const rgArgs: string[] = [
    '--files',
    '--follow',
    '--hidden',
    `--glob=${pattern}`,
    '--glob=!.git/*',
    '--glob=!node_modules/*',
    '--glob=!dist/*',
    '--glob=!build/*',
    '--glob=!__pycache__/*',
    validPath
  ]

  // Use ripgrep for file listing
  logger.debug('Running ripgrep with args', { rgArgs })
  const rgResult = await runRipgrep(rgArgs)
  logger.debug('Ripgrep result', {
    ok: rgResult.ok,
    exitCode: rgResult.exitCode,
    stdoutLength: rgResult.stdout.length,
    stdoutPreview: rgResult.stdout.slice(0, 500)
  })

  // Process results if we have stdout content.
  // NOTE(review): the intent is to keep partial results when ripgrep exits with
  // code 2 (e.g. permission denied on some dirs); whether that actually happens
  // depends on how `runRipgrep` computes `ok` — confirm.
  if (rgResult.ok && rgResult.stdout.length > 0) {
    const lines = rgResult.stdout.split('\n').filter(Boolean)
    logger.debug('Parsed lines from ripgrep', { lineCount: lines.length, lines })

    for (const line of lines) {
      // Checked before consuming the line, so `truncated` is only set when more results remain
      if (files.length >= MAX_FILES_LIMIT) {
        truncated = true
        break
      }

      // Strip only a trailing CR left over from CRLF line endings. A full trim()
      // would alter file names that legitimately end in whitespace, making the
      // stat below fail and the file silently disappear from the results.
      const filePath = line.endsWith('\r') ? line.slice(0, -1) : line
      if (!filePath.trim()) continue

      const absolutePath = path.isAbsolute(filePath) ? filePath : path.resolve(validPath, filePath)

      try {
        const stats = await fs.stat(absolutePath)
        files.push({
          path: absolutePath,
          type: 'file', // ripgrep --files only returns files
          size: stats.size,
          modified: stats.mtime
        })
      } catch (error) {
        // Best effort: a file that cannot be stat'ed is skipped
        logger.debug('Failed to stat file from ripgrep output, skipping', { file: absolutePath, error })
      }
    }
  }

  // Sort by modification time (newest first). Sorting happens after the limit is
  // applied, so a truncated result is not necessarily the newest files overall.
  files.sort((a, b) => {
    const aTime = a.modified ? a.modified.getTime() : 0
    const bTime = b.modified ? b.modified.getTime() : 0
    return bTime - aTime
  })

  // Format output - always use absolute paths
  const output: string[] = []
  if (files.length === 0) {
    output.push(`No files found matching pattern "${parsed.data.pattern}" in ${validPath}`)
  } else {
    output.push(...files.map((f) => f.path))
    if (truncated) {
      output.push('')
      output.push(`(Results truncated to ${MAX_FILES_LIMIT} files. Consider using a more specific pattern.)`)
    }
  }

  return {
    content: [
      {
        type: 'text',
        text: output.join('\n')
      }
    ]
  }
}