mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2026-01-07 05:39:05 +08:00
* refactor: rewrite filesystem MCP server with new tool set - Replace existing filesystem MCP with modular architecture - Implement 6 new tools: glob, ls, grep, read, write, delete - Add comprehensive TypeScript types and Zod schemas - Maintain security with path validation and allowed directories - Improve error handling and user feedback - Add result limits for performance (100 files/matches max) - Format output with clear, helpful messages - Keep backward compatibility with existing import patterns BREAKING CHANGE: Tools renamed from snake_case to lowercase - read_file → read - write_file → write - list_directory → ls - search_files → glob - New tools: grep, delete - Removed: edit_file, create_directory, directory_tree, move_file, get_file_info * 🐛 fix: remove filesystem allowed directories restriction * 🐛 fix: relax binary detection for text files * ✨ feat: add edit tool with fuzzy matching to filesystem MCP server - Add edit tool with 9 fallback replacers from opencode for robust string replacement (SimpleReplacer, LineTrimmedReplacer, BlockAnchorReplacer, WhitespaceNormalizedReplacer, etc.) 
- Add Levenshtein distance algorithm for similarity matching - Improve descriptions for all tools (read, write, glob, grep, ls, delete) following opencode patterns for better LLM guidance - Register edit tool in server and export from tools index * ♻️ refactor: replace allowedDirectories with baseDir in filesystem MCP server - Change server to use single baseDir (from WORKSPACE_ROOT env or userData/workspace default) - Remove list_allowed_directories tool as restriction mechanism is removed - Add ripgrep integration for faster grep searches with JS fallback - Simplify validatePath() by removing allowlist checks - Display paths relative to baseDir in tool outputs * 📝 docs: standardize filesystem MCP server tool descriptions - Unify description format to bullet-point style across all tools - Add absolute path requirement to ls, glob, grep schemas and descriptions - Update glob and grep to output absolute paths instead of relative paths - Add missing error case documentation for edit tool (old_string === new_string) - Standardize optional path parameter descriptions * ♻️ refactor: use ripgrep for glob tool and extract shared utilities - Extract shared ripgrep utilities (runRipgrep, getRipgrepAddonPath) to types.ts - Rewrite glob tool to use `rg --files --glob` for reliable file matching - Update grep tool to import shared ripgrep utilities * 🐛 fix: handle ripgrep exit code 2 with valid results in glob tool - Process ripgrep stdout when content exists, regardless of exit code - Exit code 2 can indicate partial errors while still returning valid results - Remove fallback directory listing (had buggy regex for root-level files) - Update tool description to clarify patterns without "/" match at any depth * 🔥 chore: remove filesystem.ts.backup file Remove unnecessary backup file from mcpServers directory * 🐛 fix: use correct default workspace path in filesystem MCP server Change default baseDir from userData/workspace to userData/Data/Workspace to match the app's data 
storage convention (Data/Files, Data/Notes, etc.) Addresses PR #11937 review feedback. * 🐛 fix: pass WORKSPACE_ROOT to FileSystemServer constructor The envs object passed to createInMemoryMCPServer was not being used for the filesystem server. Now WORKSPACE_ROOT is passed as a constructor parameter, following the same pattern as other MCP servers. * ✨ feat: add link to documentation for MCP server configuration requirement Wrap the configuration requirement tag in a link to the documentation for better user guidance on MCP server settings. --------- Co-authored-by: kangfenmao <kangfenmao@qq.com>
267 lines
7.4 KiB
TypeScript
267 lines
7.4 KiB
TypeScript
import fs from 'fs/promises'
|
|
import path from 'path'
|
|
import * as z from 'zod'
|
|
|
|
import type { GrepMatch } from '../types'
|
|
import { isBinaryFile, MAX_GREP_MATCHES, MAX_LINE_LENGTH, runRipgrep, validatePath } from '../types'
|
|
|
|
// Schema definition
|
|
/**
 * Zod schema for the arguments accepted by the `grep` tool.
 *
 * - `pattern` (required): regular expression to look for in file contents.
 * - `path` (optional): where to search; the handler falls back to its base
 *   directory when this is omitted.
 * - `include` (optional): file-name glob(s) restricting which files are searched.
 */
export const GrepToolSchema = z.object({
  pattern: z
    .string()
    .describe('The regex pattern to search for in file contents'),
  path: z.string().optional().describe('The directory to search in (must be absolute path). Defaults to the base directory'),
  include: z
    .string()
    .optional()
    .describe('File pattern to include in the search (e.g. "*.js", "*.{ts,tsx}")')
})
|
|
|
|
// Tool definition with detailed description
|
|
/**
 * Tool descriptor for `grep`: the tool name, the description shown to the
 * model, and the JSON Schema derived from {@link GrepToolSchema}.
 *
 * The description is kept in sync with what the handler actually does:
 * matching is case-insensitive, the match cap comes from `MAX_GREP_MATCHES`,
 * and the excluded-directory list names every directory the handler skips.
 */
export const grepToolDefinition = {
  name: 'grep',
  description: `Fast content search tool that works with any codebase size.

- Searches file contents using regular expressions
- Matching is case-insensitive
- Supports full regex syntax (e.g., "log.*Error", "function\\s+\\w+")
- Filter files by pattern with include (e.g., "*.js", "*.{ts,tsx}")
- Returns absolute file paths and line numbers with matching content
- Results are limited to ${MAX_GREP_MATCHES} matches
- Binary files are automatically skipped
- Common directories (node_modules, .git, dist, build, __pycache__) are excluded
- The path parameter must be an absolute path if specified
- If path is not specified, defaults to the base directory`,
  inputSchema: z.toJSONSchema(GrepToolSchema)
}
|
|
|
|
// Handler implementation
|
|
/**
 * Splits the comma-separated `include` argument into individual globs.
 *
 * Commas inside a brace group are part of the glob itself, so `"*.{ts,tsx}"`
 * stays a single pattern while `"*.js, *.ts"` yields two. Empty entries are dropped.
 */
function splitIncludePatterns(include: string): string[] {
  const patterns: string[] = []
  let current = ''
  let braceDepth = 0

  for (const ch of include) {
    if (ch === ',' && braceDepth === 0) {
      patterns.push(current)
      current = ''
      continue
    }
    if (ch === '{') {
      braceDepth++
    } else if (ch === '}' && braceDepth > 0) {
      braceDepth--
    }
    current += ch
  }
  patterns.push(current)

  return patterns.map((p) => p.trim()).filter(Boolean)
}

/**
 * Converts a simple file-name glob into an anchored RegExp for the JS fallback search.
 *
 * Supports `*`, `?` and `{a,b}` alternation; regex metacharacters such as `.`
 * are escaped so that `*.js` does not match `xjs`. Bracket expressions are
 * passed through unchanged. Returns `null` when the result is not a valid RegExp.
 */
function includeGlobToRegExp(glob: string): RegExp | null {
  let source = ''
  let braceDepth = 0

  for (const ch of glob) {
    if (ch === '*') {
      source += '.*'
    } else if (ch === '?') {
      source += '.'
    } else if (ch === '{') {
      braceDepth++
      source += '(?:'
    } else if (ch === '}' && braceDepth > 0) {
      braceDepth--
      source += ')'
    } else if (ch === ',' && braceDepth > 0) {
      source += '|'
    } else if ('.+^$()|\\{}'.includes(ch)) {
      source += `\\${ch}`
    } else {
      source += ch
    }
  }
  // Close any brace group the caller left open so the expression still compiles.
  source += ')'.repeat(braceDepth)

  try {
    return new RegExp(`^${source}$`)
  } catch {
    return null
  }
}

/**
 * Parses one `file:line:content` record from ripgrep output.
 *
 * The file part is matched lazily up to the first `:<digits>:` so that Windows
 * drive-letter paths (`C:\dir\file.ts:12:text`) are parsed correctly. The
 * content part uses `[\s\S]` so a trailing `\r` does not defeat the match.
 * Returns `null` for lines that do not have this shape.
 */
function parseRipgrepLine(line: string): { file: string; lineNumber: number; content: string } | null {
  const match = /^(.+?):(\d+):([\s\S]*)$/.exec(line)
  if (!match) return null

  const [, file, lineText, content] = match
  if (file === undefined || lineText === undefined || content === undefined) return null

  return { file, lineNumber: Number.parseInt(lineText, 10), content }
}

/**
 * Handles a `grep` tool call: searches file contents for a regular expression.
 *
 * ripgrep is tried first (via `runRipgrep`); when it is unavailable, throws, or
 * exits with code 2, a JavaScript directory walk is used instead. Matching is
 * case-insensitive in both paths, and at most `MAX_GREP_MATCHES` matches are
 * collected.
 *
 * @param args - Raw tool arguments; validated against `GrepToolSchema`.
 * @param baseDir - Directory searched when `path` is not given; also passed to
 *   `validatePath` (its exact checks live in `../types`).
 * @returns A result object with a single text item: matches grouped per file,
 *   or `No matches found`.
 * @throws Error when the arguments fail schema validation, the pattern is empty,
 *   or the pattern is not a valid JavaScript regular expression. Errors from
 *   `validatePath` and from `fs.stat` in the fallback path propagate as well.
 */
export async function handleGrepTool(args: unknown, baseDir: string) {
  const parsed = GrepToolSchema.safeParse(args)
  if (!parsed.success) {
    throw new Error(`Invalid arguments for grep: ${parsed.error}`)
  }

  const data = parsed.data
  if (!data.pattern) {
    throw new Error('Pattern is required for grep')
  }

  // `||` rather than `??` so an empty-string path also falls back to baseDir.
  const searchPath = data.path || baseDir
  const validPath = await validatePath(searchPath, baseDir)

  // No `g` flag: a global regex keeps `lastIndex` between `.test()` calls and
  // would skip matches on subsequent lines.
  let regex: RegExp
  try {
    regex = new RegExp(data.pattern, 'i')
  } catch {
    throw new Error(`Invalid regex pattern: ${data.pattern}`)
  }

  const matches: GrepMatch[] = []
  let truncated = false

  const excludedDirs = ['.git', 'node_modules', 'dist', 'build', '__pycache__']
  const includePatterns = data.include ? splitIncludePatterns(data.include) : []
  const includeMatchers = includePatterns
    .map(includeGlobToRegExp)
    .filter((re): re is RegExp => re !== null)

  // Build ripgrep arguments.
  const rgArgs: string[] = [
    '--no-heading',
    // ripgrep omits the file name when given a single file; force it so every
    // output line has the `file:line:content` shape the parser expects.
    '--with-filename',
    '--line-number',
    '--color',
    'never',
    '--ignore-case'
  ]
  // Later globs take precedence in ripgrep, so include globs go first and the
  // exclusions last: an include pattern must not re-enable an excluded directory.
  for (const glob of includePatterns) {
    rgArgs.push('--glob', glob)
  }
  for (const dir of excludedDirs) {
    rgArgs.push('--glob', `!${dir}/**`)
  }
  // `--` ends option parsing so a pattern starting with `-` is not read as a flag.
  rgArgs.push('--', data.pattern, validPath)

  // Fallback: scan a single file line by line.
  async function searchFile(filePath: string): Promise<void> {
    if (matches.length >= MAX_GREP_MATCHES) {
      truncated = true
      return
    }

    try {
      // Skip binary files
      if (await isBinaryFile(filePath)) {
        return
      }

      const content = await fs.readFile(filePath, 'utf-8')
      for (const [index, line] of content.split('\n').entries()) {
        if (matches.length >= MAX_GREP_MATCHES) {
          truncated = true
          break
        }
        if (!regex.test(line)) continue

        // Truncate long lines
        const truncatedLine = line.length > MAX_LINE_LENGTH ? line.substring(0, MAX_LINE_LENGTH) + '...' : line
        matches.push({
          file: filePath,
          line: index + 1,
          content: truncatedLine.trim()
        })
      }
    } catch {
      // Best effort: skip files we can't read
    }
  }

  // Fallback: walk a directory tree recursively.
  async function searchDirectory(dir: string): Promise<void> {
    if (matches.length >= MAX_GREP_MATCHES) {
      truncated = true
      return
    }

    try {
      const entries = await fs.readdir(dir, { withFileTypes: true })

      for (const entry of entries) {
        if (matches.length >= MAX_GREP_MATCHES) {
          truncated = true
          break
        }

        // Skip dot-entries (except .env.example) and common build/vendor directories
        if (entry.name.startsWith('.') && entry.name !== '.env.example') {
          continue
        }
        if (excludedDirs.includes(entry.name)) {
          continue
        }

        const fullPath = path.join(dir, entry.name)

        if (entry.isFile()) {
          // Include globs are matched against the file name only.
          if (includePatterns.length > 0 && !includeMatchers.some((re) => re.test(entry.name))) {
            continue
          }
          await searchFile(fullPath)
        } else if (entry.isDirectory()) {
          await searchDirectory(fullPath)
        }
      }
    } catch {
      // Best effort: skip directories we can't read
    }
  }

  // Perform the search: ripgrep first, JS walk as fallback.
  let usedRipgrep = false
  try {
    const rgResult = await runRipgrep(rgArgs)
    // Exit code 2 signals a ripgrep error; treat it like "ripgrep unavailable".
    if (rgResult.ok && rgResult.exitCode !== null && rgResult.exitCode !== 2) {
      usedRipgrep = true

      for (const line of rgResult.stdout.split('\n').filter(Boolean)) {
        if (matches.length >= MAX_GREP_MATCHES) {
          truncated = true
          break
        }

        const record = parseRipgrepLine(line)
        if (!record) continue

        const absoluteFilePath = path.isAbsolute(record.file) ? record.file : path.resolve(baseDir, record.file)
        const truncatedLine =
          record.content.length > MAX_LINE_LENGTH
            ? record.content.substring(0, MAX_LINE_LENGTH) + '...'
            : record.content

        matches.push({
          file: absoluteFilePath,
          line: record.lineNumber,
          content: truncatedLine.trim()
        })
      }
    }
  } catch {
    usedRipgrep = false
  }

  if (!usedRipgrep) {
    const stats = await fs.stat(validPath)
    if (stats.isFile()) {
      await searchFile(validPath)
    } else {
      await searchDirectory(validPath)
    }
  }

  // Format output
  const output: string[] = []

  if (matches.length === 0) {
    output.push('No matches found')
  } else {
    // Group matches by file, preserving first-seen order.
    const fileGroups = new Map<string, GrepMatch[]>()
    for (const match of matches) {
      const group = fileGroups.get(match.file)
      if (group) {
        group.push(match)
      } else {
        fileGroups.set(match.file, [match])
      }
    }

    // Format grouped matches - always use absolute paths
    for (const [filePath, fileMatches] of fileGroups) {
      output.push(`\n${filePath}:`)
      for (const match of fileMatches) {
        output.push(`  ${match.line}: ${match.content}`)
      }
    }

    if (truncated) {
      output.push('')
      output.push(`(Results truncated to ${MAX_GREP_MATCHES} matches. Consider using a more specific pattern or path.)`)
    }
  }

  return {
    content: [
      {
        type: 'text',
        text: output.join('\n')
      }
    ]
  }
}
|