From b361d29940b26b92db4625a531a09499a84fc37d Mon Sep 17 00:00:00 2001 From: suyao Date: Wed, 3 Dec 2025 13:48:29 +0800 Subject: [PATCH 1/9] fix: enhance character sanitization for internationalization support in tool names --- src/main/utils/__tests__/mcp.test.ts | 72 +++++++++++++++++++++++++--- src/main/utils/mcp.ts | 9 ++-- 2 files changed, 71 insertions(+), 10 deletions(-) diff --git a/src/main/utils/__tests__/mcp.test.ts b/src/main/utils/__tests__/mcp.test.ts index b1a35f925..097dc8ba7 100644 --- a/src/main/utils/__tests__/mcp.test.ts +++ b/src/main/utils/__tests__/mcp.test.ts @@ -61,12 +61,12 @@ describe('buildFunctionCallToolName', () => { it('should replace invalid characters with underscores', () => { const result = buildFunctionCallToolName('test@server', 'tool#name') expect(result).not.toMatch(/[@#]/) - expect(result).toMatch(/^[a-zA-Z0-9_-]+$/) + expect(result).toMatch(/^[\p{L}\p{N}_-]+$/u) }) - it('should ensure name starts with a letter', () => { + it('should ensure name starts with a letter or underscore', () => { const result = buildFunctionCallToolName('123server', '456tool') - expect(result).toMatch(/^[a-zA-Z]/) + expect(result).toMatch(/^[\p{L}_]/u) }) it('should handle consecutive underscores/dashes', () => { @@ -130,7 +130,7 @@ describe('buildFunctionCallToolName', () => { // Should still produce a valid unique suffix via fallback hash expect(result).toBeTruthy() expect(result.length).toBeLessThanOrEqual(63) - expect(result).toMatch(/^[a-zA-Z][a-zA-Z0-9_-]*$/) + expect(result).toMatch(/^[\p{L}_][\p{L}\p{N}_-]*$/u) // Should have a suffix (underscore followed by something) expect(result).toMatch(/_[a-z0-9]+$/) }) @@ -178,8 +178,8 @@ describe('buildFunctionCallToolName', () => { expect(tool1).not.toBe(tool2) // Both should be valid identifiers - expect(tool1).toMatch(/^[a-zA-Z][a-zA-Z0-9_-]*$/) - expect(tool2).toMatch(/^[a-zA-Z][a-zA-Z0-9_-]*$/) + expect(tool1).toMatch(/^[\p{L}_][\p{L}\p{N}_-]*$/u) + expect(tool2).toMatch(/^[\p{L}_][\p{L}\p{N}_-]*$/u) // Both should be <= 63 chars expect(tool1.length).toBeLessThanOrEqual(63) @@ -193,4 +193,64 @@ describe('buildFunctionCallToolName', () => { expect(result.split('github').length - 1).toBeLessThanOrEqual(2) }) }) + + describe('internationalization support', () => { + it('should preserve Chinese characters in tool names', () => { + const result = buildFunctionCallToolName('ocr', '行驶证OCR_轻盈版') + expect(result).toContain('行驶证') + expect(result).toContain('OCR') + expect(result).toContain('轻盈版') + }) + + it('should distinguish between different Chinese OCR tools', () => { + const tools = [ + buildFunctionCallToolName('ocr', '行驶证OCR_轻盈版'), + buildFunctionCallToolName('ocr', '营业执照OCR_轻盈版'), + buildFunctionCallToolName('ocr', '车牌OCR_轻盈版'), + buildFunctionCallToolName('ocr', '身份证OCR') + ] + + // All tools should be unique + const uniqueTools = new Set(tools) + expect(uniqueTools.size).toBe(4) + + // Verify each tool contains its distinctive Chinese characters + expect(tools[0]).toContain('行驶证') + expect(tools[1]).toContain('营业执照') + expect(tools[2]).toContain('车牌') + expect(tools[3]).toContain('身份证') + }) + + it('should handle Japanese characters', () => { + const result = buildFunctionCallToolName('server', 'ユーザー検索') + expect(result).toContain('ユーザー検索') + expect(result).toMatch(/^[\p{L}_][\p{L}\p{N}_-]*$/u) + }) + + it('should handle Korean characters', () => { + const result = buildFunctionCallToolName('server', '사용자검색') + expect(result).toContain('사용자검색') + expect(result).toMatch(/^[\p{L}_][\p{L}\p{N}_-]*$/u) + }) + + it('should handle mixed language tool names', () => { + const result = buildFunctionCallToolName('api', 'search用户by名称') + expect(result).toContain('search') + expect(result).toContain('用户') + expect(result).toContain('by') + expect(result).toContain('名称') + expect(result).toMatch(/^[\p{L}_][\p{L}\p{N}_-]*$/u) + }) + + it('should replace only control characters and special symbols, not Unicode letters', () => { + const result = buildFunctionCallToolName('test', '文件@上传#工具') + // @ and # should be replaced with underscores + expect(result).not.toContain('@') + expect(result).not.toContain('#') + // Chinese characters should be preserved + expect(result).toContain('文件') + expect(result).toContain('上传') + expect(result).toContain('工具') + }) + }) }) diff --git a/src/main/utils/mcp.ts b/src/main/utils/mcp.ts index cfa700f2e..31d0fe9f5 100644 --- a/src/main/utils/mcp.ts +++ b/src/main/utils/mcp.ts @@ -27,11 +27,12 @@ export function buildFunctionCallToolName(serverName: string, toolName: string, } // Replace invalid characters with underscores or dashes - // Keep a-z, A-Z, 0-9, underscores and dashes - name = name.replace(/[^a-zA-Z0-9_-]/g, '_') + // Keep Unicode letters (\p{L}), Unicode numbers (\p{N}), underscores and dashes + // This supports international characters (Chinese, Japanese, Korean, etc.) + name = name.replace(/[^\p{L}\p{N}_-]/gu, '_') - // Ensure name starts with a letter or underscore (for valid JavaScript identifier) - if (!/^[a-zA-Z]/.test(name)) { + // Ensure name starts with a letter or underscore (supports Unicode letters) + if (!/^[\p{L}_]/u.test(name)) { name = `tool-${name}` } From b6efe41b8640b789dae0d3b84e27afbc0e8b9424 Mon Sep 17 00:00:00 2001 From: suyao Date: Wed, 3 Dec 2025 14:13:07 +0800 Subject: [PATCH 2/9] feat: add transliteration support for internationalization in tool names --- package.json | 1 + src/main/utils/__tests__/mcp.test.ts | 102 +++++++++++++++++++-------- src/main/utils/mcp.ts | 62 +++++++++++++--- yarn.lock | 13 ++++ 4 files changed, 138 insertions(+), 40 deletions(-) diff --git a/package.json b/package.json index 3f95aee6d..74c2ccaea 100644 --- a/package.json +++ b/package.json @@ -363,6 +363,7 @@ "tar": "^7.4.3", "tiny-pinyin": "^1.3.2", "tokenx": "^1.1.0", + "transliteration": "^2.3.5", "tsx": "^4.20.3", "turndown-plugin-gfm": "^1.0.2", "tw-animate-css": "^1.3.8", diff --git a/src/main/utils/__tests__/mcp.test.ts b/src/main/utils/__tests__/mcp.test.ts index 097dc8ba7..dee5ab2a8 100644 --- a/src/main/utils/__tests__/mcp.test.ts +++ b/src/main/utils/__tests__/mcp.test.ts @@ -61,12 +61,13 @@ describe('buildFunctionCallToolName', () => { it('should replace invalid characters with underscores', () => { const result = buildFunctionCallToolName('test@server', 'tool#name') expect(result).not.toMatch(/[@#]/) - expect(result).toMatch(/^[\p{L}\p{N}_-]+$/u) + // Should only contain ASCII alphanumeric, underscore, dash, dot, colon + expect(result).toMatch(/^[a-zA-Z0-9_.\-:]+$/) }) it('should ensure name starts with a letter or underscore', () => { const result = buildFunctionCallToolName('123server', '456tool') - expect(result).toMatch(/^[\p{L}_]/u) + expect(result).toMatch(/^[a-zA-Z_]/) }) it('should handle consecutive underscores/dashes', () => { @@ -130,7 +131,7 @@ describe('buildFunctionCallToolName', () => { // Should still produce a valid unique suffix via fallback hash expect(result).toBeTruthy() expect(result.length).toBeLessThanOrEqual(63) - expect(result).toMatch(/^[\p{L}_][\p{L}\p{N}_-]*$/u) + expect(result).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/) // Should have a suffix (underscore followed by something) expect(result).toMatch(/_[a-z0-9]+$/) }) @@ -177,9 +178,9 @@ describe('buildFunctionCallToolName', () => { // Should be different expect(tool1).not.toBe(tool2) - // Both should be valid identifiers - expect(tool1).toMatch(/^[\p{L}_][\p{L}\p{N}_-]*$/u) - expect(tool2).toMatch(/^[\p{L}_][\p{L}\p{N}_-]*$/u) + // Both should be valid AI model tool names (ASCII only) + expect(tool1).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/) + expect(tool2).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/) // Both should be <= 63 chars expect(tool1.length).toBeLessThanOrEqual(63) @@ -194,12 +195,14 @@ describe('buildFunctionCallToolName', () => { }) }) - describe('internationalization support', () => { - it('should preserve Chinese characters in tool names', () => { + describe('internationalization support (CJK to ASCII transliteration)', () => { + it('should convert Chinese characters to pinyin', () => { const result = buildFunctionCallToolName('ocr', '行驶证OCR_轻盈版') - expect(result).toContain('行驶证') - expect(result).toContain('OCR') - expect(result).toContain('轻盈版') + // Chinese characters should be transliterated to pinyin + expect(result).not.toMatch(/[\u4e00-\u9fff]/) // No Chinese characters + expect(result).toContain('ocr') // OCR is lowercased + // Should only contain ASCII characters (lowercase) + expect(result).toMatch(/^[a-z_][a-z0-9_.\-:]*$/) }) it('should distinguish between different Chinese OCR tools', () => { @@ -210,47 +213,84 @@ describe('buildFunctionCallToolName', () => { buildFunctionCallToolName('ocr', '身份证OCR') ] - // All tools should be unique + // All tools should be unique (pinyin transliterations are different) const uniqueTools = new Set(tools) expect(uniqueTools.size).toBe(4) - // Verify each tool contains its distinctive Chinese characters - expect(tools[0]).toContain('行驶证') - expect(tools[1]).toContain('营业执照') - expect(tools[2]).toContain('车牌') - expect(tools[3]).toContain('身份证') + // All should be ASCII-only valid tool names + tools.forEach((tool) => { + expect(tool).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/) + expect(tool).not.toMatch(/[\u4e00-\u9fff]/) // No Chinese characters + }) + + // Verify they contain transliterated pinyin (with underscores between characters) + // 行驶证 = xing_shi_zheng, 营业执照 = ying_ye_zhi_zhao, 车牌 = che_pai, 身份证 = shen_fen_zheng + expect(tools[0]).toContain('xing_shi_zheng') + expect(tools[1]).toContain('ying_ye_zhi_zhao') + expect(tools[2]).toContain('che_pai') + expect(tools[3]).toContain('shen_fen_zheng') }) - it('should handle Japanese characters', () => { + it('should handle Japanese characters with base36 encoding', () => { const result = buildFunctionCallToolName('server', 'ユーザー検索') - expect(result).toContain('ユーザー検索') - expect(result).toMatch(/^[\p{L}_][\p{L}\p{N}_-]*$/u) + // Should be ASCII-only + expect(result).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/) + // Should not contain original Japanese characters + expect(result).not.toMatch(/[\u3040-\u309f\u30a0-\u30ff]/) }) - it('should handle Korean characters', () => { + it('should handle Korean characters with base36 encoding', () => { const result = buildFunctionCallToolName('server', '사용자검색') - expect(result).toContain('사용자검색') - expect(result).toMatch(/^[\p{L}_][\p{L}\p{N}_-]*$/u) + // Should be ASCII-only + expect(result).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/) + // Should not contain original Korean characters + expect(result).not.toMatch(/[\uac00-\ud7af]/) }) it('should handle mixed language tool names', () => { const result = buildFunctionCallToolName('api', 'search用户by名称') + // ASCII parts should be preserved (lowercased) expect(result).toContain('search') - expect(result).toContain('用户') expect(result).toContain('by') - expect(result).toContain('名称') - expect(result).toMatch(/^[\p{L}_][\p{L}\p{N}_-]*$/u) + // Chinese parts should be transliterated (用户 = yong_hu, 名称 = ming_cheng) + expect(result).toContain('yong_hu') + expect(result).toContain('ming_cheng') + // Final result should be ASCII-only (lowercase) + expect(result).toMatch(/^[a-z_][a-z0-9_.\-:]*$/) }) - it('should replace only control characters and special symbols, not Unicode letters', () => { + it('should transliterate Chinese and replace special symbols', () => { const result = buildFunctionCallToolName('test', '文件@上传#工具') // @ and # should be replaced with underscores expect(result).not.toContain('@') expect(result).not.toContain('#') - // Chinese characters should be preserved - expect(result).toContain('文件') - expect(result).toContain('上传') - expect(result).toContain('工具') + // Chinese characters should be transliterated + // 文件 = wen_jian, 上传 = shang_chuan, 工具 = gong_ju + expect(result).toContain('wen_jian') + expect(result).toContain('shang_chuan') + expect(result).toContain('gong_ju') + // Should be ASCII-only (lowercase) + expect(result).toMatch(/^[a-z_][a-z0-9_.\-:]*$/) + }) + + it('should produce AI model compatible tool names', () => { + const testCases = [ + '行驶证OCR', + '营业执照识别', + 'get用户info', + '文件@处理', + '数据分析_v2' + ] + + testCases.forEach((testCase) => { + const result = buildFunctionCallToolName('server', testCase) + // Must start with letter or underscore + expect(result).toMatch(/^[a-zA-Z_]/) + // Must only contain a-z, A-Z, 0-9, _, -, ., : + expect(result).toMatch(/^[a-zA-Z0-9_.\-:]+$/) + // Must be <= 64 characters + expect(result.length).toBeLessThanOrEqual(64) + }) }) }) }) diff --git a/src/main/utils/mcp.ts b/src/main/utils/mcp.ts index 31d0fe9f5..0a144f49f 100644 --- a/src/main/utils/mcp.ts +++ b/src/main/utils/mcp.ts @@ -1,6 +1,51 @@ +import { transliterate } from 'transliteration' + +/** + * Transliterate non-ASCII characters to ASCII equivalents + * - Chinese → Pinyin (e.g., 行驶证 → xingshizheng) + * - Japanese → Romaji (e.g., ユーザー → yūzā) + * - Korean → Romanization (e.g., 사용자 → sayongja) + * - Other special characters → underscores + */ +/** + * Transliterates non-ASCII text (including CJK characters) to ASCII-compatible format. + * + * Converts input text to lowercase ASCII representation, replacing spaces with underscores + * and removing special characters. Unknown or special characters are replaced with underscores. + * + * @param text - The input string to transliterate, may contain Unicode characters including CJK + * @returns A lowercase ASCII string with spaces converted to underscores and special characters removed, + * preserving only alphanumeric characters, underscores, dots, hyphens, and colons + * + * @example + * ```typescript + * transliterateToAscii("Hello World") // returns "hello_world" + * transliterateToAscii("你好世界") // returns transliterated version with underscores + * transliterateToAscii("Café-123") // returns "cafe_123" + * ``` + */ +function transliterateToAscii(text: string): string { + // Use transliteration library which supports CJK (Chinese, Japanese, Korean) + const result = transliterate(text, { + // Unknown/special characters become underscores + unknown: '_', + ignore: [] + }) + + // Convert to lowercase, remove spaces, and clean up special chars + return result + .toLowerCase() + .replace(/\s+/g, '_') + .replace(/[^a-z0-9_.\-:]/g, '_') +} + export function buildFunctionCallToolName(serverName: string, toolName: string, serverId?: string) { - const sanitizedServer = serverName.trim().replace(/-/g, '_') - const sanitizedTool = toolName.trim().replace(/-/g, '_') + // First, transliterate non-ASCII characters to ASCII + const transliteratedServer = transliterateToAscii(serverName.trim()) + const transliteratedTool = transliterateToAscii(toolName.trim()) + + const sanitizedServer = transliteratedServer.replace(/-/g, '_') + const sanitizedTool = transliteratedTool.replace(/-/g, '_') // Calculate suffix first to reserve space for it // Suffix format: "_" + 6 alphanumeric chars = 7 chars total @@ -26,14 +71,13 @@ export function buildFunctionCallToolName(serverName: string, toolName: string, name = `${sanitizedServer.slice(0, 7) || ''}-${sanitizedTool || ''}` } - // Replace invalid characters with underscores or dashes - // Keep Unicode letters (\p{L}), Unicode numbers (\p{N}), underscores and dashes - // This supports international characters (Chinese, Japanese, Korean, etc.) - name = name.replace(/[^\p{L}\p{N}_-]/gu, '_') + // Replace invalid characters with underscores + // Keep only a-z, A-Z, 0-9, underscores, dashes, dots, colons (AI model compatible) + name = name.replace(/[^a-zA-Z0-9_.\-:]/g, '_') - // Ensure name starts with a letter or underscore (supports Unicode letters) - if (!/^[\p{L}_]/u.test(name)) { - name = `tool-${name}` + // Ensure name starts with a letter or underscore (AI model requirement) + if (!/^[a-zA-Z_]/.test(name)) { + name = `tool_${name}` } // Remove consecutive underscores/dashes (optional improvement) diff --git a/yarn.lock b/yarn.lock index 22b6c581d..da56898ba 100644 --- a/yarn.lock +++ b/yarn.lock @@ -10285,6 +10285,7 @@ __metadata: tesseract.js: "patch:tesseract.js@npm%3A6.0.1#~/.yarn/patches/tesseract.js-npm-6.0.1-2562a7e46d.patch" tiny-pinyin: "npm:^1.3.2" tokenx: "npm:^1.1.0" + transliteration: "npm:^2.3.5" tsx: "npm:^4.20.3" turndown: "npm:7.2.0" turndown-plugin-gfm: "npm:^1.0.2" @@ -24607,6 +24608,18 @@ __metadata: languageName: node linkType: hard +"transliteration@npm:^2.3.5": + version: 2.3.5 + resolution: "transliteration@npm:2.3.5" + dependencies: + yargs: "npm:^17.5.1" + bin: + slugify: dist/bin/slugify + transliterate: dist/bin/transliterate + checksum: 10c0/68397225c2ca59b8e33206c65f905724e86b64460cbf90576d352dc2366e763ded97e2c7b8b1f140fb36a565d61a97c51080df9fa638e6b1769f6cb24f383756 + languageName: node + linkType: hard + "tree-kill@npm:1.2.2, tree-kill@npm:^1.2.2": version: 1.2.2 resolution: "tree-kill@npm:1.2.2" From 59c606eac68072c9aaf35cc0fb9d82128285d108 Mon Sep 17 00:00:00 2001 From: suyao Date: Thu, 4 Dec 2025 20:51:20 +0800 Subject: [PATCH 3/9] fix format --- src/main/utils/__tests__/mcp.test.ts | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/main/utils/__tests__/mcp.test.ts b/src/main/utils/__tests__/mcp.test.ts index dee5ab2a8..d29c8c8cd 100644 --- a/src/main/utils/__tests__/mcp.test.ts +++ b/src/main/utils/__tests__/mcp.test.ts @@ -274,13 +274,7 @@ describe('buildFunctionCallToolName', () => { }) it('should produce AI model compatible tool names', () => { - const testCases = [ - '行驶证OCR', - '营业执照识别', - 'get用户info', - '文件@处理', - '数据分析_v2' - ] + const testCases = ['行驶证OCR', '营业执照识别', 'get用户info', '文件@处理', '数据分析_v2'] testCases.forEach((testCase) => { const result = buildFunctionCallToolName('server', testCase) From 4d9f75b0ee9c1e17b39dbac2cd476ca0c05ef14d Mon Sep 17 00:00:00 2001 From: SuYao Date: Thu, 4 Dec 2025 21:00:21 +0800 Subject: [PATCH 4/9] Update src/main/utils/mcp.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/main/utils/mcp.ts | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/main/utils/mcp.ts b/src/main/utils/mcp.ts index 0a144f49f..4a1dfd600 100644 --- a/src/main/utils/mcp.ts +++ b/src/main/utils/mcp.ts @@ -1,12 +1,5 @@ import { transliterate } from 'transliteration' -/** - * Transliterate non-ASCII characters to ASCII equivalents - * - Chinese → Pinyin (e.g., 行驶证 → xingshizheng) - * - Japanese → Romaji (e.g., ユーザー → yūzā) - * - Korean → Romanization (e.g., 사용자 → sayongja) - * - Other special characters → underscores - */ /** * Transliterates non-ASCII text (including CJK characters) to ASCII-compatible format. * From f800ec8593e53f4ef8508b5b6837015dd343342f Mon Sep 17 00:00:00 2001 From: SuYao Date: Thu, 4 Dec 2025 21:00:38 +0800 Subject: [PATCH 5/9] Update src/main/utils/__tests__/mcp.test.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/main/utils/__tests__/mcp.test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/utils/__tests__/mcp.test.ts b/src/main/utils/__tests__/mcp.test.ts index d29c8c8cd..1d87eb334 100644 --- a/src/main/utils/__tests__/mcp.test.ts +++ b/src/main/utils/__tests__/mcp.test.ts @@ -231,9 +231,9 @@ describe('buildFunctionCallToolName', () => { expect(tools[3]).toContain('shen_fen_zheng') }) - it('should handle Japanese characters with base36 encoding', () => { + it('should handle Japanese characters with Romaji transliteration', () => { const result = buildFunctionCallToolName('server', 'ユーザー検索') - // Should be ASCII-only + // Should be ASCII-only (Japanese characters are transliterated to Romaji) expect(result).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/) // Should not contain original Japanese characters expect(result).not.toMatch(/[\u3040-\u309f\u30a0-\u30ff]/) From 0ac3a2dd72e7d34b43916445bdcad799a557fb3a Mon Sep 17 00:00:00 2001 From: SuYao Date: Thu, 4 Dec 2025 21:00:47 +0800 Subject: [PATCH 6/9] Update src/main/utils/__tests__/mcp.test.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/main/utils/__tests__/mcp.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/utils/__tests__/mcp.test.ts b/src/main/utils/__tests__/mcp.test.ts index 1d87eb334..4db48335d 100644 --- a/src/main/utils/__tests__/mcp.test.ts +++ b/src/main/utils/__tests__/mcp.test.ts @@ -239,7 +239,7 @@ describe('buildFunctionCallToolName', () => { expect(result).not.toMatch(/[\u3040-\u309f\u30a0-\u30ff]/) }) - it('should handle Korean characters with base36 encoding', () => { + it('should handle Korean characters with romanization', () => { const result = buildFunctionCallToolName('server', '사용자검색') // Should be ASCII-only expect(result).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/) From 2cac4325122a2d91c635a5d44f7ea5076b821874 Mon Sep 17 00:00:00 2001 From: SuYao Date: Thu, 4 Dec 2025 21:02:12 +0800 Subject: [PATCH 7/9] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/main/utils/mcp.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/utils/mcp.ts b/src/main/utils/mcp.ts index 4a1dfd600..60f21037a 100644 --- a/src/main/utils/mcp.ts +++ b/src/main/utils/mcp.ts @@ -65,8 +65,8 @@ export function buildFunctionCallToolName(serverName: string, toolName: string, } // Replace invalid characters with underscores - // Keep only a-z, A-Z, 0-9, underscores, dashes, dots, colons (AI model compatible) - name = name.replace(/[^a-zA-Z0-9_.\-:]/g, '_') + // Keep only a-z, 0-9, underscores, dashes, dots, colons (AI model compatible) + name = name.replace(/[^a-z0-9_.\-:]/g, '_') // Ensure name starts with a letter or underscore (AI model requirement) if (!/^[a-zA-Z_]/.test(name)) { From 5242cf0c9dc0535257b5a1cbc121b9127fd28bee Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Sun, 14 Dec 2025 00:22:08 +0800 Subject: [PATCH 8/9] fix: Remove non-compliant characters from MCP tool names for API compatibility (#11903) * Initial plan * fix: address PR review feedback - remove dots/colons, add error handling and logging Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com> * refactor: improve error handling consistency and default values Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com> --- src/main/utils/__tests__/mcp.test.ts | 18 ++++---- src/main/utils/mcp.ts | 62 +++++++++++++++++++++------- 2 files changed, 55 insertions(+), 25 deletions(-) diff --git a/src/main/utils/__tests__/mcp.test.ts b/src/main/utils/__tests__/mcp.test.ts index 4db48335d..a5c6400f7 100644 --- a/src/main/utils/__tests__/mcp.test.ts +++ b/src/main/utils/__tests__/mcp.test.ts @@ -202,7 +202,7 @@ describe('buildFunctionCallToolName', () => { expect(result).not.toMatch(/[\u4e00-\u9fff]/) // No Chinese characters expect(result).toContain('ocr') // OCR is lowercased // Should only contain ASCII characters (lowercase) - expect(result).toMatch(/^[a-z_][a-z0-9_.\-:]*$/) + expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/) }) it('should distinguish between different Chinese OCR tools', () => { @@ -219,7 +219,7 @@ describe('buildFunctionCallToolName', () => { // All should be ASCII-only valid tool names tools.forEach((tool) => { - expect(tool).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/) + expect(tool).toMatch(/^[a-z_][a-z0-9_-]*$/) expect(tool).not.toMatch(/[\u4e00-\u9fff]/) // No Chinese characters }) @@ -234,7 +234,7 @@ describe('buildFunctionCallToolName', () => { it('should handle Japanese characters with Romaji transliteration', () => { const result = buildFunctionCallToolName('server', 'ユーザー検索') // Should be ASCII-only (Japanese characters are transliterated to Romaji) - expect(result).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/) + expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/) // Should not contain original Japanese characters expect(result).not.toMatch(/[\u3040-\u309f\u30a0-\u30ff]/) }) @@ -242,7 +242,7 @@ describe('buildFunctionCallToolName', () => { it('should handle Korean characters with romanization', () => { const result = buildFunctionCallToolName('server', '사용자검색') // Should be ASCII-only - expect(result).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/) + expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/) // Should not contain original Korean characters expect(result).not.toMatch(/[\uac00-\ud7af]/) }) @@ -256,7 +256,7 @@ describe('buildFunctionCallToolName', () => { expect(result).toContain('yong_hu') expect(result).toContain('ming_cheng') // Final result should be ASCII-only (lowercase) - expect(result).toMatch(/^[a-z_][a-z0-9_.\-:]*$/) + expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/) }) it('should transliterate Chinese and replace special symbols', () => { @@ -270,7 +270,7 @@ describe('buildFunctionCallToolName', () => { expect(result).toContain('shang_chuan') expect(result).toContain('gong_ju') // Should be ASCII-only (lowercase) - expect(result).toMatch(/^[a-z_][a-z0-9_.\-:]*$/) + expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/) }) it('should produce AI model compatible tool names', () => { @@ -279,9 +279,9 @@ describe('buildFunctionCallToolName', () => { testCases.forEach((testCase) => { const result = buildFunctionCallToolName('server', testCase) // Must start with letter or underscore - expect(result).toMatch(/^[a-zA-Z_]/) - // Must only contain a-z, A-Z, 0-9, _, -, ., : - expect(result).toMatch(/^[a-zA-Z0-9_.\-:]+$/) + expect(result).toMatch(/^[a-z_]/) + // Must only contain a-z, 0-9, _, - + expect(result).toMatch(/^[a-z0-9_-]+$/) // Must be <= 64 characters expect(result.length).toBeLessThanOrEqual(64) }) diff --git a/src/main/utils/mcp.ts b/src/main/utils/mcp.ts index 60f21037a..12b58e6b6 100644 --- a/src/main/utils/mcp.ts +++ b/src/main/utils/mcp.ts @@ -1,5 +1,8 @@ +import { loggerService } from '@logger' import { transliterate } from 'transliteration' +const logger = loggerService.withContext('Utils:MCP') + /** * Transliterates non-ASCII text (including CJK characters) to ASCII-compatible format. * @@ -8,31 +11,58 @@ import { transliterate } from 'transliteration' * * @param text - The input string to transliterate, may contain Unicode characters including CJK * @returns A lowercase ASCII string with spaces converted to underscores and special characters removed, - * preserving only alphanumeric characters, underscores, dots, hyphens, and colons + * preserving only alphanumeric characters, underscores, and hyphens * * @example * ```typescript * transliterateToAscii("Hello World") // returns "hello_world" * transliterateToAscii("你好世界") // returns transliterated version with underscores - * transliterateToAscii("Café-123") // returns "cafe_123" + * transliterateToAscii("Café-123") // returns "cafe-123" * ``` */ function transliterateToAscii(text: string): string { - // Use transliteration library which supports CJK (Chinese, Japanese, Korean) - const result = transliterate(text, { - // Unknown/special characters become underscores - unknown: '_', - ignore: [] - }) + // Input validation + if (!text || typeof text !== 'string') { + logger.warn('Invalid input to transliterateToAscii', { text }) + return 'invalid_input' + } - // Convert to lowercase, remove spaces, and clean up special chars - return result - .toLowerCase() - .replace(/\s+/g, '_') - .replace(/[^a-z0-9_.\-:]/g, '_') + try { + // Use transliteration library which supports CJK (Chinese, Japanese, Korean) + const result = transliterate(text, { + // Unknown/special characters become underscores + unknown: '_', + ignore: [] + }) + + logger.debug('Transliteration successful', { input: text, output: result }) + + // Convert to lowercase, remove spaces, and clean up special chars + // Only preserve a-z, 0-9, underscores, and hyphens (OpenAI/Anthropic API compatible) + return result + .toLowerCase() + .replace(/\s+/g, '_') + .replace(/[^a-z0-9_-]/g, '_') + } catch (error) { + logger.error('Transliteration failed, falling back to ASCII-only mode', { text, error }) + // Fallback: keep only ASCII alphanumeric, underscores, and hyphens for consistency + return text + .toLowerCase() + .replace(/[^a-z0-9_-]/g, '_') + } } export function buildFunctionCallToolName(serverName: string, toolName: string, serverId?: string) { + // Input validation with descriptive fallbacks to indicate invalid input + if (!serverName || typeof serverName !== 'string') { + logger.warn('Invalid serverName provided', { serverName }) + serverName = 'invalid_server' + } + if (!toolName || typeof toolName !== 'string') { + logger.warn('Invalid toolName provided', { toolName }) + toolName = 'invalid_tool' + } + // First, transliterate non-ASCII characters to ASCII const transliteratedServer = transliterateToAscii(serverName.trim()) const transliteratedTool = transliterateToAscii(toolName.trim()) @@ -65,11 +95,11 @@ export function buildFunctionCallToolName(serverName: string, toolName: string, } // Replace invalid characters with underscores - // Keep only a-z, 0-9, underscores, dashes, dots, colons (AI model compatible) - name = name.replace(/[^a-z0-9_.\-:]/g, '_') + // Keep only a-z, 0-9, underscores, dashes (OpenAI/Anthropic API compatible) + name = name.replace(/[^a-z0-9_-]/g, '_') // Ensure name starts with a letter or underscore (AI model requirement) - if (!/^[a-zA-Z_]/.test(name)) { + if (!/^[a-z_]/.test(name)) { name = `tool_${name}` } From 07ecdd8f2693dba2e63fab2e11dac28bf22e2c0a Mon Sep 17 00:00:00 2001 From: suyao Date: Sun, 14 Dec 2025 00:26:02 +0800 Subject: [PATCH 9/9] chore: format --- src/main/utils/mcp.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/utils/mcp.ts b/src/main/utils/mcp.ts index 12b58e6b6..8f3260b5a 100644 --- a/src/main/utils/mcp.ts +++ b/src/main/utils/mcp.ts @@ -46,9 +46,7 @@ function transliterateToAscii(text: string): string { } catch (error) { logger.error('Transliteration failed, falling back to ASCII-only mode', { text, error }) // Fallback: keep only ASCII alphanumeric, underscores, and hyphens for consistency - return text - .toLowerCase() - .replace(/[^a-z0-9_-]/g, '_') + return text.toLowerCase().replace(/[^a-z0-9_-]/g, '_') } }