fix: Remove non-compliant characters from MCP tool names for API compatibility (#11903)

* Initial plan

* fix: address PR review feedback - remove dots/colons, add error handling and logging

Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com>

* refactor: improve error handling consistency and default values

Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com>
2025-12-21 16:01:35 +08:00 · 2025-12-14 00:22:08 +08:00 · 2025-12-14 00:22:08 +08:00 · 5242cf0c9d
commit 5242cf0c9d
parent 2cac432512
2 changed files with 55 additions and 25 deletions
--- a/src/main/utils/tests/mcp.test.ts
+++ b/src/main/utils/tests/mcp.test.ts
@ -202,7 +202,7 @@ describe('buildFunctionCallToolName', () => {
      expect(result).not.toMatch(/[\u4e00-\u9fff]/) // No Chinese characters
      expect(result).toContain('ocr') // OCR is lowercased
      // Should only contain ASCII characters (lowercase)
-      expect(result).toMatch(/^[a-z_][a-z0-9_.\-:]*$/)
+      expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
    })
    it('should distinguish between different Chinese OCR tools', () => {
@ -219,7 +219,7 @@ describe('buildFunctionCallToolName', () => {
      // All should be ASCII-only valid tool names
      tools.forEach((tool) => {
-        expect(tool).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/)
+        expect(tool).toMatch(/^[a-z_][a-z0-9_-]*$/)
        expect(tool).not.toMatch(/[\u4e00-\u9fff]/) // No Chinese characters
      })
@ -234,7 +234,7 @@ describe('buildFunctionCallToolName', () => {
    it('should handle Japanese characters with Romaji transliteration', () => {
      const result = buildFunctionCallToolName('server', 'ユーザー検索')
      // Should be ASCII-only (Japanese characters are transliterated to Romaji)
-      expect(result).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/)
+      expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
      // Should not contain original Japanese characters
      expect(result).not.toMatch(/[\u3040-\u309f\u30a0-\u30ff]/)
    })
@ -242,7 +242,7 @@ describe('buildFunctionCallToolName', () => {
    it('should handle Korean characters with romanization', () => {
      const result = buildFunctionCallToolName('server', '사용자검색')
      // Should be ASCII-only
-      expect(result).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/)
+      expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
      // Should not contain original Korean characters
      expect(result).not.toMatch(/[\uac00-\ud7af]/)
    })
@ -256,7 +256,7 @@ describe('buildFunctionCallToolName', () => {
      expect(result).toContain('yong_hu')
      expect(result).toContain('ming_cheng')
      // Final result should be ASCII-only (lowercase)
-      expect(result).toMatch(/^[a-z_][a-z0-9_.\-:]*$/)
+      expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
    })
    it('should transliterate Chinese and replace special symbols', () => {
@ -270,7 +270,7 @@ describe('buildFunctionCallToolName', () => {
      expect(result).toContain('shang_chuan')
      expect(result).toContain('gong_ju')
      // Should be ASCII-only (lowercase)
-      expect(result).toMatch(/^[a-z_][a-z0-9_.\-:]*$/)
+      expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
    })
    it('should produce AI model compatible tool names', () => {
@ -279,9 +279,9 @@ describe('buildFunctionCallToolName', () => {
      testCases.forEach((testCase) => {
        const result = buildFunctionCallToolName('server', testCase)
        // Must start with letter or underscore
-        expect(result).toMatch(/^[a-zA-Z_]/)
+        expect(result).toMatch(/^[a-z_]/)
-        // Must only contain a-z, A-Z, 0-9, _, -, ., :
+        // Must only contain a-z, 0-9, _, -
-        expect(result).toMatch(/^[a-zA-Z0-9_.\-:]+$/)
+        expect(result).toMatch(/^[a-z0-9_-]+$/)
        // Must be <= 64 characters
        expect(result.length).toBeLessThanOrEqual(64)
      })
--- a/src/main/utils/mcp.ts
+++ b/src/main/utils/mcp.ts
@ -1,5 +1,8 @@
 import { loggerService } from '@logger'
 import { transliterate } from 'transliteration'
 const logger = loggerService.withContext('Utils:MCP')
 /**
 * Transliterates non-ASCII text (including CJK characters) to ASCII-compatible format.
 *
@ -8,16 +11,23 @@ import { transliterate } from 'transliteration'
 *
 * @param text - The input string to transliterate, may contain Unicode characters including CJK
 * @returns A lowercase ASCII string with spaces converted to underscores and special characters removed,
- *          preserving only alphanumeric characters, underscores, dots, hyphens, and colons
+ *          preserving only alphanumeric characters, underscores, and hyphens
 *
 * @example
 * ```typescript
 * transliterateToAscii("Hello World") // returns "hello_world"
 * transliterateToAscii("你好世界") // returns transliterated version with underscores
- * transliterateToAscii("Café-123") // returns "cafe_123"
+ * transliterateToAscii("Café-123") // returns "cafe-123"
 * ```
 */
 function transliterateToAscii(text: string): string {
  // Input validation
  if (!text || typeof text !== 'string') {
    logger.warn('Invalid input to transliterateToAscii', { text })
    return 'invalid_input'
  }
  try {
    // Use transliteration library which supports CJK (Chinese, Japanese, Korean)
    const result = transliterate(text, {
      // Unknown/special characters become underscores
@ -25,14 +35,34 @@ function transliterateToAscii(text: string): string {
      ignore: []
    })
    logger.debug('Transliteration successful', { input: text, output: result })
    // Convert to lowercase, remove spaces, and clean up special chars
    // Only preserve a-z, 0-9, underscores, and hyphens (OpenAI/Anthropic API compatible)
    return result
      .toLowerCase()
      .replace(/\s+/g, '_')
-    .replace(/[^a-z0-9_.\-:]/g, '_')
+      .replace(/[^a-z0-9_-]/g, '_')
  } catch (error) {
    logger.error('Transliteration failed, falling back to ASCII-only mode', { text, error })
    // Fallback: keep only ASCII alphanumeric, underscores, and hyphens for consistency
    return text
      .toLowerCase()
      .replace(/[^a-z0-9_-]/g, '_')
  }
 }
 export function buildFunctionCallToolName(serverName: string, toolName: string, serverId?: string) {
  // Input validation with descriptive fallbacks to indicate invalid input
  if (!serverName || typeof serverName !== 'string') {
    logger.warn('Invalid serverName provided', { serverName })
    serverName = 'invalid_server'
  }
  if (!toolName || typeof toolName !== 'string') {
    logger.warn('Invalid toolName provided', { toolName })
    toolName = 'invalid_tool'
  }
  // First, transliterate non-ASCII characters to ASCII
  const transliteratedServer = transliterateToAscii(serverName.trim())
  const transliteratedTool = transliterateToAscii(toolName.trim())
@ -65,11 +95,11 @@ export function buildFunctionCallToolName(serverName: string, toolName: string,
  }
  // Replace invalid characters with underscores
-  // Keep only a-z, 0-9, underscores, dashes, dots, colons (AI model compatible)
+  // Keep only a-z, 0-9, underscores, dashes (OpenAI/Anthropic API compatible)
-  name = name.replace(/[^a-z0-9_.\-:]/g, '_')
+  name = name.replace(/[^a-z0-9_-]/g, '_')
  // Ensure name starts with a letter or underscore (AI model requirement)
-  if (!/^[a-zA-Z_]/.test(name)) {
+  if (!/^[a-z_]/.test(name)) {
    name = `tool_${name}`
  }