fix: Remove non-compliant characters from MCP tool names for API compatibility (#11903)

* Initial plan

* fix: address PR review feedback - remove dots/colons, add error handling and logging

Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com>

* refactor: improve error handling consistency and default values

Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com>
This commit is contained in:
Copilot 2025-12-14 00:22:08 +08:00 committed by GitHub
parent 2cac432512
commit 5242cf0c9d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 55 additions and 25 deletions

View File

@ -202,7 +202,7 @@ describe('buildFunctionCallToolName', () => {
expect(result).not.toMatch(/[\u4e00-\u9fff]/) // No Chinese characters expect(result).not.toMatch(/[\u4e00-\u9fff]/) // No Chinese characters
expect(result).toContain('ocr') // OCR is lowercased expect(result).toContain('ocr') // OCR is lowercased
// Should only contain ASCII characters (lowercase) // Should only contain ASCII characters (lowercase)
expect(result).toMatch(/^[a-z_][a-z0-9_.\-:]*$/) expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
}) })
it('should distinguish between different Chinese OCR tools', () => { it('should distinguish between different Chinese OCR tools', () => {
@ -219,7 +219,7 @@ describe('buildFunctionCallToolName', () => {
// All should be ASCII-only valid tool names // All should be ASCII-only valid tool names
tools.forEach((tool) => { tools.forEach((tool) => {
expect(tool).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/) expect(tool).toMatch(/^[a-z_][a-z0-9_-]*$/)
expect(tool).not.toMatch(/[\u4e00-\u9fff]/) // No Chinese characters expect(tool).not.toMatch(/[\u4e00-\u9fff]/) // No Chinese characters
}) })
@ -234,7 +234,7 @@ describe('buildFunctionCallToolName', () => {
it('should handle Japanese characters with Romaji transliteration', () => { it('should handle Japanese characters with Romaji transliteration', () => {
const result = buildFunctionCallToolName('server', 'ユーザー検索') const result = buildFunctionCallToolName('server', 'ユーザー検索')
// Should be ASCII-only (Japanese characters are transliterated to Romaji) // Should be ASCII-only (Japanese characters are transliterated to Romaji)
expect(result).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/) expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
// Should not contain original Japanese characters // Should not contain original Japanese characters
expect(result).not.toMatch(/[\u3040-\u309f\u30a0-\u30ff]/) expect(result).not.toMatch(/[\u3040-\u309f\u30a0-\u30ff]/)
}) })
@ -242,7 +242,7 @@ describe('buildFunctionCallToolName', () => {
it('should handle Korean characters with romanization', () => { it('should handle Korean characters with romanization', () => {
const result = buildFunctionCallToolName('server', '사용자검색') const result = buildFunctionCallToolName('server', '사용자검색')
// Should be ASCII-only // Should be ASCII-only
expect(result).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/) expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
// Should not contain original Korean characters // Should not contain original Korean characters
expect(result).not.toMatch(/[\uac00-\ud7af]/) expect(result).not.toMatch(/[\uac00-\ud7af]/)
}) })
@ -256,7 +256,7 @@ describe('buildFunctionCallToolName', () => {
expect(result).toContain('yong_hu') expect(result).toContain('yong_hu')
expect(result).toContain('ming_cheng') expect(result).toContain('ming_cheng')
// Final result should be ASCII-only (lowercase) // Final result should be ASCII-only (lowercase)
expect(result).toMatch(/^[a-z_][a-z0-9_.\-:]*$/) expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
}) })
it('should transliterate Chinese and replace special symbols', () => { it('should transliterate Chinese and replace special symbols', () => {
@ -270,7 +270,7 @@ describe('buildFunctionCallToolName', () => {
expect(result).toContain('shang_chuan') expect(result).toContain('shang_chuan')
expect(result).toContain('gong_ju') expect(result).toContain('gong_ju')
// Should be ASCII-only (lowercase) // Should be ASCII-only (lowercase)
expect(result).toMatch(/^[a-z_][a-z0-9_.\-:]*$/) expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
}) })
it('should produce AI model compatible tool names', () => { it('should produce AI model compatible tool names', () => {
@ -279,9 +279,9 @@ describe('buildFunctionCallToolName', () => {
testCases.forEach((testCase) => { testCases.forEach((testCase) => {
const result = buildFunctionCallToolName('server', testCase) const result = buildFunctionCallToolName('server', testCase)
// Must start with letter or underscore // Must start with letter or underscore
expect(result).toMatch(/^[a-zA-Z_]/) expect(result).toMatch(/^[a-z_]/)
// Must only contain a-z, A-Z, 0-9, _, -, ., : // Must only contain a-z, 0-9, _, -
expect(result).toMatch(/^[a-zA-Z0-9_.\-:]+$/) expect(result).toMatch(/^[a-z0-9_-]+$/)
// Must be <= 64 characters // Must be <= 64 characters
expect(result.length).toBeLessThanOrEqual(64) expect(result.length).toBeLessThanOrEqual(64)
}) })

View File

@ -1,5 +1,8 @@
import { loggerService } from '@logger'
import { transliterate } from 'transliteration' import { transliterate } from 'transliteration'
const logger = loggerService.withContext('Utils:MCP')
/** /**
* Transliterates non-ASCII text (including CJK characters) to ASCII-compatible format. * Transliterates non-ASCII text (including CJK characters) to ASCII-compatible format.
* *
@ -8,16 +11,23 @@ import { transliterate } from 'transliteration'
* *
* @param text - The input string to transliterate, may contain Unicode characters including CJK * @param text - The input string to transliterate, may contain Unicode characters including CJK
* @returns A lowercase ASCII string with whitespace converted to underscores and other special characters replaced by underscores, * @returns A lowercase ASCII string with whitespace converted to underscores and other special characters replaced by underscores,
* preserving only alphanumeric characters, underscores, dots, hyphens, and colons * preserving only alphanumeric characters, underscores, and hyphens
* *
* @example * @example
* ```typescript * ```typescript
* transliterateToAscii("Hello World") // returns "hello_world" * transliterateToAscii("Hello World") // returns "hello_world"
* transliterateToAscii("你好世界") // returns transliterated version with underscores * transliterateToAscii("你好世界") // returns transliterated version with underscores
* transliterateToAscii("Café-123") // returns "cafe_123" * transliterateToAscii("Café-123") // returns "cafe-123"
* ``` * ```
*/ */
function transliterateToAscii(text: string): string { function transliterateToAscii(text: string): string {
// Input validation
if (!text || typeof text !== 'string') {
logger.warn('Invalid input to transliterateToAscii', { text })
return 'invalid_input'
}
try {
// Use transliteration library which supports CJK (Chinese, Japanese, Korean) // Use transliteration library which supports CJK (Chinese, Japanese, Korean)
const result = transliterate(text, { const result = transliterate(text, {
// Unknown/special characters become underscores // Unknown/special characters become underscores
@ -25,14 +35,34 @@ function transliterateToAscii(text: string): string {
ignore: [] ignore: []
}) })
logger.debug('Transliteration successful', { input: text, output: result })
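// Convert to lowercase, replace whitespace runs with underscores, and replace remaining special chars with underscores // Convert to lowercase, replace whitespace runs with underscores, and replace remaining special chars with underscores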
// Only preserve a-z, 0-9, underscores, and hyphens (OpenAI/Anthropic API compatible)
return result return result
.toLowerCase() .toLowerCase()
.replace(/\s+/g, '_') .replace(/\s+/g, '_')
.replace(/[^a-z0-9_.\-:]/g, '_') .replace(/[^a-z0-9_-]/g, '_')
} catch (error) {
logger.error('Transliteration failed, falling back to ASCII-only mode', { text, error })
// Fallback: keep only ASCII alphanumeric, underscores, and hyphens for consistency
return text
.toLowerCase()
.replace(/[^a-z0-9_-]/g, '_')
}
} }
export function buildFunctionCallToolName(serverName: string, toolName: string, serverId?: string) { export function buildFunctionCallToolName(serverName: string, toolName: string, serverId?: string) {
// Input validation with descriptive fallbacks to indicate invalid input
if (!serverName || typeof serverName !== 'string') {
logger.warn('Invalid serverName provided', { serverName })
serverName = 'invalid_server'
}
if (!toolName || typeof toolName !== 'string') {
logger.warn('Invalid toolName provided', { toolName })
toolName = 'invalid_tool'
}
// First, transliterate non-ASCII characters to ASCII // First, transliterate non-ASCII characters to ASCII
const transliteratedServer = transliterateToAscii(serverName.trim()) const transliteratedServer = transliterateToAscii(serverName.trim())
const transliteratedTool = transliterateToAscii(toolName.trim()) const transliteratedTool = transliterateToAscii(toolName.trim())
@ -65,11 +95,11 @@ export function buildFunctionCallToolName(serverName: string, toolName: string,
} }
// Replace invalid characters with underscores // Replace invalid characters with underscores
// Keep only a-z, 0-9, underscores, dashes, dots, colons (AI model compatible) // Keep only a-z, 0-9, underscores, dashes (OpenAI/Anthropic API compatible)
name = name.replace(/[^a-z0-9_.\-:]/g, '_') name = name.replace(/[^a-z0-9_-]/g, '_')
// Ensure name starts with a letter or underscore (AI model requirement) // Ensure name starts with a letter or underscore (AI model requirement)
if (!/^[a-zA-Z_]/.test(name)) { if (!/^[a-z_]/.test(name)) {
name = `tool_${name}` name = `tool_${name}`
} }