mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-19 14:41:24 +08:00
fix: Remove non-compliant characters from MCP tool names for API compatibility (#11903)
* Initial plan

* fix: address PR review feedback - remove dots/colons, add error handling and logging

Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com>

* refactor: improve error handling consistency and default values

Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com>
This commit is contained in:
parent
2cac432512
commit
5242cf0c9d
@ -202,7 +202,7 @@ describe('buildFunctionCallToolName', () => {
|
||||
expect(result).not.toMatch(/[\u4e00-\u9fff]/) // No Chinese characters
|
||||
expect(result).toContain('ocr') // OCR is lowercased
|
||||
// Should only contain ASCII characters (lowercase)
|
||||
expect(result).toMatch(/^[a-z_][a-z0-9_.\-:]*$/)
|
||||
expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
|
||||
})
|
||||
|
||||
it('should distinguish between different Chinese OCR tools', () => {
|
||||
@ -219,7 +219,7 @@ describe('buildFunctionCallToolName', () => {
|
||||
|
||||
// All should be ASCII-only valid tool names
|
||||
tools.forEach((tool) => {
|
||||
expect(tool).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/)
|
||||
expect(tool).toMatch(/^[a-z_][a-z0-9_-]*$/)
|
||||
expect(tool).not.toMatch(/[\u4e00-\u9fff]/) // No Chinese characters
|
||||
})
|
||||
|
||||
@ -234,7 +234,7 @@ describe('buildFunctionCallToolName', () => {
|
||||
it('should handle Japanese characters with Romaji transliteration', () => {
|
||||
const result = buildFunctionCallToolName('server', 'ユーザー検索')
|
||||
// Should be ASCII-only (Japanese characters are transliterated to Romaji)
|
||||
expect(result).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/)
|
||||
expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
|
||||
// Should not contain original Japanese characters
|
||||
expect(result).not.toMatch(/[\u3040-\u309f\u30a0-\u30ff]/)
|
||||
})
|
||||
@ -242,7 +242,7 @@ describe('buildFunctionCallToolName', () => {
|
||||
it('should handle Korean characters with romanization', () => {
|
||||
const result = buildFunctionCallToolName('server', '사용자검색')
|
||||
// Should be ASCII-only
|
||||
expect(result).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/)
|
||||
expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
|
||||
// Should not contain original Korean characters
|
||||
expect(result).not.toMatch(/[\uac00-\ud7af]/)
|
||||
})
|
||||
@ -256,7 +256,7 @@ describe('buildFunctionCallToolName', () => {
|
||||
expect(result).toContain('yong_hu')
|
||||
expect(result).toContain('ming_cheng')
|
||||
// Final result should be ASCII-only (lowercase)
|
||||
expect(result).toMatch(/^[a-z_][a-z0-9_.\-:]*$/)
|
||||
expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
|
||||
})
|
||||
|
||||
it('should transliterate Chinese and replace special symbols', () => {
|
||||
@ -270,7 +270,7 @@ describe('buildFunctionCallToolName', () => {
|
||||
expect(result).toContain('shang_chuan')
|
||||
expect(result).toContain('gong_ju')
|
||||
// Should be ASCII-only (lowercase)
|
||||
expect(result).toMatch(/^[a-z_][a-z0-9_.\-:]*$/)
|
||||
expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
|
||||
})
|
||||
|
||||
it('should produce AI model compatible tool names', () => {
|
||||
@ -279,9 +279,9 @@ describe('buildFunctionCallToolName', () => {
|
||||
testCases.forEach((testCase) => {
|
||||
const result = buildFunctionCallToolName('server', testCase)
|
||||
// Must start with letter or underscore
|
||||
expect(result).toMatch(/^[a-zA-Z_]/)
|
||||
// Must only contain a-z, A-Z, 0-9, _, -, ., :
|
||||
expect(result).toMatch(/^[a-zA-Z0-9_.\-:]+$/)
|
||||
expect(result).toMatch(/^[a-z_]/)
|
||||
// Must only contain a-z, 0-9, _, -
|
||||
expect(result).toMatch(/^[a-z0-9_-]+$/)
|
||||
// Must be <= 64 characters
|
||||
expect(result.length).toBeLessThanOrEqual(64)
|
||||
})
|
||||
|
||||
@ -1,5 +1,8 @@
|
||||
import { loggerService } from '@logger'
|
||||
import { transliterate } from 'transliteration'
|
||||
|
||||
const logger = loggerService.withContext('Utils:MCP')
|
||||
|
||||
/**
|
||||
* Transliterates non-ASCII text (including CJK characters) to ASCII-compatible format.
|
||||
*
|
||||
@ -8,31 +11,58 @@ import { transliterate } from 'transliteration'
|
||||
*
|
||||
* @param text - The input string to transliterate, may contain Unicode characters including CJK
|
||||
* @returns A lowercase ASCII string with whitespace runs converted to underscores and other disallowed characters replaced by underscores,
|
||||
* preserving only alphanumeric characters, underscores, dots, hyphens, and colons
|
||||
* preserving only alphanumeric characters, underscores, and hyphens
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* transliterateToAscii("Hello World") // returns "hello_world"
|
||||
* transliterateToAscii("你好世界") // returns transliterated version with underscores
|
||||
* transliterateToAscii("Café-123") // returns "cafe_123"
|
||||
* transliterateToAscii("Café-123") // returns "cafe-123"
|
||||
* ```
|
||||
*/
|
||||
function transliterateToAscii(text: string): string {
|
||||
// Use transliteration library which supports CJK (Chinese, Japanese, Korean)
|
||||
const result = transliterate(text, {
|
||||
// Unknown/special characters become underscores
|
||||
unknown: '_',
|
||||
ignore: []
|
||||
})
|
||||
// Input validation
|
||||
if (!text || typeof text !== 'string') {
|
||||
logger.warn('Invalid input to transliterateToAscii', { text })
|
||||
return 'invalid_input'
|
||||
}
|
||||
|
||||
// Convert to lowercase, remove spaces, and clean up special chars
|
||||
return result
|
||||
.toLowerCase()
|
||||
.replace(/\s+/g, '_')
|
||||
.replace(/[^a-z0-9_.\-:]/g, '_')
|
||||
try {
|
||||
// Use transliteration library which supports CJK (Chinese, Japanese, Korean)
|
||||
const result = transliterate(text, {
|
||||
// Unknown/special characters become underscores
|
||||
unknown: '_',
|
||||
ignore: []
|
||||
})
|
||||
|
||||
logger.debug('Transliteration successful', { input: text, output: result })
|
||||
|
||||
// Convert to lowercase, remove spaces, and clean up special chars
|
||||
// Only preserve a-z, 0-9, underscores, and hyphens (OpenAI/Anthropic API compatible)
|
||||
return result
|
||||
.toLowerCase()
|
||||
.replace(/\s+/g, '_')
|
||||
.replace(/[^a-z0-9_-]/g, '_')
|
||||
} catch (error) {
|
||||
logger.error('Transliteration failed, falling back to ASCII-only mode', { text, error })
|
||||
// Fallback: keep only ASCII alphanumeric, underscores, and hyphens for consistency
|
||||
return text
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9_-]/g, '_')
|
||||
}
|
||||
}
|
||||
|
||||
export function buildFunctionCallToolName(serverName: string, toolName: string, serverId?: string) {
|
||||
// Input validation with descriptive fallbacks to indicate invalid input
|
||||
if (!serverName || typeof serverName !== 'string') {
|
||||
logger.warn('Invalid serverName provided', { serverName })
|
||||
serverName = 'invalid_server'
|
||||
}
|
||||
if (!toolName || typeof toolName !== 'string') {
|
||||
logger.warn('Invalid toolName provided', { toolName })
|
||||
toolName = 'invalid_tool'
|
||||
}
|
||||
|
||||
// First, transliterate non-ASCII characters to ASCII
|
||||
const transliteratedServer = transliterateToAscii(serverName.trim())
|
||||
const transliteratedTool = transliterateToAscii(toolName.trim())
|
||||
@ -65,11 +95,11 @@ export function buildFunctionCallToolName(serverName: string, toolName: string,
|
||||
}
|
||||
|
||||
// Replace invalid characters with underscores
|
||||
// Keep only a-z, 0-9, underscores, dashes, dots, colons (AI model compatible)
|
||||
name = name.replace(/[^a-z0-9_.\-:]/g, '_')
|
||||
// Keep only a-z, 0-9, underscores, dashes (OpenAI/Anthropic API compatible)
|
||||
name = name.replace(/[^a-z0-9_-]/g, '_')
|
||||
|
||||
// Ensure name starts with a letter or underscore (AI model requirement)
|
||||
if (!/^[a-zA-Z_]/.test(name)) {
|
||||
if (!/^[a-z_]/.test(name)) {
|
||||
name = `tool_${name}`
|
||||
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user