mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-19 14:41:24 +08:00
Merge 07ecdd8f26 into a6ba5d34e0
This commit is contained in:
commit
e5d2955a09
@ -364,6 +364,7 @@
|
|||||||
"tar": "^7.4.3",
|
"tar": "^7.4.3",
|
||||||
"tiny-pinyin": "^1.3.2",
|
"tiny-pinyin": "^1.3.2",
|
||||||
"tokenx": "^1.1.0",
|
"tokenx": "^1.1.0",
|
||||||
|
"transliteration": "^2.3.5",
|
||||||
"tsx": "^4.20.3",
|
"tsx": "^4.20.3",
|
||||||
"turndown-plugin-gfm": "^1.0.2",
|
"turndown-plugin-gfm": "^1.0.2",
|
||||||
"tw-animate-css": "^1.3.8",
|
"tw-animate-css": "^1.3.8",
|
||||||
|
|||||||
@ -61,12 +61,13 @@ describe('buildFunctionCallToolName', () => {
|
|||||||
it('should replace invalid characters with underscores', () => {
|
it('should replace invalid characters with underscores', () => {
|
||||||
const result = buildFunctionCallToolName('test@server', 'tool#name')
|
const result = buildFunctionCallToolName('test@server', 'tool#name')
|
||||||
expect(result).not.toMatch(/[@#]/)
|
expect(result).not.toMatch(/[@#]/)
|
||||||
expect(result).toMatch(/^[a-zA-Z0-9_-]+$/)
|
// Should only contain ASCII alphanumeric, underscore, dash, dot, colon
|
||||||
|
expect(result).toMatch(/^[a-zA-Z0-9_.\-:]+$/)
|
||||||
})
|
})
|
||||||
|
|
||||||
it('should ensure name starts with a letter', () => {
|
it('should ensure name starts with a letter or underscore', () => {
|
||||||
const result = buildFunctionCallToolName('123server', '456tool')
|
const result = buildFunctionCallToolName('123server', '456tool')
|
||||||
expect(result).toMatch(/^[a-zA-Z]/)
|
expect(result).toMatch(/^[a-zA-Z_]/)
|
||||||
})
|
})
|
||||||
|
|
||||||
it('should handle consecutive underscores/dashes', () => {
|
it('should handle consecutive underscores/dashes', () => {
|
||||||
@ -130,7 +131,7 @@ describe('buildFunctionCallToolName', () => {
|
|||||||
// Should still produce a valid unique suffix via fallback hash
|
// Should still produce a valid unique suffix via fallback hash
|
||||||
expect(result).toBeTruthy()
|
expect(result).toBeTruthy()
|
||||||
expect(result.length).toBeLessThanOrEqual(63)
|
expect(result.length).toBeLessThanOrEqual(63)
|
||||||
expect(result).toMatch(/^[a-zA-Z][a-zA-Z0-9_-]*$/)
|
expect(result).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/)
|
||||||
// Should have a suffix (underscore followed by something)
|
// Should have a suffix (underscore followed by something)
|
||||||
expect(result).toMatch(/_[a-z0-9]+$/)
|
expect(result).toMatch(/_[a-z0-9]+$/)
|
||||||
})
|
})
|
||||||
@ -177,9 +178,9 @@ describe('buildFunctionCallToolName', () => {
|
|||||||
// Should be different
|
// Should be different
|
||||||
expect(tool1).not.toBe(tool2)
|
expect(tool1).not.toBe(tool2)
|
||||||
|
|
||||||
// Both should be valid identifiers
|
// Both should be valid AI model tool names (ASCII only)
|
||||||
expect(tool1).toMatch(/^[a-zA-Z][a-zA-Z0-9_-]*$/)
|
expect(tool1).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/)
|
||||||
expect(tool2).toMatch(/^[a-zA-Z][a-zA-Z0-9_-]*$/)
|
expect(tool2).toMatch(/^[a-zA-Z_][a-zA-Z0-9_.\-:]*$/)
|
||||||
|
|
||||||
// Both should be <= 63 chars
|
// Both should be <= 63 chars
|
||||||
expect(tool1.length).toBeLessThanOrEqual(63)
|
expect(tool1.length).toBeLessThanOrEqual(63)
|
||||||
@ -193,4 +194,97 @@ describe('buildFunctionCallToolName', () => {
|
|||||||
expect(result.split('github').length - 1).toBeLessThanOrEqual(2)
|
expect(result.split('github').length - 1).toBeLessThanOrEqual(2)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
describe('internationalization support (CJK to ASCII transliteration)', () => {
|
||||||
|
it('should convert Chinese characters to pinyin', () => {
|
||||||
|
const result = buildFunctionCallToolName('ocr', '行驶证OCR_轻盈版')
|
||||||
|
// Chinese characters should be transliterated to pinyin
|
||||||
|
expect(result).not.toMatch(/[\u4e00-\u9fff]/) // No Chinese characters
|
||||||
|
expect(result).toContain('ocr') // OCR is lowercased
|
||||||
|
// Should only contain ASCII characters (lowercase)
|
||||||
|
expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should distinguish between different Chinese OCR tools', () => {
|
||||||
|
const tools = [
|
||||||
|
buildFunctionCallToolName('ocr', '行驶证OCR_轻盈版'),
|
||||||
|
buildFunctionCallToolName('ocr', '营业执照OCR_轻盈版'),
|
||||||
|
buildFunctionCallToolName('ocr', '车牌OCR_轻盈版'),
|
||||||
|
buildFunctionCallToolName('ocr', '身份证OCR')
|
||||||
|
]
|
||||||
|
|
||||||
|
// All tools should be unique (pinyin transliterations are different)
|
||||||
|
const uniqueTools = new Set(tools)
|
||||||
|
expect(uniqueTools.size).toBe(4)
|
||||||
|
|
||||||
|
// All should be ASCII-only valid tool names
|
||||||
|
tools.forEach((tool) => {
|
||||||
|
expect(tool).toMatch(/^[a-z_][a-z0-9_-]*$/)
|
||||||
|
expect(tool).not.toMatch(/[\u4e00-\u9fff]/) // No Chinese characters
|
||||||
|
})
|
||||||
|
|
||||||
|
// Verify they contain transliterated pinyin (with underscores between characters)
|
||||||
|
// 行驶证 = xing_shi_zheng, 营业执照 = ying_ye_zhi_zhao, 车牌 = che_pai, 身份证 = shen_fen_zheng
|
||||||
|
expect(tools[0]).toContain('xing_shi_zheng')
|
||||||
|
expect(tools[1]).toContain('ying_ye_zhi_zhao')
|
||||||
|
expect(tools[2]).toContain('che_pai')
|
||||||
|
expect(tools[3]).toContain('shen_fen_zheng')
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should handle Japanese characters with Romaji transliteration', () => {
|
||||||
|
const result = buildFunctionCallToolName('server', 'ユーザー検索')
|
||||||
|
// Should be ASCII-only (Japanese characters are transliterated to Romaji)
|
||||||
|
expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
|
||||||
|
// Should not contain original Japanese characters
|
||||||
|
expect(result).not.toMatch(/[\u3040-\u309f\u30a0-\u30ff]/)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should handle Korean characters with romanization', () => {
|
||||||
|
const result = buildFunctionCallToolName('server', '사용자검색')
|
||||||
|
// Should be ASCII-only
|
||||||
|
expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
|
||||||
|
// Should not contain original Korean characters
|
||||||
|
expect(result).not.toMatch(/[\uac00-\ud7af]/)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should handle mixed language tool names', () => {
|
||||||
|
const result = buildFunctionCallToolName('api', 'search用户by名称')
|
||||||
|
// ASCII parts should be preserved (lowercased)
|
||||||
|
expect(result).toContain('search')
|
||||||
|
expect(result).toContain('by')
|
||||||
|
// Chinese parts should be transliterated (用户 = yong_hu, 名称 = ming_cheng)
|
||||||
|
expect(result).toContain('yong_hu')
|
||||||
|
expect(result).toContain('ming_cheng')
|
||||||
|
// Final result should be ASCII-only (lowercase)
|
||||||
|
expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should transliterate Chinese and replace special symbols', () => {
|
||||||
|
const result = buildFunctionCallToolName('test', '文件@上传#工具')
|
||||||
|
// @ and # should be replaced with underscores
|
||||||
|
expect(result).not.toContain('@')
|
||||||
|
expect(result).not.toContain('#')
|
||||||
|
// Chinese characters should be transliterated
|
||||||
|
// 文件 = wen_jian, 上传 = shang_chuan, 工具 = gong_ju
|
||||||
|
expect(result).toContain('wen_jian')
|
||||||
|
expect(result).toContain('shang_chuan')
|
||||||
|
expect(result).toContain('gong_ju')
|
||||||
|
// Should be ASCII-only (lowercase)
|
||||||
|
expect(result).toMatch(/^[a-z_][a-z0-9_-]*$/)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should produce AI model compatible tool names', () => {
|
||||||
|
const testCases = ['行驶证OCR', '营业执照识别', 'get用户info', '文件@处理', '数据分析_v2']
|
||||||
|
|
||||||
|
testCases.forEach((testCase) => {
|
||||||
|
const result = buildFunctionCallToolName('server', testCase)
|
||||||
|
// Must start with letter or underscore
|
||||||
|
expect(result).toMatch(/^[a-z_]/)
|
||||||
|
// Must only contain a-z, 0-9, _, -
|
||||||
|
expect(result).toMatch(/^[a-z0-9_-]+$/)
|
||||||
|
// Must be <= 64 characters
|
||||||
|
expect(result.length).toBeLessThanOrEqual(64)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@ -1,6 +1,72 @@
|
|||||||
|
import { loggerService } from '@logger'
|
||||||
|
import { transliterate } from 'transliteration'
|
||||||
|
|
||||||
|
const logger = loggerService.withContext('Utils:MCP')
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Transliterates non-ASCII text (including CJK characters) to ASCII-compatible format.
|
||||||
|
*
|
||||||
|
* Converts input text to lowercase ASCII representation, replacing spaces with underscores
|
||||||
|
* and removing special characters. Unknown or special characters are replaced with underscores.
|
||||||
|
*
|
||||||
|
* @param text - The input string to transliterate, may contain Unicode characters including CJK
|
||||||
|
* @returns A lowercase ASCII string with spaces converted to underscores and special characters removed,
|
||||||
|
* preserving only alphanumeric characters, underscores, and hyphens
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* ```typescript
|
||||||
|
* transliterateToAscii("Hello World") // returns "hello_world"
|
||||||
|
* transliterateToAscii("你好世界") // returns transliterated version with underscores
|
||||||
|
* transliterateToAscii("Café-123") // returns "cafe-123"
|
||||||
|
* ```
|
||||||
|
*/
|
||||||
|
function transliterateToAscii(text: string): string {
|
||||||
|
// Input validation
|
||||||
|
if (!text || typeof text !== 'string') {
|
||||||
|
logger.warn('Invalid input to transliterateToAscii', { text })
|
||||||
|
return 'invalid_input'
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Use transliteration library which supports CJK (Chinese, Japanese, Korean)
|
||||||
|
const result = transliterate(text, {
|
||||||
|
// Unknown/special characters become underscores
|
||||||
|
unknown: '_',
|
||||||
|
ignore: []
|
||||||
|
})
|
||||||
|
|
||||||
|
logger.debug('Transliteration successful', { input: text, output: result })
|
||||||
|
|
||||||
|
// Convert to lowercase, remove spaces, and clean up special chars
|
||||||
|
// Only preserve a-z, 0-9, underscores, and hyphens (OpenAI/Anthropic API compatible)
|
||||||
|
return result
|
||||||
|
.toLowerCase()
|
||||||
|
.replace(/\s+/g, '_')
|
||||||
|
.replace(/[^a-z0-9_-]/g, '_')
|
||||||
|
} catch (error) {
|
||||||
|
logger.error('Transliteration failed, falling back to ASCII-only mode', { text, error })
|
||||||
|
// Fallback: keep only ASCII alphanumeric, underscores, and hyphens for consistency
|
||||||
|
return text.toLowerCase().replace(/[^a-z0-9_-]/g, '_')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
export function buildFunctionCallToolName(serverName: string, toolName: string, serverId?: string) {
|
export function buildFunctionCallToolName(serverName: string, toolName: string, serverId?: string) {
|
||||||
const sanitizedServer = serverName.trim().replace(/-/g, '_')
|
// Input validation with descriptive fallbacks to indicate invalid input
|
||||||
const sanitizedTool = toolName.trim().replace(/-/g, '_')
|
if (!serverName || typeof serverName !== 'string') {
|
||||||
|
logger.warn('Invalid serverName provided', { serverName })
|
||||||
|
serverName = 'invalid_server'
|
||||||
|
}
|
||||||
|
if (!toolName || typeof toolName !== 'string') {
|
||||||
|
logger.warn('Invalid toolName provided', { toolName })
|
||||||
|
toolName = 'invalid_tool'
|
||||||
|
}
|
||||||
|
|
||||||
|
// First, transliterate non-ASCII characters to ASCII
|
||||||
|
const transliteratedServer = transliterateToAscii(serverName.trim())
|
||||||
|
const transliteratedTool = transliterateToAscii(toolName.trim())
|
||||||
|
|
||||||
|
const sanitizedServer = transliteratedServer.replace(/-/g, '_')
|
||||||
|
const sanitizedTool = transliteratedTool.replace(/-/g, '_')
|
||||||
|
|
||||||
// Calculate suffix first to reserve space for it
|
// Calculate suffix first to reserve space for it
|
||||||
// Suffix format: "_" + 6 alphanumeric chars = 7 chars total
|
// Suffix format: "_" + 6 alphanumeric chars = 7 chars total
|
||||||
@ -26,13 +92,13 @@ export function buildFunctionCallToolName(serverName: string, toolName: string,
|
|||||||
name = `${sanitizedServer.slice(0, 7) || ''}-${sanitizedTool || ''}`
|
name = `${sanitizedServer.slice(0, 7) || ''}-${sanitizedTool || ''}`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Replace invalid characters with underscores or dashes
|
// Replace invalid characters with underscores
|
||||||
// Keep a-z, A-Z, 0-9, underscores and dashes
|
// Keep only a-z, 0-9, underscores, dashes (OpenAI/Anthropic API compatible)
|
||||||
name = name.replace(/[^a-zA-Z0-9_-]/g, '_')
|
name = name.replace(/[^a-z0-9_-]/g, '_')
|
||||||
|
|
||||||
// Ensure name starts with a letter or underscore (for valid JavaScript identifier)
|
// Ensure name starts with a letter or underscore (AI model requirement)
|
||||||
if (!/^[a-zA-Z]/.test(name)) {
|
if (!/^[a-z_]/.test(name)) {
|
||||||
name = `tool-${name}`
|
name = `tool_${name}`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove consecutive underscores/dashes (optional improvement)
|
// Remove consecutive underscores/dashes (optional improvement)
|
||||||
|
|||||||
13
yarn.lock
13
yarn.lock
@ -10336,6 +10336,7 @@ __metadata:
|
|||||||
tesseract.js: "patch:tesseract.js@npm%3A6.0.1#~/.yarn/patches/tesseract.js-npm-6.0.1-2562a7e46d.patch"
|
tesseract.js: "patch:tesseract.js@npm%3A6.0.1#~/.yarn/patches/tesseract.js-npm-6.0.1-2562a7e46d.patch"
|
||||||
tiny-pinyin: "npm:^1.3.2"
|
tiny-pinyin: "npm:^1.3.2"
|
||||||
tokenx: "npm:^1.1.0"
|
tokenx: "npm:^1.1.0"
|
||||||
|
transliteration: "npm:^2.3.5"
|
||||||
tsx: "npm:^4.20.3"
|
tsx: "npm:^4.20.3"
|
||||||
turndown: "npm:7.2.0"
|
turndown: "npm:7.2.0"
|
||||||
turndown-plugin-gfm: "npm:^1.0.2"
|
turndown-plugin-gfm: "npm:^1.0.2"
|
||||||
@ -24737,6 +24738,18 @@ __metadata:
|
|||||||
languageName: node
|
languageName: node
|
||||||
linkType: hard
|
linkType: hard
|
||||||
|
|
||||||
|
"transliteration@npm:^2.3.5":
|
||||||
|
version: 2.3.5
|
||||||
|
resolution: "transliteration@npm:2.3.5"
|
||||||
|
dependencies:
|
||||||
|
yargs: "npm:^17.5.1"
|
||||||
|
bin:
|
||||||
|
slugify: dist/bin/slugify
|
||||||
|
transliterate: dist/bin/transliterate
|
||||||
|
checksum: 10c0/68397225c2ca59b8e33206c65f905724e86b64460cbf90576d352dc2366e763ded97e2c7b8b1f140fb36a565d61a97c51080df9fa638e6b1769f6cb24f383756
|
||||||
|
languageName: node
|
||||||
|
linkType: hard
|
||||||
|
|
||||||
"tree-kill@npm:1.2.2, tree-kill@npm:^1.2.2":
|
"tree-kill@npm:1.2.2, tree-kill@npm:^1.2.2":
|
||||||
version: 1.2.2
|
version: 1.2.2
|
||||||
resolution: "tree-kill@npm:1.2.2"
|
resolution: "tree-kill@npm:1.2.2"
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user