diff --git a/package.json b/package.json index 08683e0f83..78951e0336 100644 --- a/package.json +++ b/package.json @@ -75,6 +75,7 @@ "@types/react-infinite-scroll-component": "^5.0.0", "archiver": "^7.0.1", "async-mutex": "^0.5.0", + "cld3-asm": "^4.0.0", "diff": "^7.0.0", "docx": "^9.0.2", "electron-log": "^5.1.5", @@ -83,7 +84,6 @@ "electron-window-state": "^5.0.3", "epub": "patch:epub@npm%3A1.3.0#~/.yarn/patches/epub-npm-1.3.0-8325494ffe.patch", "fast-xml-parser": "^5.2.0", - "franc": "^6.2.0", "fs-extra": "^11.2.0", "jsdom": "^26.0.0", "markdown-it": "^14.1.0", diff --git a/src/renderer/src/pages/translate/TranslatePage.tsx b/src/renderer/src/pages/translate/TranslatePage.tsx index 656c73b5bd..76f259a4fe 100644 --- a/src/renderer/src/pages/translate/TranslatePage.tsx +++ b/src/renderer/src/pages/translate/TranslatePage.tsx @@ -16,8 +16,7 @@ import { createInputScrollHandler, createOutputScrollHandler, detectLanguage, - getTargetLanguageForBidirectional, - isLanguageInPair + determineTargetLanguage } from '@renderer/utils/translate' import { Button, Empty, Flex, Modal, Popconfirm, Select, Space, Switch, Tooltip } from 'antd' import TextArea, { TextAreaRef } from 'antd/es/input/TextArea' @@ -290,30 +289,26 @@ const TranslatePage: FC = () => { setLoading(true) try { const sourceLanguage = await detectLanguage(text) - console.log('检测到的源语言:', sourceLanguage) + const result = determineTargetLanguage(sourceLanguage, targetLanguage, isBidirectional, bidirectionalPair) + if (!result.success) { + let errorMessage = '' + if (result.errorType === 'same_language') { + errorMessage = t('translate.language.same') + } else if (result.errorType === 'not_in_pair') { + errorMessage = t('translate.language.not_pair') + } - let actualTargetLanguage = targetLanguage + window.message.warning({ + content: errorMessage, + key: 'translate-message' + }) + setLoading(false) + return + } + const actualTargetLanguage = result.language as string if (isBidirectional) { - if (!isLanguageInPair(sourceLanguage, bidirectionalPair)) { - window.message.warning({ - content: t('translate.language.not_pair'), - key: 'translate-message' - }) - setLoading(false) - return - } - actualTargetLanguage = getTargetLanguageForBidirectional(sourceLanguage, bidirectionalPair) setTargetLanguage(actualTargetLanguage) - } else { - if (sourceLanguage === targetLanguage) { - window.message.warning({ - content: t('translate.language.same'), - key: 'translate-message' - }) - setLoading(false) - return - } } const assistant = getDefaultTranslateAssistant(actualTargetLanguage, text) diff --git a/src/renderer/src/utils/translate.ts b/src/renderer/src/utils/translate.ts index e4df23d392..2b44f3ec26 100644 --- a/src/renderer/src/utils/translate.ts +++ b/src/renderer/src/utils/translate.ts @@ -1,7 +1,74 @@ -import { fetchTranslate } from '@renderer/services/ApiService' -import { franc } from 'franc' +import * as cld3 from 'cld3-asm' import React, { MutableRefObject } from 'react' +let langIdentifier: any = null + +/** + * 初始化语言识别器 + */ +const initLangIdentifier = async () => { + if (!langIdentifier) { + langIdentifier = await cld3.loadModule() + } + return langIdentifier +} + +/** + * 使用Unicode字符范围检测语言 + * 适用于较短文本的语言检测 + * @param {string} text 需要检测语言的文本 + * @returns {string} 检测到的语言代码 + */ +export const detectLanguageByUnicode = (text: string): string => { + const counts = { + zh: 0, + ja: 0, + ko: 0, + ru: 0, + ar: 0, + latin: 0 + } + + let totalChars = 0 + + for (const char of text) { + const code = char.codePointAt(0) || 0 + totalChars++ + + if (code >= 0x4e00 && code <= 0x9fff) { + counts.zh++ + } else if ((code >= 0x3040 && code <= 0x309f) || (code >= 0x30a0 && code <= 0x30ff)) { + counts.ja++ + } else if ((code >= 0xac00 && code <= 0xd7a3) || (code >= 0x1100 && code <= 0x11ff)) { + counts.ko++ + } else if (code >= 0x0400 && code <= 0x04ff) { + counts.ru++ + } else if (code >= 0x0600 && code <= 0x06ff) { + counts.ar++ + } else if ((code >= 0x0020 && code <= 0x007f) || (code >= 0x0080 && code <= 0x00ff)) { + counts.latin++ + } else { + totalChars-- + } + } + + if (totalChars === 0) return 'en' + let maxLang = 'en' + let maxCount = 0 + + for (const [lang, count] of Object.entries(counts)) { + if (count > maxCount) { + maxCount = count + maxLang = lang === 'latin' ? 'en' : lang + } + } + + if (maxCount / totalChars < 0.3) { + return 'en' + } + return maxLang +} + /** * 检测输入文本的语言 * @param {string} inputText 需要检测语言的文本 @@ -11,66 +78,38 @@ export const detectLanguage = async (inputText: string): Promise => { if (!inputText.trim()) return 'any' const text = inputText.trim() - const detectedLangCode = franc(text) - // 映射 ISO 639-3 代码到应用使用的语言代码 + // 由于算法的局限性会导致对较短的字符串识别不准确 + let detected + if (text.length < 20) { + detected = detectLanguageByUnicode(text) + } else { + const identifier = await initLangIdentifier() + const result = identifier.findLanguage(text) + detected = result.reliable ? result.language : 'en' + } + console.log(detected) + const topLang = detected || 'en' + + // 映射cld3-asm返回的语言代码到应用使用的语言代码 const languageMap: Record = { - cmn: 'chinese', // 普通话 - zho: 'chinese', // 中文 - jpn: 'japanese', // 日语 - kor: 'korean', // 韩语 - rus: 'russian', // 俄语 - spa: 'spanish', // 西班牙语 - fra: 'french', // 法语 - deu: 'german', // 德语 - ita: 'italian', // 意大利语 - por: 'portuguese', // 葡萄牙语 - ara: 'arabic', // 阿拉伯语 - eng: 'english' // 英语 + zh: 'chinese', // 中文 + ja: 'japanese', // 日语 + ko: 'korean', // 韩语 + ru: 'russian', // 俄语 + es: 'spanish', // 西班牙语 + fr: 'french', // 法语 + de: 'german', // 德语 + it: 'italian', // 意大利语 + pt: 'portuguese', // 葡萄牙语 + ar: 'arabic', // 阿拉伯语 + en: 'english' // 英语 } - if (detectedLangCode !== 'und' && languageMap[detectedLangCode]) { - return languageMap[detectedLangCode] - } - - try { - const sampleText = text.substring(0, 200) - const prompt = `Identify the primary language in this text: "${sampleText}". Reply with only one word from this list: english, chinese, japanese, korean, russian, spanish, french, german, italian, portuguese, arabic.` - - let detectedCode = '' - await fetchTranslate({ - content: sampleText, - assistant: { - id: 'lang-detector', - name: 'Language Detector', - prompt, - topics: [], - type: 'translator' - }, - onResponse: (response) => { - detectedCode = response.trim().toLowerCase() - } - }) - - const validCodes = [ - 'english', - 'chinese', - 'japanese', - 'korean', - 'russian', - 'spanish', - 'french', - 'german', - 'italian', - 'portuguese', - 'arabic' - ] - - return validCodes.find((code) => detectedCode.includes(code)) || 'english' - } catch (error) { - console.error('语言检测错误:', error) - return 'english' + if (topLang && languageMap[topLang]) { + return languageMap[topLang] } + return 'english' } /** @@ -85,9 +124,7 @@ export const getTargetLanguageForBidirectional = (sourceLanguage: string, langua } else if (sourceLanguage === languagePair[1]) { return languagePair[0] } - - // 默认返回第一个不同于源语言的语言 - return languagePair[0] === sourceLanguage ? languagePair[1] : languagePair[0] + return languagePair[0] !== sourceLanguage ? languagePair[0] : languagePair[1] } /** @@ -100,6 +137,36 @@ export const isLanguageInPair = (sourceLanguage: string, languagePair: [string, return [languagePair[0], languagePair[1]].includes(sourceLanguage) } +/** + * 确定翻译的目标语言 + * @param sourceLanguage 检测到的源语言 + * @param targetLanguage 用户设置的目标语言 + * @param isBidirectional 是否开启双向翻译 + * @param bidirectionalPair 双向翻译的语言对 + * @returns 处理结果对象 + */ +export const determineTargetLanguage = ( + sourceLanguage: string, + targetLanguage: string, + isBidirectional: boolean, + bidirectionalPair: [string, string] +): { success: boolean; language?: string; errorType?: 'same_language' | 'not_in_pair' } => { + if (isBidirectional) { + if (!isLanguageInPair(sourceLanguage, bidirectionalPair)) { + return { success: false, errorType: 'not_in_pair' } + } + return { + success: true, + language: getTargetLanguageForBidirectional(sourceLanguage, bidirectionalPair) + } + } else { + if (sourceLanguage === targetLanguage) { + return { success: false, errorType: 'same_language' } + } + return { success: true, language: targetLanguage } + } +} + /** * 处理滚动同步 * @param sourceElement 源元素 diff --git a/yarn.lock b/yarn.lock index 2a7e3f7132..4ce1f78120 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5597,6 +5597,7 @@ __metadata: async-mutex: "npm:^0.5.0" axios: "npm:^1.7.3" browser-image-compression: "npm:^2.0.2" + cld3-asm: "npm:^4.0.0" color: "npm:^5.0.0" dayjs: "npm:^1.11.11" dexie: "npm:^4.0.8" @@ -5621,7 +5622,6 @@ __metadata: eslint-plugin-unused-imports: "npm:^4.1.4" fast-diff: "npm:^1.3.0" fast-xml-parser: "npm:^5.2.0" - franc: "npm:^6.2.0" fs-extra: "npm:^11.2.0" html-to-image: "npm:^1.11.13" husky: "npm:^9.1.7" @@ -6842,6 +6842,15 @@ __metadata: languageName: node linkType: hard +"cld3-asm@npm:^4.0.0": + version: 4.0.0 + resolution: "cld3-asm@npm:4.0.0" + dependencies: + emscripten-wasm-loader: "npm:^3.0.3" + checksum: 10c0/1edd4bad0e0aa68f05910e59aee63598ddce9c0d859de7bd7a60a81e4f20c29718a26a255c78c14c07d86ce78a26f617a54cd8ca777db9cb2d0628f450c298d2 + languageName: node + linkType: hard + "clean-stack@npm:^2.0.0": version: 2.2.0 resolution: "clean-stack@npm:2.2.0" @@ -8522,6 +8531,17 @@ __metadata: languageName: node linkType: hard +"emscripten-wasm-loader@npm:^3.0.3": + version: 3.0.3 + resolution: "emscripten-wasm-loader@npm:3.0.3" + dependencies: + getroot: "npm:^1.0.0" + nanoid: "npm:^2.0.3" + unixify: "npm:^1.0.0" + checksum: 10c0/3a171300ff671de0fec5cf239a86fbaa578bbdc3ac2e7d0009499c98496a27276bc76dcf8515d4799237a656763ab98aec21753a8947e51cfe66df28e1296c8b + languageName: node + linkType: hard + "encodeurl@npm:^2.0.0": version: 2.0.0 resolution: "encodeurl@npm:2.0.0" @@ -9813,15 +9833,6 @@ __metadata: languageName: node linkType: hard -"franc@npm:^6.2.0": - version: 6.2.0 - resolution: "franc@npm:6.2.0" - dependencies: - trigram-utils: "npm:^2.0.0" - checksum: 10c0/136a08d6e4632f17eae6f0ae93b224b0bf2233dc1d5dbd0b23e479960f6c71c0847bef834d3b6b7c9cefb4f905d5e08fc82b0738bb3ed4a6c83faffcf9fa2a11 - languageName: node - linkType: hard - "fresh@npm:^2.0.0": version: 2.0.0 resolution: "fresh@npm:2.0.0" @@ -10084,6 +10095,15 @@ __metadata: languageName: node linkType: hard +"getroot@npm:^1.0.0": + version: 1.0.0 + resolution: "getroot@npm:1.0.0" + dependencies: + tslib: "npm:^1.7.1" + checksum: 10c0/06fe0762e8f4076625a136415584cec896fad57338454349df6153e7543f81ae89959e175df7fb2e9def684a982a316db5862ad2d293419f052e5b1be45bc6f1 + languageName: node + linkType: hard + "github-from-package@npm:0.0.0": version: 0.0.0 resolution: "github-from-package@npm:0.0.0" @@ -13553,10 +13573,10 @@ __metadata: languageName: node linkType: hard -"n-gram@npm:^2.0.0": - version: 2.0.2 - resolution: "n-gram@npm:2.0.2" - checksum: 10c0/72e2cdc8c37c9253b556a0deb9cd26d5ac59a5d7a38b2d2928927e3959bc7d3cb591d766e30309a4c685dbc51330025cb30c5c6518ee516caf3318aed2635f1b +"nanoid@npm:^2.0.3": + version: 2.1.11 + resolution: "nanoid@npm:2.1.11" + checksum: 10c0/8640d17698633ff78b2549ec8d5dffd8f56909bad1cf0da08bf3a4012f98553b1b9f2327a2d7fb3613084f33189a8ab4b889eb4c7939f3f9e242d9fd8ff059d5 languageName: node linkType: hard @@ -13787,6 +13807,15 @@ __metadata: languageName: node linkType: hard +"normalize-path@npm:^2.1.1": + version: 2.1.1 + resolution: "normalize-path@npm:2.1.1" + dependencies: + remove-trailing-separator: "npm:^1.0.1" + checksum: 10c0/db814326ff88057437233361b4c7e9cac7b54815b051b57f2d341ce89b1d8ec8cbd43e7fa95d7652b3b69ea8fcc294b89b8530d556a84d1bdace94229e1e9a8b + languageName: node + linkType: hard + "normalize-path@npm:^3.0.0": version: 3.0.0 resolution: "normalize-path@npm:3.0.0" @@ -15974,6 +16003,13 @@ __metadata: languageName: node linkType: hard +"remove-trailing-separator@npm:^1.0.1": + version: 1.1.0 + resolution: "remove-trailing-separator@npm:1.1.0" + checksum: 10c0/3568f9f8f5af3737b4aee9e6e1e8ec4be65a92da9cb27f989e0893714d50aa95ed2ff02d40d1fa35e1b1a234dc9c2437050ef356704a3999feaca6667d9e9bfc + languageName: node + linkType: hard + "repeat-string@npm:^1.0.0": version: 1.6.1 resolution: "repeat-string@npm:1.6.1" @@ -17526,16 +17562,6 @@ __metadata: languageName: node linkType: hard -"trigram-utils@npm:^2.0.0": - version: 2.0.1 - resolution: "trigram-utils@npm:2.0.1" - dependencies: - collapse-white-space: "npm:^2.0.0" - n-gram: "npm:^2.0.0" - checksum: 10c0/d024dc91a9c0310e75fa68422185e3a32814831971b9e86a2925e74bd1932a30501aa2ac214768f0a545f3db63610ee14b4748ac31532e1bc46c791941d71c6d - languageName: node - linkType: hard - "trim-lines@npm:^3.0.0": version: 3.0.1 resolution: "trim-lines@npm:3.0.1" @@ -17607,6 +17633,13 @@ __metadata: languageName: node linkType: hard +"tslib@npm:^1.7.1": + version: 1.14.1 + resolution: "tslib@npm:1.14.1" + checksum: 10c0/69ae09c49eea644bc5ebe1bca4fa4cc2c82b7b3e02f43b84bd891504edf66dbc6b2ec0eef31a957042de2269139e4acff911e6d186a258fb14069cd7f6febce2 + languageName: node + linkType: hard + "tslib@npm:^2.0.1, tslib@npm:^2.1.0, tslib@npm:^2.4.0, tslib@npm:^2.8.1": version: 2.8.1 resolution: "tslib@npm:2.8.1" @@ -17934,6 +17967,15 @@ __metadata: languageName: node linkType: hard +"unixify@npm:^1.0.0": + version: 1.0.0 + resolution: "unixify@npm:1.0.0" + dependencies: + normalize-path: "npm:^2.1.1" + checksum: 10c0/8b89100619ebde9f0ab4024a4d402316fb7b1d4853723410fc828944e8d3d01480f210cddf94d9a1699559f8180d861eb6323da8011b7bcc1bbaf6a11a5b1f1e + languageName: node + linkType: hard + "unpipe@npm:1.0.0": version: 1.0.0 resolution: "unpipe@npm:1.0.0"