mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-25 11:20:07 +08:00
fix: change cld3-asm to franc
This commit is contained in:
parent
2a184062d8
commit
4310d42bd5
@ -75,7 +75,6 @@
|
||||
"@types/react-infinite-scroll-component": "^5.0.0",
|
||||
"archiver": "^7.0.1",
|
||||
"async-mutex": "^0.5.0",
|
||||
"cld3-asm": "^4.0.0",
|
||||
"diff": "^7.0.0",
|
||||
"docx": "^9.0.2",
|
||||
"electron-log": "^5.1.5",
|
||||
@ -84,6 +83,7 @@
|
||||
"electron-window-state": "^5.0.3",
|
||||
"epub": "patch:epub@npm%3A1.3.0#~/.yarn/patches/epub-npm-1.3.0-8325494ffe.patch",
|
||||
"fast-xml-parser": "^5.2.0",
|
||||
"franc": "^6.2.0",
|
||||
"fs-extra": "^11.2.0",
|
||||
"jsdom": "^26.0.0",
|
||||
"markdown-it": "^14.1.0",
|
||||
|
||||
@ -1,18 +1,6 @@
|
||||
import * as cld3 from 'cld3-asm'
|
||||
import { franc } from 'franc'
|
||||
import React, { MutableRefObject } from 'react'
|
||||
|
||||
let langIdentifier: any = null
|
||||
|
||||
/**
|
||||
* 初始化语言识别器
|
||||
*/
|
||||
const initLangIdentifier = async () => {
|
||||
if (!langIdentifier) {
|
||||
langIdentifier = await cld3.loadModule()
|
||||
}
|
||||
return langIdentifier
|
||||
}
|
||||
|
||||
/**
|
||||
* 使用Unicode字符范围检测语言
|
||||
* 适用于较短文本的语言检测
|
||||
@ -75,41 +63,48 @@ export const detectLanguageByUnicode = (text: string): string => {
|
||||
* @returns {Promise<string>} 检测到的语言代码
|
||||
*/
|
||||
export const detectLanguage = async (inputText: string): Promise<string> => {
|
||||
if (!inputText.trim()) return 'any'
|
||||
|
||||
const text = inputText.trim()
|
||||
if (!text) return 'any'
|
||||
let code: string
|
||||
|
||||
// 由于算法的局限性会导致对较短的字符串识别不准确
|
||||
let detected
|
||||
// 如果文本长度小于20个字符,使用Unicode范围检测
|
||||
if (text.length < 20) {
|
||||
detected = detectLanguageByUnicode(text)
|
||||
code = detectLanguageByUnicode(text)
|
||||
} else {
|
||||
const identifier = await initLangIdentifier()
|
||||
const result = identifier.findLanguage(text)
|
||||
detected = result.reliable ? result.language : 'en'
|
||||
// franc 返回 ISO 639-3 代码
|
||||
const iso3 = franc(text)
|
||||
const isoMap: Record<string, string> = {
|
||||
cmn: 'zh',
|
||||
jpn: 'ja',
|
||||
kor: 'ko',
|
||||
rus: 'ru',
|
||||
ara: 'ar',
|
||||
spa: 'es',
|
||||
fra: 'fr',
|
||||
deu: 'de',
|
||||
ita: 'it',
|
||||
por: 'pt',
|
||||
eng: 'en'
|
||||
}
|
||||
code = isoMap[iso3] || 'en'
|
||||
}
|
||||
console.log(detected)
|
||||
const topLang = detected || 'en'
|
||||
|
||||
// 映射cld3-asm返回的语言代码到应用使用的语言代码
|
||||
// 映射到应用使用的语言键
|
||||
const languageMap: Record<string, string> = {
|
||||
zh: 'chinese', // 中文
|
||||
ja: 'japanese', // 日语
|
||||
ko: 'korean', // 韩语
|
||||
ru: 'russian', // 俄语
|
||||
es: 'spanish', // 西班牙语
|
||||
fr: 'french', // 法语
|
||||
de: 'german', // 德语
|
||||
it: 'italian', // 意大利语
|
||||
pt: 'portuguese', // 葡萄牙语
|
||||
ar: 'arabic', // 阿拉伯语
|
||||
en: 'english' // 英语
|
||||
zh: 'chinese',
|
||||
ja: 'japanese',
|
||||
ko: 'korean',
|
||||
ru: 'russian',
|
||||
es: 'spanish',
|
||||
fr: 'french',
|
||||
de: 'german',
|
||||
it: 'italian',
|
||||
pt: 'portuguese',
|
||||
ar: 'arabic',
|
||||
en: 'english'
|
||||
}
|
||||
|
||||
if (topLang && languageMap[topLang]) {
|
||||
return languageMap[topLang]
|
||||
}
|
||||
return 'english'
|
||||
return languageMap[code] || 'english'
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
97
yarn.lock
97
yarn.lock
@ -5597,7 +5597,6 @@ __metadata:
|
||||
async-mutex: "npm:^0.5.0"
|
||||
axios: "npm:^1.7.3"
|
||||
browser-image-compression: "npm:^2.0.2"
|
||||
cld3-asm: "npm:^4.0.0"
|
||||
color: "npm:^5.0.0"
|
||||
dayjs: "npm:^1.11.11"
|
||||
dexie: "npm:^4.0.8"
|
||||
@ -5622,6 +5621,7 @@ __metadata:
|
||||
eslint-plugin-unused-imports: "npm:^4.1.4"
|
||||
fast-diff: "npm:^1.3.0"
|
||||
fast-xml-parser: "npm:^5.2.0"
|
||||
franc: "npm:^6.2.0"
|
||||
fs-extra: "npm:^11.2.0"
|
||||
html-to-image: "npm:^1.11.13"
|
||||
husky: "npm:^9.1.7"
|
||||
@ -6842,15 +6842,6 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"cld3-asm@npm:^4.0.0":
|
||||
version: 4.0.0
|
||||
resolution: "cld3-asm@npm:4.0.0"
|
||||
dependencies:
|
||||
emscripten-wasm-loader: "npm:^3.0.3"
|
||||
checksum: 10c0/1edd4bad0e0aa68f05910e59aee63598ddce9c0d859de7bd7a60a81e4f20c29718a26a255c78c14c07d86ce78a26f617a54cd8ca777db9cb2d0628f450c298d2
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"clean-stack@npm:^2.0.0":
|
||||
version: 2.2.0
|
||||
resolution: "clean-stack@npm:2.2.0"
|
||||
@ -6963,6 +6954,13 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"collapse-white-space@npm:^2.0.0":
|
||||
version: 2.1.0
|
||||
resolution: "collapse-white-space@npm:2.1.0"
|
||||
checksum: 10c0/b2e2800f4ab261e62eb27a1fbe853378296e3a726d6695117ed033e82d61fb6abeae4ffc1465d5454499e237005de9cfc52c9562dc7ca4ac759b9a222ef14453
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"color-convert@npm:^2.0.1":
|
||||
version: 2.0.1
|
||||
resolution: "color-convert@npm:2.0.1"
|
||||
@ -8531,17 +8529,6 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"emscripten-wasm-loader@npm:^3.0.3":
|
||||
version: 3.0.3
|
||||
resolution: "emscripten-wasm-loader@npm:3.0.3"
|
||||
dependencies:
|
||||
getroot: "npm:^1.0.0"
|
||||
nanoid: "npm:^2.0.3"
|
||||
unixify: "npm:^1.0.0"
|
||||
checksum: 10c0/3a171300ff671de0fec5cf239a86fbaa578bbdc3ac2e7d0009499c98496a27276bc76dcf8515d4799237a656763ab98aec21753a8947e51cfe66df28e1296c8b
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"encodeurl@npm:^2.0.0":
|
||||
version: 2.0.0
|
||||
resolution: "encodeurl@npm:2.0.0"
|
||||
@ -9833,6 +9820,15 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"franc@npm:^6.2.0":
|
||||
version: 6.2.0
|
||||
resolution: "franc@npm:6.2.0"
|
||||
dependencies:
|
||||
trigram-utils: "npm:^2.0.0"
|
||||
checksum: 10c0/136a08d6e4632f17eae6f0ae93b224b0bf2233dc1d5dbd0b23e479960f6c71c0847bef834d3b6b7c9cefb4f905d5e08fc82b0738bb3ed4a6c83faffcf9fa2a11
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"fresh@npm:^2.0.0":
|
||||
version: 2.0.0
|
||||
resolution: "fresh@npm:2.0.0"
|
||||
@ -10095,15 +10091,6 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"getroot@npm:^1.0.0":
|
||||
version: 1.0.0
|
||||
resolution: "getroot@npm:1.0.0"
|
||||
dependencies:
|
||||
tslib: "npm:^1.7.1"
|
||||
checksum: 10c0/06fe0762e8f4076625a136415584cec896fad57338454349df6153e7543f81ae89959e175df7fb2e9def684a982a316db5862ad2d293419f052e5b1be45bc6f1
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"github-from-package@npm:0.0.0":
|
||||
version: 0.0.0
|
||||
resolution: "github-from-package@npm:0.0.0"
|
||||
@ -13573,10 +13560,10 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"nanoid@npm:^2.0.3":
|
||||
version: 2.1.11
|
||||
resolution: "nanoid@npm:2.1.11"
|
||||
checksum: 10c0/8640d17698633ff78b2549ec8d5dffd8f56909bad1cf0da08bf3a4012f98553b1b9f2327a2d7fb3613084f33189a8ab4b889eb4c7939f3f9e242d9fd8ff059d5
|
||||
"n-gram@npm:^2.0.0":
|
||||
version: 2.0.2
|
||||
resolution: "n-gram@npm:2.0.2"
|
||||
checksum: 10c0/72e2cdc8c37c9253b556a0deb9cd26d5ac59a5d7a38b2d2928927e3959bc7d3cb591d766e30309a4c685dbc51330025cb30c5c6518ee516caf3318aed2635f1b
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
@ -13807,15 +13794,6 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"normalize-path@npm:^2.1.1":
|
||||
version: 2.1.1
|
||||
resolution: "normalize-path@npm:2.1.1"
|
||||
dependencies:
|
||||
remove-trailing-separator: "npm:^1.0.1"
|
||||
checksum: 10c0/db814326ff88057437233361b4c7e9cac7b54815b051b57f2d341ce89b1d8ec8cbd43e7fa95d7652b3b69ea8fcc294b89b8530d556a84d1bdace94229e1e9a8b
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"normalize-path@npm:^3.0.0":
|
||||
version: 3.0.0
|
||||
resolution: "normalize-path@npm:3.0.0"
|
||||
@ -16003,13 +15981,6 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"remove-trailing-separator@npm:^1.0.1":
|
||||
version: 1.1.0
|
||||
resolution: "remove-trailing-separator@npm:1.1.0"
|
||||
checksum: 10c0/3568f9f8f5af3737b4aee9e6e1e8ec4be65a92da9cb27f989e0893714d50aa95ed2ff02d40d1fa35e1b1a234dc9c2437050ef356704a3999feaca6667d9e9bfc
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"repeat-string@npm:^1.0.0":
|
||||
version: 1.6.1
|
||||
resolution: "repeat-string@npm:1.6.1"
|
||||
@ -17562,6 +17533,16 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"trigram-utils@npm:^2.0.0":
|
||||
version: 2.0.1
|
||||
resolution: "trigram-utils@npm:2.0.1"
|
||||
dependencies:
|
||||
collapse-white-space: "npm:^2.0.0"
|
||||
n-gram: "npm:^2.0.0"
|
||||
checksum: 10c0/d024dc91a9c0310e75fa68422185e3a32814831971b9e86a2925e74bd1932a30501aa2ac214768f0a545f3db63610ee14b4748ac31532e1bc46c791941d71c6d
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"trim-lines@npm:^3.0.0":
|
||||
version: 3.0.1
|
||||
resolution: "trim-lines@npm:3.0.1"
|
||||
@ -17633,13 +17614,6 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"tslib@npm:^1.7.1":
|
||||
version: 1.14.1
|
||||
resolution: "tslib@npm:1.14.1"
|
||||
checksum: 10c0/69ae09c49eea644bc5ebe1bca4fa4cc2c82b7b3e02f43b84bd891504edf66dbc6b2ec0eef31a957042de2269139e4acff911e6d186a258fb14069cd7f6febce2
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"tslib@npm:^2.0.1, tslib@npm:^2.1.0, tslib@npm:^2.4.0, tslib@npm:^2.8.1":
|
||||
version: 2.8.1
|
||||
resolution: "tslib@npm:2.8.1"
|
||||
@ -17967,15 +17941,6 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"unixify@npm:^1.0.0":
|
||||
version: 1.0.0
|
||||
resolution: "unixify@npm:1.0.0"
|
||||
dependencies:
|
||||
normalize-path: "npm:^2.1.1"
|
||||
checksum: 10c0/8b89100619ebde9f0ab4024a4d402316fb7b1d4853723410fc828944e8d3d01480f210cddf94d9a1699559f8180d861eb6323da8011b7bcc1bbaf6a11a5b1f1e
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"unpipe@npm:1.0.0":
|
||||
version: 1.0.0
|
||||
resolution: "unpipe@npm:1.0.0"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user