fix: change cld3-asm to franc

This commit is contained in:
Pleasurecruise 2025-05-29 11:25:52 +08:00
parent 2a184062d8
commit 4310d42bd5
No known key found for this signature in database
GPG Key ID: E6385136096279B6
3 changed files with 66 additions and 106 deletions

View File

@ -75,7 +75,6 @@
"@types/react-infinite-scroll-component": "^5.0.0",
"archiver": "^7.0.1",
"async-mutex": "^0.5.0",
"cld3-asm": "^4.0.0",
"diff": "^7.0.0",
"docx": "^9.0.2",
"electron-log": "^5.1.5",
@ -84,6 +83,7 @@
"electron-window-state": "^5.0.3",
"epub": "patch:epub@npm%3A1.3.0#~/.yarn/patches/epub-npm-1.3.0-8325494ffe.patch",
"fast-xml-parser": "^5.2.0",
"franc": "^6.2.0",
"fs-extra": "^11.2.0",
"jsdom": "^26.0.0",
"markdown-it": "^14.1.0",

View File

@ -1,18 +1,6 @@
import * as cld3 from 'cld3-asm'
import { franc } from 'franc'
import React, { MutableRefObject } from 'react'
// Module-level cache for the loaded cld3 language identifier; stays null until the first load.
let langIdentifier: any = null
/**
 * Lazily loads the cld3 module and caches the result in `langIdentifier`.
 *
 * @returns the cached identifier, loading it via `cld3.loadModule()` when nothing is cached yet
 */
const initLangIdentifier = async () => {
  // Fast path: something is already cached, hand it back without loading again.
  if (langIdentifier) {
    return langIdentifier
  }
  langIdentifier = await cld3.loadModule()
  return langIdentifier
}
/**
* Detects the language using Unicode character ranges
*
@ -75,41 +63,48 @@ export const detectLanguageByUnicode = (text: string): string => {
* @returns {Promise<string>}
*/
export const detectLanguage = async (inputText: string): Promise<string> => {
  // Whitespace-only input carries no signal; 'any' is the sentinel returned for it.
  const text = inputText.trim()
  if (!text) return 'any'

  // ISO 639-3 codes (the form franc returns) -> two-letter codes.
  const isoMap: Record<string, string> = {
    cmn: 'zh',
    jpn: 'ja',
    kor: 'ko',
    rus: 'ru',
    ara: 'ar',
    spa: 'es',
    fra: 'fr',
    deu: 'de',
    ita: 'it',
    por: 'pt',
    eng: 'en'
  }

  // Two-letter codes -> language keys used by the application.
  const languageMap: Record<string, string> = {
    zh: 'chinese',
    ja: 'japanese',
    ko: 'korean',
    ru: 'russian',
    es: 'spanish',
    fr: 'french',
    de: 'german',
    it: 'italian',
    pt: 'portuguese',
    ar: 'arabic',
    en: 'english'
  }

  // The statistical detector is inaccurate on short strings, so text shorter
  // than 20 characters is classified by Unicode range instead. Any franc result
  // that is not listed in isoMap (including its 'und' "undetermined" code)
  // falls back to 'en'.
  const code = text.length < 20 ? detectLanguageByUnicode(text) : (isoMap[franc(text)] ?? 'en')

  // Codes without an application key default to 'english'.
  return languageMap[code] ?? 'english'
}
/**

View File

@ -5597,7 +5597,6 @@ __metadata:
async-mutex: "npm:^0.5.0"
axios: "npm:^1.7.3"
browser-image-compression: "npm:^2.0.2"
cld3-asm: "npm:^4.0.0"
color: "npm:^5.0.0"
dayjs: "npm:^1.11.11"
dexie: "npm:^4.0.8"
@ -5622,6 +5621,7 @@ __metadata:
eslint-plugin-unused-imports: "npm:^4.1.4"
fast-diff: "npm:^1.3.0"
fast-xml-parser: "npm:^5.2.0"
franc: "npm:^6.2.0"
fs-extra: "npm:^11.2.0"
html-to-image: "npm:^1.11.13"
husky: "npm:^9.1.7"
@ -6842,15 +6842,6 @@ __metadata:
languageName: node
linkType: hard
"cld3-asm@npm:^4.0.0":
version: 4.0.0
resolution: "cld3-asm@npm:4.0.0"
dependencies:
emscripten-wasm-loader: "npm:^3.0.3"
checksum: 10c0/1edd4bad0e0aa68f05910e59aee63598ddce9c0d859de7bd7a60a81e4f20c29718a26a255c78c14c07d86ce78a26f617a54cd8ca777db9cb2d0628f450c298d2
languageName: node
linkType: hard
"clean-stack@npm:^2.0.0":
version: 2.2.0
resolution: "clean-stack@npm:2.2.0"
@ -6963,6 +6954,13 @@ __metadata:
languageName: node
linkType: hard
"collapse-white-space@npm:^2.0.0":
version: 2.1.0
resolution: "collapse-white-space@npm:2.1.0"
checksum: 10c0/b2e2800f4ab261e62eb27a1fbe853378296e3a726d6695117ed033e82d61fb6abeae4ffc1465d5454499e237005de9cfc52c9562dc7ca4ac759b9a222ef14453
languageName: node
linkType: hard
"color-convert@npm:^2.0.1":
version: 2.0.1
resolution: "color-convert@npm:2.0.1"
@ -8531,17 +8529,6 @@ __metadata:
languageName: node
linkType: hard
"emscripten-wasm-loader@npm:^3.0.3":
version: 3.0.3
resolution: "emscripten-wasm-loader@npm:3.0.3"
dependencies:
getroot: "npm:^1.0.0"
nanoid: "npm:^2.0.3"
unixify: "npm:^1.0.0"
checksum: 10c0/3a171300ff671de0fec5cf239a86fbaa578bbdc3ac2e7d0009499c98496a27276bc76dcf8515d4799237a656763ab98aec21753a8947e51cfe66df28e1296c8b
languageName: node
linkType: hard
"encodeurl@npm:^2.0.0":
version: 2.0.0
resolution: "encodeurl@npm:2.0.0"
@ -9833,6 +9820,15 @@ __metadata:
languageName: node
linkType: hard
"franc@npm:^6.2.0":
version: 6.2.0
resolution: "franc@npm:6.2.0"
dependencies:
trigram-utils: "npm:^2.0.0"
checksum: 10c0/136a08d6e4632f17eae6f0ae93b224b0bf2233dc1d5dbd0b23e479960f6c71c0847bef834d3b6b7c9cefb4f905d5e08fc82b0738bb3ed4a6c83faffcf9fa2a11
languageName: node
linkType: hard
"fresh@npm:^2.0.0":
version: 2.0.0
resolution: "fresh@npm:2.0.0"
@ -10095,15 +10091,6 @@ __metadata:
languageName: node
linkType: hard
"getroot@npm:^1.0.0":
version: 1.0.0
resolution: "getroot@npm:1.0.0"
dependencies:
tslib: "npm:^1.7.1"
checksum: 10c0/06fe0762e8f4076625a136415584cec896fad57338454349df6153e7543f81ae89959e175df7fb2e9def684a982a316db5862ad2d293419f052e5b1be45bc6f1
languageName: node
linkType: hard
"github-from-package@npm:0.0.0":
version: 0.0.0
resolution: "github-from-package@npm:0.0.0"
@ -13573,10 +13560,10 @@ __metadata:
languageName: node
linkType: hard
"nanoid@npm:^2.0.3":
version: 2.1.11
resolution: "nanoid@npm:2.1.11"
checksum: 10c0/8640d17698633ff78b2549ec8d5dffd8f56909bad1cf0da08bf3a4012f98553b1b9f2327a2d7fb3613084f33189a8ab4b889eb4c7939f3f9e242d9fd8ff059d5
"n-gram@npm:^2.0.0":
version: 2.0.2
resolution: "n-gram@npm:2.0.2"
checksum: 10c0/72e2cdc8c37c9253b556a0deb9cd26d5ac59a5d7a38b2d2928927e3959bc7d3cb591d766e30309a4c685dbc51330025cb30c5c6518ee516caf3318aed2635f1b
languageName: node
linkType: hard
@ -13807,15 +13794,6 @@ __metadata:
languageName: node
linkType: hard
"normalize-path@npm:^2.1.1":
version: 2.1.1
resolution: "normalize-path@npm:2.1.1"
dependencies:
remove-trailing-separator: "npm:^1.0.1"
checksum: 10c0/db814326ff88057437233361b4c7e9cac7b54815b051b57f2d341ce89b1d8ec8cbd43e7fa95d7652b3b69ea8fcc294b89b8530d556a84d1bdace94229e1e9a8b
languageName: node
linkType: hard
"normalize-path@npm:^3.0.0":
version: 3.0.0
resolution: "normalize-path@npm:3.0.0"
@ -16003,13 +15981,6 @@ __metadata:
languageName: node
linkType: hard
"remove-trailing-separator@npm:^1.0.1":
version: 1.1.0
resolution: "remove-trailing-separator@npm:1.1.0"
checksum: 10c0/3568f9f8f5af3737b4aee9e6e1e8ec4be65a92da9cb27f989e0893714d50aa95ed2ff02d40d1fa35e1b1a234dc9c2437050ef356704a3999feaca6667d9e9bfc
languageName: node
linkType: hard
"repeat-string@npm:^1.0.0":
version: 1.6.1
resolution: "repeat-string@npm:1.6.1"
@ -17562,6 +17533,16 @@ __metadata:
languageName: node
linkType: hard
"trigram-utils@npm:^2.0.0":
version: 2.0.1
resolution: "trigram-utils@npm:2.0.1"
dependencies:
collapse-white-space: "npm:^2.0.0"
n-gram: "npm:^2.0.0"
checksum: 10c0/d024dc91a9c0310e75fa68422185e3a32814831971b9e86a2925e74bd1932a30501aa2ac214768f0a545f3db63610ee14b4748ac31532e1bc46c791941d71c6d
languageName: node
linkType: hard
"trim-lines@npm:^3.0.0":
version: 3.0.1
resolution: "trim-lines@npm:3.0.1"
@ -17633,13 +17614,6 @@ __metadata:
languageName: node
linkType: hard
"tslib@npm:^1.7.1":
version: 1.14.1
resolution: "tslib@npm:1.14.1"
checksum: 10c0/69ae09c49eea644bc5ebe1bca4fa4cc2c82b7b3e02f43b84bd891504edf66dbc6b2ec0eef31a957042de2269139e4acff911e6d186a258fb14069cd7f6febce2
languageName: node
linkType: hard
"tslib@npm:^2.0.1, tslib@npm:^2.1.0, tslib@npm:^2.4.0, tslib@npm:^2.8.1":
version: 2.8.1
resolution: "tslib@npm:2.8.1"
@ -17967,15 +17941,6 @@ __metadata:
languageName: node
linkType: hard
"unixify@npm:^1.0.0":
version: 1.0.0
resolution: "unixify@npm:1.0.0"
dependencies:
normalize-path: "npm:^2.1.1"
checksum: 10c0/8b89100619ebde9f0ab4024a4d402316fb7b1d4853723410fc828944e8d3d01480f210cddf94d9a1699559f8180d861eb6323da8011b7bcc1bbaf6a11a5b1f1e
languageName: node
linkType: hard
"unpipe@npm:1.0.0":
version: 1.0.0
resolution: "unpipe@npm:1.0.0"