mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-23 10:00:08 +08:00
Centralize OCR configuration in shared package to avoid duplication and improve maintainability. This change affects multiple components that previously imported from renderer config.
183 lines
3.5 KiB
TypeScript
183 lines
3.5 KiB
TypeScript
import type {
|
|
BuiltinOcrProvider,
|
|
BuiltinOcrProviderId,
|
|
OcrOvConfig,
|
|
OcrOvProvider,
|
|
OcrPpocrConfig,
|
|
OcrPpocrProvider,
|
|
OcrSystemConfig,
|
|
OcrSystemProvider,
|
|
OcrTesseractConfig,
|
|
OcrTesseractProvider,
|
|
TesseractLangCode,
|
|
TranslateLanguageCode
|
|
} from '@types'
|
|
|
|
/**
 * Built-in OCR provider descriptor for Tesseract.
 *
 * Declares the `image` capability only.
 */
export const tesseract: OcrTesseractProvider = {
  id: 'tesseract',
  name: 'Tesseract',
  capabilities: { image: true }
} as const
|
|
|
|
/**
 * Built-in OCR provider descriptor with id `system`.
 *
 * Declares the `image` capability only; the `pdf` capability is left
 * commented out and is therefore not advertised.
 */
export const systemOcr: OcrSystemProvider = {
  id: 'system',
  name: 'System',
  capabilities: {
    image: true
    // pdf: true
  }
} as const satisfies OcrSystemProvider
|
|
|
|
/**
 * Built-in OCR provider descriptor for PaddleOCR (id `paddleocr`).
 *
 * Declares the `image` capability only; the `pdf` capability is left
 * commented out and is therefore not advertised.
 */
export const ppocrOcr: OcrPpocrProvider = {
  id: 'paddleocr',
  name: 'PaddleOCR',
  capabilities: {
    image: true
    // pdf: true
  }
} as const
|
|
|
|
/**
 * Built-in OCR provider descriptor with id `ovocr`, displayed as
 * "Intel OV(NPU) OCR".
 *
 * Declares the `image` capability only; the `pdf` capability is left
 * commented out and is therefore not advertised.
 */
export const ovOcr: OcrOvProvider = {
  id: 'ovocr',
  name: 'Intel OV(NPU) OCR',
  capabilities: {
    image: true
    // pdf: true
  }
} as const satisfies OcrOvProvider
|
|
|
|
/**
 * Configuration values for each built-in OCR provider, keyed by provider id.
 *
 * Every entry is validated against its provider-specific config type with
 * `satisfies`, and the map as a whole is validated to have a key for each
 * `BuiltinOcrProviderId`. `as const` preserves the literal types of the
 * values. The outer check uses `unknown` rather than `any`: only the key set
 * needs checking there, since each value is already checked individually.
 *
 * NOTE(review): presumably these serve as the initial defaults for each
 * provider (e.g. the empty `apiUrl` to be filled in later) — confirm against
 * callers.
 */
export const BUILTIN_OCR_PROVIDER_CONFIG_MAP = {
  tesseract: {
    langs: {
      chi_sim: true,
      chi_tra: true,
      eng: true
    }
  } satisfies OcrTesseractConfig,
  system: {
    langs: ['en-us']
  } satisfies OcrSystemConfig,
  paddleocr: {
    apiUrl: ''
  } satisfies OcrPpocrConfig,
  ovocr: {
    langs: ['en-us', 'zh-cn']
  } satisfies OcrOvConfig
} as const satisfies Record<BuiltinOcrProviderId, unknown>
|
|
|
|
/**
 * Lookup table from built-in OCR provider id to its provider descriptor.
 *
 * Validated with `satisfies` to have an entry for each
 * `BuiltinOcrProviderId`; insertion order is tesseract, system, paddleocr,
 * ovocr.
 */
export const BUILTIN_OCR_PROVIDERS_MAP = {
  tesseract,
  system: systemOcr,
  paddleocr: ppocrOcr,
  ovocr: ovOcr
} as const satisfies Record<BuiltinOcrProviderId, BuiltinOcrProvider>
|
|
|
|
/**
 * All built-in OCR provider descriptors as an array, in the insertion order
 * of `BUILTIN_OCR_PROVIDERS_MAP`.
 */
export const BUILTIN_OCR_PROVIDERS: BuiltinOcrProvider[] =
  Object.values(BUILTIN_OCR_PROVIDERS_MAP)
|
|
|
|
/**
 * Maps a translate language code (lower-case, hyphenated, e.g. `'zh-cn'`)
 * to the corresponding Tesseract language code (e.g. `'chi_sim'`).
 *
 * Entries are ordered by Tesseract code rather than by key.
 *
 * NOTE(review): `'la-la'` maps to `'lao'` while `'la-va'` maps to `'lat'`;
 * `la` is normally the Latin language code, so confirm `'la-la'` is the key
 * callers actually pass for Lao.
 */
export const TESSERACT_LANG_MAP: Record<TranslateLanguageCode, TesseractLangCode> = {
  'af-za': 'afr',
  'am-et': 'amh',
  'ar-sa': 'ara',
  'as-in': 'asm',
  'az-az': 'aze',
  'az-cyrl-az': 'aze_cyrl',
  'be-by': 'bel',
  'bn-bd': 'ben',
  'bo-cn': 'bod',
  'bs-ba': 'bos',
  'bg-bg': 'bul',
  'ca-es': 'cat',
  'ceb-ph': 'ceb',
  'cs-cz': 'ces',
  'zh-cn': 'chi_sim',
  'zh-tw': 'chi_tra',
  'chr-us': 'chr',
  'cy-gb': 'cym',
  'da-dk': 'dan',
  'de-de': 'deu',
  'dz-bt': 'dzo',
  'el-gr': 'ell',
  'en-us': 'eng',
  'enm-gb': 'enm',
  'eo-world': 'epo',
  'et-ee': 'est',
  'eu-es': 'eus',
  'fa-ir': 'fas',
  'fi-fi': 'fin',
  'fr-fr': 'fra',
  'frk-de': 'frk',
  'frm-fr': 'frm',
  'ga-ie': 'gle',
  'gl-es': 'glg',
  'grc-gr': 'grc',
  'gu-in': 'guj',
  'ht-ht': 'hat',
  'he-il': 'heb',
  'hi-in': 'hin',
  'hr-hr': 'hrv',
  'hu-hu': 'hun',
  'iu-ca': 'iku',
  'id-id': 'ind',
  'is-is': 'isl',
  'it-it': 'ita',
  'ita-it': 'ita_old',
  'jv-id': 'jav',
  'ja-jp': 'jpn',
  'kn-in': 'kan',
  'ka-ge': 'kat',
  'kat-ge': 'kat_old',
  'kk-kz': 'kaz',
  'km-kh': 'khm',
  'ky-kg': 'kir',
  'ko-kr': 'kor',
  'ku-tr': 'kur',
  'la-la': 'lao',
  'la-va': 'lat',
  'lv-lv': 'lav',
  'lt-lt': 'lit',
  'ml-in': 'mal',
  'mr-in': 'mar',
  'mk-mk': 'mkd',
  'mt-mt': 'mlt',
  'ms-my': 'msa',
  'my-mm': 'mya',
  'ne-np': 'nep',
  'nl-nl': 'nld',
  'no-no': 'nor',
  'or-in': 'ori',
  'pa-in': 'pan',
  'pl-pl': 'pol',
  'pt-pt': 'por',
  'ps-af': 'pus',
  'ro-ro': 'ron',
  'ru-ru': 'rus',
  'sa-in': 'san',
  'si-lk': 'sin',
  'sk-sk': 'slk',
  'sl-si': 'slv',
  'es-es': 'spa',
  'spa-es': 'spa_old',
  'sq-al': 'sqi',
  'sr-rs': 'srp',
  'sr-latn-rs': 'srp_latn',
  'sw-tz': 'swa',
  'sv-se': 'swe',
  'syr-sy': 'syr',
  'ta-in': 'tam',
  'te-in': 'tel',
  'tg-tj': 'tgk',
  'tl-ph': 'tgl',
  'th-th': 'tha',
  'ti-er': 'tir',
  'tr-tr': 'tur',
  'ug-cn': 'uig',
  'uk-ua': 'ukr',
  'ur-pk': 'urd',
  'uz-uz': 'uzb',
  'uz-cyrl-uz': 'uzb_cyrl',
  'vi-vn': 'vie',
  'yi-us': 'yid'
}
|