mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-30 07:39:06 +08:00
refactor(ocr): move ocr config to shared package for reuse
Centralize OCR configuration in shared package to avoid duplication and improve maintainability. This change affects multiple components that previously imported from renderer config.
This commit is contained in:
parent
9386a4d482
commit
e640beb874
182
packages/shared/config/ocr.ts
Normal file
182
packages/shared/config/ocr.ts
Normal file
@ -0,0 +1,182 @@
|
||||
import type {
|
||||
BuiltinOcrProvider,
|
||||
BuiltinOcrProviderId,
|
||||
OcrOvConfig,
|
||||
OcrOvProvider,
|
||||
OcrPpocrConfig,
|
||||
OcrPpocrProvider,
|
||||
OcrSystemConfig,
|
||||
OcrSystemProvider,
|
||||
OcrTesseractConfig,
|
||||
OcrTesseractProvider,
|
||||
TesseractLangCode,
|
||||
TranslateLanguageCode
|
||||
} from '@types'
|
||||
|
||||
export const tesseract: OcrTesseractProvider = {
|
||||
id: 'tesseract',
|
||||
name: 'Tesseract',
|
||||
capabilities: {
|
||||
image: true
|
||||
}
|
||||
} as const
|
||||
|
||||
export const systemOcr: OcrSystemProvider = {
|
||||
id: 'system',
|
||||
name: 'System',
|
||||
capabilities: {
|
||||
image: true
|
||||
// pdf: true
|
||||
}
|
||||
} as const satisfies OcrSystemProvider
|
||||
|
||||
export const ppocrOcr: OcrPpocrProvider = {
|
||||
id: 'paddleocr',
|
||||
name: 'PaddleOCR',
|
||||
capabilities: {
|
||||
image: true
|
||||
// pdf: true
|
||||
}
|
||||
} as const
|
||||
|
||||
export const ovOcr: OcrOvProvider = {
|
||||
id: 'ovocr',
|
||||
name: 'Intel OV(NPU) OCR',
|
||||
capabilities: {
|
||||
image: true
|
||||
// pdf: true
|
||||
}
|
||||
} as const satisfies OcrOvProvider
|
||||
|
||||
export const BUILTIN_OCR_PROVIDER_CONFIG_MAP = {
|
||||
tesseract: {
|
||||
langs: {
|
||||
chi_sim: true,
|
||||
chi_tra: true,
|
||||
eng: true
|
||||
}
|
||||
} satisfies OcrTesseractConfig,
|
||||
system: {
|
||||
langs: ['en-us']
|
||||
} satisfies OcrSystemConfig,
|
||||
paddleocr: {
|
||||
apiUrl: ''
|
||||
} satisfies OcrPpocrConfig,
|
||||
ovocr: {
|
||||
langs: ['en-us', 'zh-cn']
|
||||
} satisfies OcrOvConfig
|
||||
} as const satisfies Record<BuiltinOcrProviderId, any>
|
||||
|
||||
export const BUILTIN_OCR_PROVIDERS_MAP = {
|
||||
tesseract,
|
||||
system: systemOcr,
|
||||
paddleocr: ppocrOcr,
|
||||
ovocr: ovOcr
|
||||
} as const satisfies Record<BuiltinOcrProviderId, BuiltinOcrProvider>
|
||||
|
||||
export const BUILTIN_OCR_PROVIDERS: BuiltinOcrProvider[] = Object.values(BUILTIN_OCR_PROVIDERS_MAP)
|
||||
|
||||
export const TESSERACT_LANG_MAP: Record<TranslateLanguageCode, TesseractLangCode> = {
|
||||
'af-za': 'afr',
|
||||
'am-et': 'amh',
|
||||
'ar-sa': 'ara',
|
||||
'as-in': 'asm',
|
||||
'az-az': 'aze',
|
||||
'az-cyrl-az': 'aze_cyrl',
|
||||
'be-by': 'bel',
|
||||
'bn-bd': 'ben',
|
||||
'bo-cn': 'bod',
|
||||
'bs-ba': 'bos',
|
||||
'bg-bg': 'bul',
|
||||
'ca-es': 'cat',
|
||||
'ceb-ph': 'ceb',
|
||||
'cs-cz': 'ces',
|
||||
'zh-cn': 'chi_sim',
|
||||
'zh-tw': 'chi_tra',
|
||||
'chr-us': 'chr',
|
||||
'cy-gb': 'cym',
|
||||
'da-dk': 'dan',
|
||||
'de-de': 'deu',
|
||||
'dz-bt': 'dzo',
|
||||
'el-gr': 'ell',
|
||||
'en-us': 'eng',
|
||||
'enm-gb': 'enm',
|
||||
'eo-world': 'epo',
|
||||
'et-ee': 'est',
|
||||
'eu-es': 'eus',
|
||||
'fa-ir': 'fas',
|
||||
'fi-fi': 'fin',
|
||||
'fr-fr': 'fra',
|
||||
'frk-de': 'frk',
|
||||
'frm-fr': 'frm',
|
||||
'ga-ie': 'gle',
|
||||
'gl-es': 'glg',
|
||||
'grc-gr': 'grc',
|
||||
'gu-in': 'guj',
|
||||
'ht-ht': 'hat',
|
||||
'he-il': 'heb',
|
||||
'hi-in': 'hin',
|
||||
'hr-hr': 'hrv',
|
||||
'hu-hu': 'hun',
|
||||
'iu-ca': 'iku',
|
||||
'id-id': 'ind',
|
||||
'is-is': 'isl',
|
||||
'it-it': 'ita',
|
||||
'ita-it': 'ita_old',
|
||||
'jv-id': 'jav',
|
||||
'ja-jp': 'jpn',
|
||||
'kn-in': 'kan',
|
||||
'ka-ge': 'kat',
|
||||
'kat-ge': 'kat_old',
|
||||
'kk-kz': 'kaz',
|
||||
'km-kh': 'khm',
|
||||
'ky-kg': 'kir',
|
||||
'ko-kr': 'kor',
|
||||
'ku-tr': 'kur',
|
||||
'la-la': 'lao',
|
||||
'la-va': 'lat',
|
||||
'lv-lv': 'lav',
|
||||
'lt-lt': 'lit',
|
||||
'ml-in': 'mal',
|
||||
'mr-in': 'mar',
|
||||
'mk-mk': 'mkd',
|
||||
'mt-mt': 'mlt',
|
||||
'ms-my': 'msa',
|
||||
'my-mm': 'mya',
|
||||
'ne-np': 'nep',
|
||||
'nl-nl': 'nld',
|
||||
'no-no': 'nor',
|
||||
'or-in': 'ori',
|
||||
'pa-in': 'pan',
|
||||
'pl-pl': 'pol',
|
||||
'pt-pt': 'por',
|
||||
'ps-af': 'pus',
|
||||
'ro-ro': 'ron',
|
||||
'ru-ru': 'rus',
|
||||
'sa-in': 'san',
|
||||
'si-lk': 'sin',
|
||||
'sk-sk': 'slk',
|
||||
'sl-si': 'slv',
|
||||
'es-es': 'spa',
|
||||
'spa-es': 'spa_old',
|
||||
'sq-al': 'sqi',
|
||||
'sr-rs': 'srp',
|
||||
'sr-latn-rs': 'srp_latn',
|
||||
'sw-tz': 'swa',
|
||||
'sv-se': 'swe',
|
||||
'syr-sy': 'syr',
|
||||
'ta-in': 'tam',
|
||||
'te-in': 'tel',
|
||||
'tg-tj': 'tgk',
|
||||
'tl-ph': 'tgl',
|
||||
'th-th': 'tha',
|
||||
'ti-er': 'tir',
|
||||
'tr-tr': 'tur',
|
||||
'ug-cn': 'uig',
|
||||
'uk-ua': 'ukr',
|
||||
'ur-pk': 'urd',
|
||||
'uz-uz': 'uzb',
|
||||
'uz-cyrl-uz': 'uzb_cyrl',
|
||||
'vi-vn': 'vie',
|
||||
'yi-us': 'yid'
|
||||
}
|
||||
@ -1,189 +1,8 @@
|
||||
import type {
|
||||
BuiltinOcrProvider,
|
||||
BuiltinOcrProviderId,
|
||||
OcrOvConfig,
|
||||
OcrOvProvider,
|
||||
OcrPpocrConfig,
|
||||
OcrPpocrProvider,
|
||||
OcrProviderCapability,
|
||||
OcrSystemConfig,
|
||||
OcrSystemProvider,
|
||||
OcrTesseractConfig,
|
||||
OcrTesseractProvider,
|
||||
TesseractLangCode,
|
||||
TranslateLanguageCode
|
||||
} from '@renderer/types'
|
||||
import type { BuiltinOcrProvider, OcrProviderCapability } from '@renderer/types'
|
||||
import { systemOcr, tesseract } from '@shared/config/ocr'
|
||||
|
||||
import { isMac, isWin } from './constant'
|
||||
|
||||
const tesseract: OcrTesseractProvider = {
|
||||
id: 'tesseract',
|
||||
name: 'Tesseract',
|
||||
capabilities: {
|
||||
image: true
|
||||
}
|
||||
} as const
|
||||
|
||||
const systemOcr: OcrSystemProvider = {
|
||||
id: 'system',
|
||||
name: 'System',
|
||||
capabilities: {
|
||||
image: true
|
||||
// pdf: true
|
||||
}
|
||||
} as const satisfies OcrSystemProvider
|
||||
|
||||
const ppocrOcr: OcrPpocrProvider = {
|
||||
id: 'paddleocr',
|
||||
name: 'PaddleOCR',
|
||||
capabilities: {
|
||||
image: true
|
||||
// pdf: true
|
||||
}
|
||||
} as const
|
||||
|
||||
const ovOcr: OcrOvProvider = {
|
||||
id: 'ovocr',
|
||||
name: 'Intel OV(NPU) OCR',
|
||||
capabilities: {
|
||||
image: true
|
||||
// pdf: true
|
||||
}
|
||||
} as const satisfies OcrOvProvider
|
||||
|
||||
export const BUILTIN_OCR_PROVIDER_CONFIG_MAP = {
|
||||
tesseract: {
|
||||
langs: {
|
||||
chi_sim: true,
|
||||
chi_tra: true,
|
||||
eng: true
|
||||
}
|
||||
} satisfies OcrTesseractConfig,
|
||||
system: {
|
||||
langs: isWin ? ['en-us'] : undefined
|
||||
} satisfies OcrSystemConfig,
|
||||
paddleocr: {
|
||||
apiUrl: ''
|
||||
} satisfies OcrPpocrConfig,
|
||||
ovocr: {
|
||||
langs: isWin ? ['en-us', 'zh-cn'] : undefined
|
||||
} satisfies OcrOvConfig
|
||||
} as const satisfies Record<BuiltinOcrProviderId, any>
|
||||
|
||||
export const BUILTIN_OCR_PROVIDERS_MAP = {
|
||||
tesseract,
|
||||
system: systemOcr,
|
||||
paddleocr: ppocrOcr,
|
||||
ovocr: ovOcr
|
||||
} as const satisfies Record<BuiltinOcrProviderId, BuiltinOcrProvider>
|
||||
|
||||
export const BUILTIN_OCR_PROVIDERS: BuiltinOcrProvider[] = Object.values(BUILTIN_OCR_PROVIDERS_MAP)
|
||||
|
||||
export const DEFAULT_OCR_PROVIDER = {
|
||||
image: isWin || isMac ? systemOcr : tesseract
|
||||
} as const satisfies Record<OcrProviderCapability, BuiltinOcrProvider>
|
||||
|
||||
export const TESSERACT_LANG_MAP: Record<TranslateLanguageCode, TesseractLangCode> = {
|
||||
'af-za': 'afr',
|
||||
'am-et': 'amh',
|
||||
'ar-sa': 'ara',
|
||||
'as-in': 'asm',
|
||||
'az-az': 'aze',
|
||||
'az-cyrl-az': 'aze_cyrl',
|
||||
'be-by': 'bel',
|
||||
'bn-bd': 'ben',
|
||||
'bo-cn': 'bod',
|
||||
'bs-ba': 'bos',
|
||||
'bg-bg': 'bul',
|
||||
'ca-es': 'cat',
|
||||
'ceb-ph': 'ceb',
|
||||
'cs-cz': 'ces',
|
||||
'zh-cn': 'chi_sim',
|
||||
'zh-tw': 'chi_tra',
|
||||
'chr-us': 'chr',
|
||||
'cy-gb': 'cym',
|
||||
'da-dk': 'dan',
|
||||
'de-de': 'deu',
|
||||
'dz-bt': 'dzo',
|
||||
'el-gr': 'ell',
|
||||
'en-us': 'eng',
|
||||
'enm-gb': 'enm',
|
||||
'eo-world': 'epo',
|
||||
'et-ee': 'est',
|
||||
'eu-es': 'eus',
|
||||
'fa-ir': 'fas',
|
||||
'fi-fi': 'fin',
|
||||
'fr-fr': 'fra',
|
||||
'frk-de': 'frk',
|
||||
'frm-fr': 'frm',
|
||||
'ga-ie': 'gle',
|
||||
'gl-es': 'glg',
|
||||
'grc-gr': 'grc',
|
||||
'gu-in': 'guj',
|
||||
'ht-ht': 'hat',
|
||||
'he-il': 'heb',
|
||||
'hi-in': 'hin',
|
||||
'hr-hr': 'hrv',
|
||||
'hu-hu': 'hun',
|
||||
'iu-ca': 'iku',
|
||||
'id-id': 'ind',
|
||||
'is-is': 'isl',
|
||||
'it-it': 'ita',
|
||||
'ita-it': 'ita_old',
|
||||
'jv-id': 'jav',
|
||||
'ja-jp': 'jpn',
|
||||
'kn-in': 'kan',
|
||||
'ka-ge': 'kat',
|
||||
'kat-ge': 'kat_old',
|
||||
'kk-kz': 'kaz',
|
||||
'km-kh': 'khm',
|
||||
'ky-kg': 'kir',
|
||||
'ko-kr': 'kor',
|
||||
'ku-tr': 'kur',
|
||||
'la-la': 'lao',
|
||||
'la-va': 'lat',
|
||||
'lv-lv': 'lav',
|
||||
'lt-lt': 'lit',
|
||||
'ml-in': 'mal',
|
||||
'mr-in': 'mar',
|
||||
'mk-mk': 'mkd',
|
||||
'mt-mt': 'mlt',
|
||||
'ms-my': 'msa',
|
||||
'my-mm': 'mya',
|
||||
'ne-np': 'nep',
|
||||
'nl-nl': 'nld',
|
||||
'no-no': 'nor',
|
||||
'or-in': 'ori',
|
||||
'pa-in': 'pan',
|
||||
'pl-pl': 'pol',
|
||||
'pt-pt': 'por',
|
||||
'ps-af': 'pus',
|
||||
'ro-ro': 'ron',
|
||||
'ru-ru': 'rus',
|
||||
'sa-in': 'san',
|
||||
'si-lk': 'sin',
|
||||
'sk-sk': 'slk',
|
||||
'sl-si': 'slv',
|
||||
'es-es': 'spa',
|
||||
'spa-es': 'spa_old',
|
||||
'sq-al': 'sqi',
|
||||
'sr-rs': 'srp',
|
||||
'sr-latn-rs': 'srp_latn',
|
||||
'sw-tz': 'swa',
|
||||
'sv-se': 'swe',
|
||||
'syr-sy': 'syr',
|
||||
'ta-in': 'tam',
|
||||
'te-in': 'tel',
|
||||
'tg-tj': 'tgk',
|
||||
'tl-ph': 'tgl',
|
||||
'th-th': 'tha',
|
||||
'ti-er': 'tir',
|
||||
'tr-tr': 'tur',
|
||||
'ug-cn': 'uig',
|
||||
'uk-ua': 'ukr',
|
||||
'ur-pk': 'urd',
|
||||
'uz-uz': 'uzb',
|
||||
'uz-cyrl-uz': 'uzb_cyrl',
|
||||
'vi-vn': 'vie',
|
||||
'yi-us': 'yid'
|
||||
}
|
||||
|
||||
@ -4,12 +4,12 @@ import { loggerService } from '@logger'
|
||||
import IntelLogo from '@renderer/assets/images/providers/intel.png'
|
||||
import PaddleocrLogo from '@renderer/assets/images/providers/paddleocr.png'
|
||||
import TesseractLogo from '@renderer/assets/images/providers/Tesseract.js.png'
|
||||
import { BUILTIN_OCR_PROVIDERS_MAP } from '@renderer/config/ocr'
|
||||
import { getBuiltinOcrProviderLabel } from '@renderer/i18n/label'
|
||||
import { useAppSelector } from '@renderer/store'
|
||||
import { addOcrProvider, removeOcrProvider, updateOcrProviderConfig } from '@renderer/store/ocr'
|
||||
import type { OcrProvider, OcrProviderConfig } from '@renderer/types'
|
||||
import { isBuiltinOcrProvider, isBuiltinOcrProviderId, isImageOcrProvider } from '@renderer/types'
|
||||
import { BUILTIN_OCR_PROVIDERS_MAP } from '@shared/config/ocr'
|
||||
import { FileQuestionMarkIcon, MonitorIcon } from 'lucide-react'
|
||||
import { useCallback, useMemo } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
|
||||
@ -2,11 +2,11 @@
|
||||
import { Flex } from '@cherrystudio/ui'
|
||||
import { InfoTooltip } from '@cherrystudio/ui'
|
||||
import CustomTag from '@renderer/components/Tags/CustomTag'
|
||||
import { TESSERACT_LANG_MAP } from '@renderer/config/ocr'
|
||||
import { useOcrProvider } from '@renderer/hooks/useOcrProvider'
|
||||
import useTranslate from '@renderer/hooks/useTranslate'
|
||||
import type { TesseractLangCode } from '@renderer/types'
|
||||
import { BuiltinOcrProviderIds, isOcrTesseractProvider } from '@renderer/types'
|
||||
import { TESSERACT_LANG_MAP } from '@shared/config/ocr'
|
||||
import { Select } from 'antd'
|
||||
import { useCallback, useMemo, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
|
||||
@ -11,12 +11,7 @@ import {
|
||||
isNotSupportedTextDelta,
|
||||
SYSTEM_MODELS
|
||||
} from '@renderer/config/models'
|
||||
import {
|
||||
BUILTIN_OCR_PROVIDER_CONFIG_MAP,
|
||||
BUILTIN_OCR_PROVIDERS,
|
||||
BUILTIN_OCR_PROVIDERS_MAP,
|
||||
DEFAULT_OCR_PROVIDER
|
||||
} from '@renderer/config/ocr'
|
||||
import { DEFAULT_OCR_PROVIDER } from '@renderer/config/ocr'
|
||||
import {
|
||||
isSupportArrayContentProvider,
|
||||
isSupportDeveloperRoleProvider,
|
||||
@ -39,6 +34,8 @@ import type {
|
||||
import { isBuiltinOcrProvider, isSystemProvider, SystemProviderIds } from '@renderer/types'
|
||||
import { getDefaultGroupName, getLeadingEmoji, runAsyncFunction, uuid } from '@renderer/utils'
|
||||
import { defaultByPassRules } from '@shared/config/constant'
|
||||
import { BUILTIN_OCR_PROVIDERS } from '@shared/config/ocr'
|
||||
import { BUILTIN_OCR_PROVIDER_CONFIG_MAP, BUILTIN_OCR_PROVIDERS_MAP } from '@shared/config/ocr'
|
||||
import { TRANSLATE_PROMPT } from '@shared/config/prompts'
|
||||
import { DefaultPreferences } from '@shared/data/preference/preferenceSchemas'
|
||||
import { UpgradeChannel } from '@shared/data/preference/preferenceTypes'
|
||||
|
||||
@ -1,7 +1,9 @@
|
||||
import type { PayloadAction } from '@reduxjs/toolkit'
|
||||
import { createSlice } from '@reduxjs/toolkit'
|
||||
import { BUILTIN_OCR_PROVIDER_CONFIG_MAP, BUILTIN_OCR_PROVIDERS, DEFAULT_OCR_PROVIDER } from '@renderer/config/ocr'
|
||||
import { DEFAULT_OCR_PROVIDER } from '@renderer/config/ocr'
|
||||
import type { BuiltinOcrProviderId, OcrProvider, OcrProviderConfig } from '@renderer/types'
|
||||
import { BUILTIN_OCR_PROVIDERS } from '@shared/config/ocr'
|
||||
import { BUILTIN_OCR_PROVIDER_CONFIG_MAP } from '@shared/config/ocr'
|
||||
|
||||
export interface OcrState {
|
||||
providers: OcrProvider[]
|
||||
|
||||
Loading…
Reference in New Issue
Block a user