From e640beb8743b82ae1fd1450be5076a908b4e7c76 Mon Sep 17 00:00:00 2001 From: icarus Date: Mon, 20 Oct 2025 01:37:00 +0800 Subject: [PATCH] refactor(ocr): move ocr config to shared package for reuse Centralize OCR configuration in shared package to avoid duplication and improve maintainability. This change affects multiple components that previously imported from renderer config. --- packages/shared/config/ocr.ts | 182 +++++++++++++++++ src/renderer/src/config/ocr.ts | 185 +----------------- src/renderer/src/hooks/useOcrProvider.tsx | 2 +- .../OcrTesseractSettings.tsx | 2 +- src/renderer/src/store/migrate.ts | 9 +- src/renderer/src/store/ocr.ts | 4 +- 6 files changed, 192 insertions(+), 192 deletions(-) create mode 100644 packages/shared/config/ocr.ts diff --git a/packages/shared/config/ocr.ts b/packages/shared/config/ocr.ts new file mode 100644 index 0000000000..2167bae8c1 --- /dev/null +++ b/packages/shared/config/ocr.ts @@ -0,0 +1,182 @@ +import type { + BuiltinOcrProvider, + BuiltinOcrProviderId, + OcrOvConfig, + OcrOvProvider, + OcrPpocrConfig, + OcrPpocrProvider, + OcrSystemConfig, + OcrSystemProvider, + OcrTesseractConfig, + OcrTesseractProvider, + TesseractLangCode, + TranslateLanguageCode +} from '@types' + +export const tesseract: OcrTesseractProvider = { + id: 'tesseract', + name: 'Tesseract', + capabilities: { + image: true + } +} as const + +export const systemOcr: OcrSystemProvider = { + id: 'system', + name: 'System', + capabilities: { + image: true + // pdf: true + } +} as const satisfies OcrSystemProvider + +export const ppocrOcr: OcrPpocrProvider = { + id: 'paddleocr', + name: 'PaddleOCR', + capabilities: { + image: true + // pdf: true + } +} as const + +export const ovOcr: OcrOvProvider = { + id: 'ovocr', + name: 'Intel OV(NPU) OCR', + capabilities: { + image: true + // pdf: true + } +} as const satisfies OcrOvProvider + +export const BUILTIN_OCR_PROVIDER_CONFIG_MAP = { + tesseract: { + langs: { + chi_sim: true, + chi_tra: true, + eng: true + } + } satisfies OcrTesseractConfig, + system: { + langs: ['en-us'] + } satisfies OcrSystemConfig, + paddleocr: { + apiUrl: '' + } satisfies OcrPpocrConfig, + ovocr: { + langs: ['en-us', 'zh-cn'] + } satisfies OcrOvConfig +} as const satisfies Record + +export const BUILTIN_OCR_PROVIDERS_MAP = { + tesseract, + system: systemOcr, + paddleocr: ppocrOcr, + ovocr: ovOcr +} as const satisfies Record + +export const BUILTIN_OCR_PROVIDERS: BuiltinOcrProvider[] = Object.values(BUILTIN_OCR_PROVIDERS_MAP) + +export const TESSERACT_LANG_MAP: Record = { + 'af-za': 'afr', + 'am-et': 'amh', + 'ar-sa': 'ara', + 'as-in': 'asm', + 'az-az': 'aze', + 'az-cyrl-az': 'aze_cyrl', + 'be-by': 'bel', + 'bn-bd': 'ben', + 'bo-cn': 'bod', + 'bs-ba': 'bos', + 'bg-bg': 'bul', + 'ca-es': 'cat', + 'ceb-ph': 'ceb', + 'cs-cz': 'ces', + 'zh-cn': 'chi_sim', + 'zh-tw': 'chi_tra', + 'chr-us': 'chr', + 'cy-gb': 'cym', + 'da-dk': 'dan', + 'de-de': 'deu', + 'dz-bt': 'dzo', + 'el-gr': 'ell', + 'en-us': 'eng', + 'enm-gb': 'enm', + 'eo-world': 'epo', + 'et-ee': 'est', + 'eu-es': 'eus', + 'fa-ir': 'fas', + 'fi-fi': 'fin', + 'fr-fr': 'fra', + 'frk-de': 'frk', + 'frm-fr': 'frm', + 'ga-ie': 'gle', + 'gl-es': 'glg', + 'grc-gr': 'grc', + 'gu-in': 'guj', + 'ht-ht': 'hat', + 'he-il': 'heb', + 'hi-in': 'hin', + 'hr-hr': 'hrv', + 'hu-hu': 'hun', + 'iu-ca': 'iku', + 'id-id': 'ind', + 'is-is': 'isl', + 'it-it': 'ita', + 'ita-it': 'ita_old', + 'jv-id': 'jav', + 'ja-jp': 'jpn', + 'kn-in': 'kan', + 'ka-ge': 'kat', + 'kat-ge': 'kat_old', + 'kk-kz': 'kaz', + 'km-kh': 'khm', + 'ky-kg': 'kir', + 'ko-kr': 'kor', + 'ku-tr': 'kur', + 'la-la': 'lao', + 'la-va': 'lat', + 'lv-lv': 'lav', + 'lt-lt': 'lit', + 'ml-in': 'mal', + 'mr-in': 'mar', + 'mk-mk': 'mkd', + 'mt-mt': 'mlt', + 'ms-my': 'msa', + 'my-mm': 'mya', + 'ne-np': 'nep', + 'nl-nl': 'nld', + 'no-no': 'nor', + 'or-in': 'ori', + 'pa-in': 'pan', + 'pl-pl': 'pol', + 'pt-pt': 'por', + 'ps-af': 'pus', + 'ro-ro': 'ron', + 'ru-ru': 'rus', + 'sa-in': 'san', + 'si-lk': 'sin', + 'sk-sk': 'slk', + 'sl-si': 'slv', + 'es-es': 'spa', + 'spa-es': 'spa_old', + 'sq-al': 'sqi', + 'sr-rs': 'srp', + 'sr-latn-rs': 'srp_latn', + 'sw-tz': 'swa', + 'sv-se': 'swe', + 'syr-sy': 'syr', + 'ta-in': 'tam', + 'te-in': 'tel', + 'tg-tj': 'tgk', + 'tl-ph': 'tgl', + 'th-th': 'tha', + 'ti-er': 'tir', + 'tr-tr': 'tur', + 'ug-cn': 'uig', + 'uk-ua': 'ukr', + 'ur-pk': 'urd', + 'uz-uz': 'uzb', + 'uz-cyrl-uz': 'uzb_cyrl', + 'vi-vn': 'vie', + 'yi-us': 'yid' +} diff --git a/src/renderer/src/config/ocr.ts b/src/renderer/src/config/ocr.ts index 3432dbba73..692cb273b8 100644 --- a/src/renderer/src/config/ocr.ts +++ b/src/renderer/src/config/ocr.ts @@ -1,189 +1,8 @@ -import type { - BuiltinOcrProvider, - BuiltinOcrProviderId, - OcrOvConfig, - OcrOvProvider, - OcrPpocrConfig, - OcrPpocrProvider, - OcrProviderCapability, - OcrSystemConfig, - OcrSystemProvider, - OcrTesseractConfig, - OcrTesseractProvider, - TesseractLangCode, - TranslateLanguageCode -} from '@renderer/types' +import type { BuiltinOcrProvider, OcrProviderCapability } from '@renderer/types' +import { systemOcr, tesseract } from '@shared/config/ocr' import { isMac, isWin } from './constant' -const tesseract: OcrTesseractProvider = { - id: 'tesseract', - name: 'Tesseract', - capabilities: { - image: true - } -} as const - -const systemOcr: OcrSystemProvider = { - id: 'system', - name: 'System', - capabilities: { - image: true - // pdf: true - } -} as const satisfies OcrSystemProvider - -const ppocrOcr: OcrPpocrProvider = { - id: 'paddleocr', - name: 'PaddleOCR', - capabilities: { - image: true - // pdf: true - } -} as const - -const ovOcr: OcrOvProvider = { - id: 'ovocr', - name: 'Intel OV(NPU) OCR', - capabilities: { - image: true - // pdf: true - } -} as const satisfies OcrOvProvider - -export const BUILTIN_OCR_PROVIDER_CONFIG_MAP = { - tesseract: { - langs: { - chi_sim: true, - chi_tra: true, - eng: true - } - } satisfies OcrTesseractConfig, - system: { - langs: isWin ? ['en-us'] : undefined - } satisfies OcrSystemConfig, - paddleocr: { - apiUrl: '' - } satisfies OcrPpocrConfig, - ovocr: { - langs: isWin ? ['en-us', 'zh-cn'] : undefined - } satisfies OcrOvConfig -} as const satisfies Record - -export const BUILTIN_OCR_PROVIDERS_MAP = { - tesseract, - system: systemOcr, - paddleocr: ppocrOcr, - ovocr: ovOcr -} as const satisfies Record - -export const BUILTIN_OCR_PROVIDERS: BuiltinOcrProvider[] = Object.values(BUILTIN_OCR_PROVIDERS_MAP) - export const DEFAULT_OCR_PROVIDER = { image: isWin || isMac ? systemOcr : tesseract } as const satisfies Record - -export const TESSERACT_LANG_MAP: Record = { - 'af-za': 'afr', - 'am-et': 'amh', - 'ar-sa': 'ara', - 'as-in': 'asm', - 'az-az': 'aze', - 'az-cyrl-az': 'aze_cyrl', - 'be-by': 'bel', - 'bn-bd': 'ben', - 'bo-cn': 'bod', - 'bs-ba': 'bos', - 'bg-bg': 'bul', - 'ca-es': 'cat', - 'ceb-ph': 'ceb', - 'cs-cz': 'ces', - 'zh-cn': 'chi_sim', - 'zh-tw': 'chi_tra', - 'chr-us': 'chr', - 'cy-gb': 'cym', - 'da-dk': 'dan', - 'de-de': 'deu', - 'dz-bt': 'dzo', - 'el-gr': 'ell', - 'en-us': 'eng', - 'enm-gb': 'enm', - 'eo-world': 'epo', - 'et-ee': 'est', - 'eu-es': 'eus', - 'fa-ir': 'fas', - 'fi-fi': 'fin', - 'fr-fr': 'fra', - 'frk-de': 'frk', - 'frm-fr': 'frm', - 'ga-ie': 'gle', - 'gl-es': 'glg', - 'grc-gr': 'grc', - 'gu-in': 'guj', - 'ht-ht': 'hat', - 'he-il': 'heb', - 'hi-in': 'hin', - 'hr-hr': 'hrv', - 'hu-hu': 'hun', - 'iu-ca': 'iku', - 'id-id': 'ind', - 'is-is': 'isl', - 'it-it': 'ita', - 'ita-it': 'ita_old', - 'jv-id': 'jav', - 'ja-jp': 'jpn', - 'kn-in': 'kan', - 'ka-ge': 'kat', - 'kat-ge': 'kat_old', - 'kk-kz': 'kaz', - 'km-kh': 'khm', - 'ky-kg': 'kir', - 'ko-kr': 'kor', - 'ku-tr': 'kur', - 'la-la': 'lao', - 'la-va': 'lat', - 'lv-lv': 'lav', - 'lt-lt': 'lit', - 'ml-in': 'mal', - 'mr-in': 'mar', - 'mk-mk': 'mkd', - 'mt-mt': 'mlt', - 'ms-my': 'msa', - 'my-mm': 'mya', - 'ne-np': 'nep', - 'nl-nl': 'nld', - 'no-no': 'nor', - 'or-in': 'ori', - 'pa-in': 'pan', - 'pl-pl': 'pol', - 'pt-pt': 'por', - 'ps-af': 'pus', - 'ro-ro': 'ron', - 'ru-ru': 'rus', - 'sa-in': 'san', - 'si-lk': 'sin', - 'sk-sk': 'slk', - 'sl-si': 'slv', - 'es-es': 'spa', - 'spa-es': 'spa_old', - 'sq-al': 'sqi', - 'sr-rs': 'srp', - 'sr-latn-rs': 'srp_latn', - 'sw-tz': 'swa', - 'sv-se': 'swe', - 'syr-sy': 'syr', - 'ta-in': 'tam', - 'te-in': 'tel', - 'tg-tj': 'tgk', - 'tl-ph': 'tgl', - 'th-th': 'tha', - 'ti-er': 'tir', - 'tr-tr': 'tur', - 'ug-cn': 'uig', - 'uk-ua': 'ukr', - 'ur-pk': 'urd', - 'uz-uz': 'uzb', - 'uz-cyrl-uz': 'uzb_cyrl', - 'vi-vn': 'vie', - 'yi-us': 'yid' -} diff --git a/src/renderer/src/hooks/useOcrProvider.tsx b/src/renderer/src/hooks/useOcrProvider.tsx index 93a49c301a..c0a650f78e 100644 --- a/src/renderer/src/hooks/useOcrProvider.tsx +++ b/src/renderer/src/hooks/useOcrProvider.tsx @@ -4,12 +4,12 @@ import { loggerService } from '@logger' import IntelLogo from '@renderer/assets/images/providers/intel.png' import PaddleocrLogo from '@renderer/assets/images/providers/paddleocr.png' import TesseractLogo from '@renderer/assets/images/providers/Tesseract.js.png' -import { BUILTIN_OCR_PROVIDERS_MAP } from '@renderer/config/ocr' import { getBuiltinOcrProviderLabel } from '@renderer/i18n/label' import { useAppSelector } from '@renderer/store' import { addOcrProvider, removeOcrProvider, updateOcrProviderConfig } from '@renderer/store/ocr' import type { OcrProvider, OcrProviderConfig } from '@renderer/types' import { isBuiltinOcrProvider, isBuiltinOcrProviderId, isImageOcrProvider } from '@renderer/types' +import { BUILTIN_OCR_PROVIDERS_MAP } from '@shared/config/ocr' import { FileQuestionMarkIcon, MonitorIcon } from 'lucide-react' import { useCallback, useMemo } from 'react' import { useTranslation } from 'react-i18next' diff --git a/src/renderer/src/pages/settings/DocProcessSettings/OcrTesseractSettings.tsx b/src/renderer/src/pages/settings/DocProcessSettings/OcrTesseractSettings.tsx index 0cf8ef9eb9..03ca08d8c4 100644 --- a/src/renderer/src/pages/settings/DocProcessSettings/OcrTesseractSettings.tsx +++ b/src/renderer/src/pages/settings/DocProcessSettings/OcrTesseractSettings.tsx @@ -2,11 +2,11 @@ import { Flex } from '@cherrystudio/ui' import { InfoTooltip } from '@cherrystudio/ui' import CustomTag from '@renderer/components/Tags/CustomTag' -import { TESSERACT_LANG_MAP } from '@renderer/config/ocr' import { useOcrProvider } from '@renderer/hooks/useOcrProvider' import useTranslate from '@renderer/hooks/useTranslate' import type { TesseractLangCode } from '@renderer/types' import { BuiltinOcrProviderIds, isOcrTesseractProvider } from '@renderer/types' +import { TESSERACT_LANG_MAP } from '@shared/config/ocr' import { Select } from 'antd' import { useCallback, useMemo, useState } from 'react' import { useTranslation } from 'react-i18next' diff --git a/src/renderer/src/store/migrate.ts b/src/renderer/src/store/migrate.ts index 43ef87d914..afbf9782f4 100644 --- a/src/renderer/src/store/migrate.ts +++ b/src/renderer/src/store/migrate.ts @@ -11,12 +11,7 @@ import { isNotSupportedTextDelta, SYSTEM_MODELS } from '@renderer/config/models' -import { - BUILTIN_OCR_PROVIDER_CONFIG_MAP, - BUILTIN_OCR_PROVIDERS, - BUILTIN_OCR_PROVIDERS_MAP, - DEFAULT_OCR_PROVIDER -} from '@renderer/config/ocr' +import { DEFAULT_OCR_PROVIDER } from '@renderer/config/ocr' import { isSupportArrayContentProvider, isSupportDeveloperRoleProvider, @@ -39,6 +34,8 @@ import type { import { isBuiltinOcrProvider, isSystemProvider, SystemProviderIds } from '@renderer/types' import { getDefaultGroupName, getLeadingEmoji, runAsyncFunction, uuid } from '@renderer/utils' import { defaultByPassRules } from '@shared/config/constant' +import { BUILTIN_OCR_PROVIDERS } from '@shared/config/ocr' +import { BUILTIN_OCR_PROVIDER_CONFIG_MAP, BUILTIN_OCR_PROVIDERS_MAP } from '@shared/config/ocr' import { TRANSLATE_PROMPT } from '@shared/config/prompts' import { DefaultPreferences } from '@shared/data/preference/preferenceSchemas' import { UpgradeChannel } from '@shared/data/preference/preferenceTypes' diff --git a/src/renderer/src/store/ocr.ts b/src/renderer/src/store/ocr.ts index 308fe5c830..3fd8939984 100644 --- a/src/renderer/src/store/ocr.ts +++ b/src/renderer/src/store/ocr.ts @@ -1,7 +1,9 @@ import type { PayloadAction } from '@reduxjs/toolkit' import { createSlice } from '@reduxjs/toolkit' -import { BUILTIN_OCR_PROVIDER_CONFIG_MAP, BUILTIN_OCR_PROVIDERS, DEFAULT_OCR_PROVIDER } from '@renderer/config/ocr' +import { DEFAULT_OCR_PROVIDER } from '@renderer/config/ocr' import type { BuiltinOcrProviderId, OcrProvider, OcrProviderConfig } from '@renderer/types' +import { BUILTIN_OCR_PROVIDERS } from '@shared/config/ocr' +import { BUILTIN_OCR_PROVIDER_CONFIG_MAP } from '@shared/config/ocr' export interface OcrState { providers: OcrProvider[]