From 37ad896f6ae696b4bca45cb512979a3b7c1a79b2 Mon Sep 17 00:00:00 2001 From: icarus Date: Mon, 20 Oct 2025 06:47:02 +0800 Subject: [PATCH] refactor(ocr): restructure OCR provider configuration and types - Remove separate configs from store and move them into provider definitions - Add Zod schemas for OCR provider types and configurations - Update migration to use new provider structure - Make OCR provider config non-nullable in database schema - Clean up unused OCR preference settings --- packages/shared/config/ocr.ts | 43 ++++----- .../data/preference/preferenceSchemas.ts | 14 --- src/main/data/db/schemas/ocr/provider.ts | 3 +- .../CustomLanguageModal.tsx | 3 + src/renderer/src/store/index.ts | 2 +- src/renderer/src/store/migrate.ts | 39 +------- src/renderer/src/store/ocr.ts | 36 +++---- src/renderer/src/types/index.ts | 13 ++- src/renderer/src/types/ocr.ts | 93 ++++++++++++------- 9 files changed, 115 insertions(+), 131 deletions(-) diff --git a/packages/shared/config/ocr.ts b/packages/shared/config/ocr.ts index 788242eca3..bb43f558f1 100644 --- a/packages/shared/config/ocr.ts +++ b/packages/shared/config/ocr.ts @@ -1,13 +1,9 @@ import type { BuiltinOcrProvider, BuiltinOcrProviderId, - OcrOvConfig, OcrOvProvider, - OcrPpocrConfig, OcrPpocrProvider, - OcrSystemConfig, OcrSystemProvider, - OcrTesseractConfig, OcrTesseractProvider, TesseractLangCode, TranslateLanguageCode @@ -18,6 +14,14 @@ export const tesseract: OcrTesseractProvider = { name: 'Tesseract', capabilities: { image: true + }, + config: { + langs: { + chi_sim: true, + chi_tra: true, + eng: true + }, + enabled: false } } as const @@ -27,6 +31,10 @@ export const systemOcr: OcrSystemProvider = { capabilities: { image: true // pdf: true + }, + config: { + langs: ['en-us'], + enabled: false } } as const satisfies OcrSystemProvider @@ -36,7 +44,8 @@ export const ppocrOcr: OcrPpocrProvider = { capabilities: { image: true // pdf: true - } + }, + config: { apiUrl: '', enabled: false } } as const export const 
ovOcr: OcrOvProvider = { @@ -45,34 +54,20 @@ export const ovOcr: OcrOvProvider = { capabilities: { image: true // pdf: true + }, + config: { + enabled: false } } as const satisfies OcrOvProvider -export const BUILTIN_OCR_PROVIDER_CONFIG_MAP = { - tesseract: { - langs: { - chi_sim: true, - chi_tra: true, - eng: true - } - } satisfies OcrTesseractConfig, - system: { - langs: ['en-us'] - } satisfies OcrSystemConfig, - paddleocr: { - apiUrl: '' - } satisfies OcrPpocrConfig, - ovocr: {} satisfies OcrOvConfig -} as const satisfies Record - -export const BUILTIN_OCR_PROVIDERS_MAP = { +export const INITIAL_BUILTIN_OCR_PROVIDER_MAP = { tesseract, system: systemOcr, paddleocr: ppocrOcr, ovocr: ovOcr } as const satisfies Record -export const BUILTIN_OCR_PROVIDERS: BuiltinOcrProvider[] = Object.values(BUILTIN_OCR_PROVIDERS_MAP) +export const BUILTIN_OCR_PROVIDERS: BuiltinOcrProvider[] = Object.values(INITIAL_BUILTIN_OCR_PROVIDER_MAP) export const TESSERACT_LANG_MAP: Record = { 'af-za': 'afr', diff --git a/packages/shared/data/preference/preferenceSchemas.ts b/packages/shared/data/preference/preferenceSchemas.ts index 08a8c12ba3..43ddfd6655 100644 --- a/packages/shared/data/preference/preferenceSchemas.ts +++ b/packages/shared/data/preference/preferenceSchemas.ts @@ -9,10 +9,8 @@ * === AUTO-GENERATED CONTENT START === */ -import { BUILTIN_OCR_PROVIDER_CONFIG_MAP } from '@shared/config/ocr' import { TRANSLATE_PROMPT } from '@shared/config/prompts' import * as PreferenceTypes from '@shared/data/preference/preferenceTypes' -import type { OcrOvConfig, OcrPpocrConfig, OcrSystemConfig, OcrTesseractConfig } from '@types' /* eslint @typescript-eslint/member-ordering: ["error", { "interfaces": { "order": "alphabetically" }, @@ -353,14 +351,6 @@ export interface PreferenceSchemas { 'feature.translate.model_prompt': string // redux/settings/targetLanguage 'feature.translate.target_language': string - // redux/ocr/configs/ovocr - 'ocr.provider.config.ovocr': OcrOvConfig - // 
redux/ocr/configs/paddleocr - 'ocr.provider.config.paddleocr': OcrPpocrConfig - // redux/ocr/configs/system - 'ocr.provider.config.system': OcrSystemConfig - // redux/ocr/configs/tesseract - 'ocr.provider.config.tesseract': OcrTesseractConfig // redux/ocr/imageProviderId 'ocr.settings.image_provider_id': string | null // redux/shortcuts/shortcuts.exit_fullscreen @@ -624,10 +614,6 @@ export const DefaultPreferences: PreferenceSchemas = { 'feature.selection.trigger_mode': PreferenceTypes.SelectionTriggerMode.Selected, 'feature.translate.model_prompt': TRANSLATE_PROMPT, 'feature.translate.target_language': 'en-us', - 'ocr.provider.config.ovocr': BUILTIN_OCR_PROVIDER_CONFIG_MAP.ovocr, - 'ocr.provider.config.paddleocr': BUILTIN_OCR_PROVIDER_CONFIG_MAP.paddleocr, - 'ocr.provider.config.system': BUILTIN_OCR_PROVIDER_CONFIG_MAP.system, - 'ocr.provider.config.tesseract': BUILTIN_OCR_PROVIDER_CONFIG_MAP.tesseract, 'ocr.settings.image_provider_id': null, 'shortcut.app.exit_fullscreen': { editable: false, enabled: true, key: ['Escape'], system: true }, 'shortcut.app.search_message': { diff --git a/src/main/data/db/schemas/ocr/provider.ts b/src/main/data/db/schemas/ocr/provider.ts index 75b2281ec2..a7bb078613 100644 --- a/src/main/data/db/schemas/ocr/provider.ts +++ b/src/main/data/db/schemas/ocr/provider.ts @@ -36,9 +36,8 @@ export const ocrProviderTable = sqliteTable( * Since this is a polymorphic field, both frontend and backend must validate * that the structure matches the expected schema for the corresponding provider type * before saving. - * This field is nullable because `config` in the `OcrProvider` type is optional. */ - config: text('config', { mode: 'json' }).$type(), + config: text('config', { mode: 'json' }).$type().notNull(), /** Timestamps. May not useful. 
*/ ...createUpdateTimestamps diff --git a/src/renderer/src/pages/settings/TranslateSettingsPopup/CustomLanguageModal.tsx b/src/renderer/src/pages/settings/TranslateSettingsPopup/CustomLanguageModal.tsx index 34e063fcaa..10c1508adf 100644 --- a/src/renderer/src/pages/settings/TranslateSettingsPopup/CustomLanguageModal.tsx +++ b/src/renderer/src/pages/settings/TranslateSettingsPopup/CustomLanguageModal.tsx @@ -142,6 +142,9 @@ const CustomLanguageModal = ({ isOpen, editingCustomLanguage, onAdd, onEdit, onC rules={[ { required: true, message: t('settings.translate.custom.error.langCode.empty') }, { + // TODO: use TranslateLanguageCodeSchema here. Modify it when migrating to new UI. + // NOTE: Since any uppercase letters are converted to lowercase before the code is saved to IndexedDB, + // it's safe to replace it with the case-sensitive TranslateLanguageCodeSchema. pattern: /^[a-zA-Z]{2,3}(-[a-zA-Z]{2,3})?$/, message: t('settings.translate.custom.error.langCode.invalid') }, diff --git a/src/renderer/src/store/index.ts b/src/renderer/src/store/index.ts index 402b444a23..30d5dfe309 100644 --- a/src/renderer/src/store/index.ts +++ b/src/renderer/src/store/index.ts @@ -69,7 +69,7 @@ const persistedReducer = persistReducer( { key: 'cherry-studio', storage, - version: 164, + version: 163, blacklist: ['runtime', 'messages', 'messageBlocks', 'tabs'], migrate }, diff --git a/src/renderer/src/store/migrate.ts b/src/renderer/src/store/migrate.ts index d79367b557..c162dd7b7a 100644 --- a/src/renderer/src/store/migrate.ts +++ b/src/renderer/src/store/migrate.ts @@ -30,12 +30,12 @@ import type { TranslateLanguageCode, WebSearchProvider } from '@renderer/types' -import { isBuiltinOcrProvider, isSystemProvider, SystemProviderIds } from '@renderer/types' +import { isSystemProvider, SystemProviderIds } from '@renderer/types' import { getDefaultGroupName, getLeadingEmoji, runAsyncFunction, uuid } from '@renderer/utils' import { getDefaultOcrProvider } from '@renderer/utils/ocr' import { 
defaultByPassRules } from '@shared/config/constant' import { BUILTIN_OCR_PROVIDERS } from '@shared/config/ocr' -import { BUILTIN_OCR_PROVIDER_CONFIG_MAP, BUILTIN_OCR_PROVIDERS_MAP } from '@shared/config/ocr' +import { INITIAL_BUILTIN_OCR_PROVIDER_MAP } from '@shared/config/ocr' import { TRANSLATE_PROMPT } from '@shared/config/prompts' import { DefaultPreferences } from '@shared/data/preference/preferenceSchemas' import { UpgradeChannel } from '@shared/data/preference/preferenceTypes' @@ -2235,7 +2235,6 @@ const migrateConfig = { }, '137': (state: RootState) => { try { - // @ts-expect-error old migration state.ocr = { providers: BUILTIN_OCR_PROVIDERS, imageProviderId: getDefaultOcrProvider('image').id @@ -2249,7 +2248,7 @@ const migrateConfig = { }, '138': (state: RootState) => { try { - addOcrProvider(state, BUILTIN_OCR_PROVIDERS_MAP.system) + addOcrProvider(state, INITIAL_BUILTIN_OCR_PROVIDER_MAP.system) return state } catch (error) { logger.error('migrate 138 error', error as Error) @@ -2428,7 +2427,7 @@ const migrateConfig = { }, '148': (state: RootState) => { try { - addOcrProvider(state, BUILTIN_OCR_PROVIDERS_MAP.paddleocr) + addOcrProvider(state, INITIAL_BUILTIN_OCR_PROVIDER_MAP.paddleocr) return state } catch (error) { logger.error('migrate 148 error', error as Error) @@ -2677,7 +2676,7 @@ const migrateConfig = { }, '163': (state: RootState) => { try { - addOcrProvider(state, BUILTIN_OCR_PROVIDERS_MAP.ovocr) + addOcrProvider(state, INITIAL_BUILTIN_OCR_PROVIDER_MAP.ovocr) state.llm.providers.forEach((provider) => { if (provider.id === 'cherryin') { provider.anthropicApiHost = 'https://open.cherryin.net' @@ -2688,34 +2687,6 @@ const migrateConfig = { logger.error('migrate 163 error', error as Error) return state } - }, - '164': (state: RootState) => { - try { - state.ocr.providers.forEach((p) => { - if (isBuiltinOcrProvider(p)) { - switch (p.id) { - case 'ovocr': - state.ocr.configs.ovocr = p.config ?? 
BUILTIN_OCR_PROVIDER_CONFIG_MAP.ovocr - break - case 'paddleocr': - state.ocr.configs.paddleocr = p.config ?? BUILTIN_OCR_PROVIDER_CONFIG_MAP.paddleocr - break - case 'system': - state.ocr.configs.system = p.config ?? BUILTIN_OCR_PROVIDER_CONFIG_MAP.system - break - case 'tesseract': - state.ocr.configs.tesseract = p.config ?? BUILTIN_OCR_PROVIDER_CONFIG_MAP.tesseract - break - default: - logger.warn(`Unknown ocr provider ${p.id}. Skipped.`) - } - } - }) - return state - } catch (error) { - logger.error('migrate 164 error', error as Error) - return state - } } } diff --git a/src/renderer/src/store/ocr.ts b/src/renderer/src/store/ocr.ts index f9e78c79b9..ac2fa5d137 100644 --- a/src/renderer/src/store/ocr.ts +++ b/src/renderer/src/store/ocr.ts @@ -1,24 +1,16 @@ import type { PayloadAction } from '@reduxjs/toolkit' import { createSlice } from '@reduxjs/toolkit' -import type { BuiltinOcrProviderId, OcrProvider, OcrProviderConfig } from '@renderer/types' +import type { OcrProvider } from '@renderer/types' import { getDefaultOcrProvider } from '@renderer/utils/ocr' import { BUILTIN_OCR_PROVIDERS } from '@shared/config/ocr' -import { BUILTIN_OCR_PROVIDER_CONFIG_MAP } from '@shared/config/ocr' export interface OcrState { providers: OcrProvider[] - configs: Record imageProviderId: string } const initialState: OcrState = { providers: BUILTIN_OCR_PROVIDERS, - configs: { - tesseract: BUILTIN_OCR_PROVIDER_CONFIG_MAP.tesseract, - system: BUILTIN_OCR_PROVIDER_CONFIG_MAP.system, - paddleocr: BUILTIN_OCR_PROVIDER_CONFIG_MAP.paddleocr, - ovocr: BUILTIN_OCR_PROVIDER_CONFIG_MAP.ovocr - }, imageProviderId: getDefaultOcrProvider('image').id } @@ -46,18 +38,18 @@ const ocrSlice = createSlice({ Object.assign(state.providers[index], action.payload) } }, - updateOcrProviderConfig( - state, - action: PayloadAction<{ id: string; update: Omit, 'id'> }> - ) { - const index = state.providers.findIndex((provider) => provider.id === action.payload.id) - if (index !== -1) { - if 
(!state.providers[index].config) { - state.providers[index].config = {} - } - Object.assign(state.providers[index].config, action.payload.update) - } - }, + // updateOcrProviderConfig( + // state, + // action: PayloadAction<{ id: string; update: Omit, 'id'> }> + // ) { + // const index = state.providers.findIndex((provider) => provider.id === action.payload.id) + // if (index !== -1) { + // if (!state.providers[index].config) { + // state.providers[index].config = {} + // } + // Object.assign(state.providers[index].config, action.payload.update) + // } + // }, setImageOcrProviderId(state, action: PayloadAction) { state.imageProviderId = action.payload } @@ -69,7 +61,7 @@ export const { addOcrProvider, removeOcrProvider, updateOcrProvider, - updateOcrProviderConfig, + // updateOcrProviderConfig, setImageOcrProviderId } = ocrSlice.actions diff --git a/src/renderer/src/types/index.ts b/src/renderer/src/types/index.ts index 5a0e5fffbb..40b64fe3ad 100644 --- a/src/renderer/src/types/index.ts +++ b/src/renderer/src/types/index.ts @@ -7,6 +7,8 @@ import type { CSSProperties } from 'react' export * from './file' export * from './note' +import * as z from 'zod' + import type { StreamTextParams } from './aiCoreTypes' import type { Chunk } from './chunk' import type { FileMetadata } from './file' @@ -480,9 +482,14 @@ export type GenerateImageResponse = { images: string[] } -// 为了支持自定义语言,设置为string别名 -/** zh-cn, en-us, etc. */ -export type TranslateLanguageCode = string +/** + * Language code pattern used for translation features. + * Examples: "zh-cn", "en-us", "fr-fr", etc. + * Must be lowercase: a 2-3 letter language code, optionally followed by + * a hyphen and a 2-3 letter region code (mirrors the custom-language form pattern). 
+ */ +export const TranslateLanguageCodeSchema = z.string().regex(/^[a-z]{2,3}(-[a-z]{2,3})?$/) +export type TranslateLanguageCode = z.infer // langCode应当能够唯一确认一种语言 export type TranslateLanguage = { diff --git a/src/renderer/src/types/ocr.ts b/src/renderer/src/types/ocr.ts index d2a0a72d25..fba68752ca 100644 --- a/src/renderer/src/types/ocr.ts +++ b/src/renderer/src/types/ocr.ts @@ -2,7 +2,7 @@ import type Tesseract from 'tesseract.js' import * as z from 'zod' import type { FileMetadata, ImageFileMetadata, TranslateLanguageCode } from '.' -import { isImageFileMetadata } from '.' +import { isImageFileMetadata, TranslateLanguageCodeSchema } from '.' export const BuiltinOcrProviderIds = { tesseract: 'tesseract', @@ -72,33 +72,23 @@ export const isOcrProviderApiConfig = (config: unknown): config is OcrProviderAp * Extend this type to define provider-specific config types. */ export const OcrProviderBaseConfigSchema = z.object({ - /** Not used for now. Could safely remove. */ - api: OcrProviderApiConfigSchema.optional(), - /** Not used for now. Could safely remove. */ - models: z.array(OcrModelSchema).optional(), - /** Not used for now. Could safely remove. 
*/ - enabled: z.boolean().optional() + enabled: z.boolean().default(false) }) export type OcrProviderBaseConfig = z.infer -export type OcrProviderConfig = - | OcrApiProviderConfig - | OcrTesseractConfig - | OcrSystemConfig - | OcrPpocrConfig - | OcrOvConfig +export const OcrProviderConfigSchema = OcrProviderBaseConfigSchema.loose() + +export type OcrProviderConfig = z.infer export const OcrProviderSchema = z.object({ id: z.string(), name: z.string(), - capabilities: OcrProviderCapabilityRecordSchema + capabilities: OcrProviderCapabilityRecordSchema, + config: OcrProviderConfigSchema }) -export type OcrProvider = z.infer & { - /** @deprecated */ - config?: OcrProviderBaseConfig -} +export type OcrProvider = z.infer export const isOcrProvider = (p: unknown): p is OcrProvider => { return OcrProviderSchema.safeParse(p).success @@ -170,13 +160,18 @@ export type OcrHandler = (file: SupportedOcrFile) => Promise export type OcrImageHandler = (file: ImageFileMetadata) => Promise -// Tesseract Types -export type OcrTesseractConfig = OcrProviderBaseConfig & { - langs?: Partial> -} +// ========================================================== +// Tesseract OCR Types +// ========================================================== +export const OcrTesseractConfigSchema = OcrProviderBaseConfigSchema.extend({ + langs: z.record(TranslateLanguageCodeSchema, z.boolean()).optional() +}) + +export type OcrTesseractConfig = z.infer export type OcrTesseractProvider = { id: 'tesseract' + config: OcrTesseractConfig } & ImageOcrProvider & BuiltinOcrProvider @@ -186,13 +181,16 @@ export const isOcrTesseractProvider = (p: OcrProvider): p is OcrTesseractProvide export type TesseractLangCode = Tesseract.LanguageCode -// System Types -export type OcrSystemConfig = OcrProviderBaseConfig & { +// ========================================================== +// System OCR Types +// ========================================================== +export interface OcrSystemConfig extends 
OcrProviderBaseConfig { langs?: TranslateLanguageCode[] } export type OcrSystemProvider = { id: 'system' + config: OcrSystemConfig } & ImageOcrProvider & // PdfOcrProvider & BuiltinOcrProvider @@ -201,14 +199,23 @@ export const isOcrSystemProvider = (p: OcrProvider): p is OcrSystemProvider => { return p.id === BuiltinOcrProviderIds.system } -// PaddleOCR Types -export type OcrPpocrConfig = OcrProviderBaseConfig & { - apiUrl?: string - accessToken?: string +// ========================================================== +// PaddleOCR Types +// ========================================================== +export const OcrPpocrConfigSchema = OcrProviderBaseConfigSchema.extend({ + apiUrl: z.string().optional(), + accessToken: z.string().optional() +}) + +export type OcrPpocrConfig = z.infer + +export const isOcrPpocrConfig = (config: unknown): config is OcrPpocrConfig => { + return OcrPpocrConfigSchema.safeParse(config).success } export type OcrPpocrProvider = { id: 'paddleocr' + config: OcrPpocrConfig } & ImageOcrProvider & // PdfOcrProvider & BuiltinOcrProvider @@ -217,14 +224,23 @@ export const isOcrPpocrProvider = (p: OcrProvider): p is OcrPpocrProvider => { return p.id === BuiltinOcrProviderIds.paddleocr } -// OV OCR Types -export type OcrOvConfig = OcrProviderBaseConfig & { +// ========================================================== +// OV OCR Types +// ========================================================== +export const OcrOvConfigSchema = OcrProviderBaseConfigSchema.extend({ // It's not configurable for now. 
- // langs?: TranslateLanguageCode[] + // langs: z.array(TranslateLanguageCodeSchema).optional() +}) + +export type OcrOvConfig = z.infer + +export const isOcrOvConfig = (config: unknown): config is OcrOvConfig => { + return OcrOvConfigSchema.safeParse(config).success } export type OcrOvProvider = { id: 'ovocr' + config: OcrOvConfig } & ImageOcrProvider & // PdfOcrProvider & BuiltinOcrProvider @@ -232,3 +248,18 @@ export type OcrOvProvider = { export const isOcrOVProvider = (p: OcrProvider): p is OcrOvProvider => { return p.id === BuiltinOcrProviderIds.ovocr } + +// ========================================================== +// Data-API OCR Types (notable) +// ========================================================== + +const TimestampExtendShape = { + createdAt: z.number().nullable(), + updatedAt: z.number().nullable() +} + +export const ListOcrProvidersResponseSchema = z.object({ + data: z.array(OcrProviderSchema.extend(TimestampExtendShape)) +}) + +export type ListOcrProvidersResponse = z.infer