refactor(ocr): restructure OCR provider configuration and types

- Remove separate configs from store and move them into provider definitions
- Add Zod schemas for OCR provider types and configurations
- Update migration to use new provider structure
- Make OCR provider config non-nullable in database schema
- Clean up unused OCR preference settings
This commit is contained in:
icarus 2025-10-20 06:47:02 +08:00
parent 84a513a6ae
commit 37ad896f6a
9 changed files with 115 additions and 131 deletions

View File

@ -1,13 +1,9 @@
import type {
BuiltinOcrProvider,
BuiltinOcrProviderId,
OcrOvConfig,
OcrOvProvider,
OcrPpocrConfig,
OcrPpocrProvider,
OcrSystemConfig,
OcrSystemProvider,
OcrTesseractConfig,
OcrTesseractProvider,
TesseractLangCode,
TranslateLanguageCode
@ -18,6 +14,14 @@ export const tesseract: OcrTesseractProvider = {
name: 'Tesseract',
capabilities: {
image: true
},
config: {
langs: {
chi_sim: true,
chi_tra: true,
eng: true
},
enabled: false
}
} as const
@ -27,6 +31,10 @@ export const systemOcr: OcrSystemProvider = {
capabilities: {
image: true
// pdf: true
},
config: {
langs: ['en-us'],
enabled: false
}
} as const satisfies OcrSystemProvider
@ -36,7 +44,8 @@ export const ppocrOcr: OcrPpocrProvider = {
capabilities: {
image: true
// pdf: true
}
},
config: { apiUrl: '', enabled: false }
} as const
export const ovOcr: OcrOvProvider = {
@ -45,34 +54,20 @@ export const ovOcr: OcrOvProvider = {
capabilities: {
image: true
// pdf: true
},
config: {
enabled: false
}
} as const satisfies OcrOvProvider
export const BUILTIN_OCR_PROVIDER_CONFIG_MAP = {
tesseract: {
langs: {
chi_sim: true,
chi_tra: true,
eng: true
}
} satisfies OcrTesseractConfig,
system: {
langs: ['en-us']
} satisfies OcrSystemConfig,
paddleocr: {
apiUrl: ''
} satisfies OcrPpocrConfig,
ovocr: {} satisfies OcrOvConfig
} as const satisfies Record<BuiltinOcrProviderId, any>
export const BUILTIN_OCR_PROVIDERS_MAP = {
/**
 * Lookup table from built-in OCR provider id to its initial provider definition.
 *
 * `satisfies` checks that every `BuiltinOcrProviderId` has an entry of type
 * `BuiltinOcrProvider` while keeping the narrower per-key types available to callers.
 */
export const INITIAL_BUILTIN_OCR_PROVIDER_MAP = {
tesseract,
system: systemOcr,
paddleocr: ppocrOcr,
ovocr: ovOcr
} as const satisfies Record<BuiltinOcrProviderId, BuiltinOcrProvider>
export const BUILTIN_OCR_PROVIDERS: BuiltinOcrProvider[] = Object.values(BUILTIN_OCR_PROVIDERS_MAP)
export const BUILTIN_OCR_PROVIDERS: BuiltinOcrProvider[] = Object.values(INITIAL_BUILTIN_OCR_PROVIDER_MAP)
export const TESSERACT_LANG_MAP: Record<TranslateLanguageCode, TesseractLangCode> = {
'af-za': 'afr',

View File

@ -9,10 +9,8 @@
* === AUTO-GENERATED CONTENT START ===
*/
import { BUILTIN_OCR_PROVIDER_CONFIG_MAP } from '@shared/config/ocr'
import { TRANSLATE_PROMPT } from '@shared/config/prompts'
import * as PreferenceTypes from '@shared/data/preference/preferenceTypes'
import type { OcrOvConfig, OcrPpocrConfig, OcrSystemConfig, OcrTesseractConfig } from '@types'
/* eslint @typescript-eslint/member-ordering: ["error", {
"interfaces": { "order": "alphabetically" },
@ -353,14 +351,6 @@ export interface PreferenceSchemas {
'feature.translate.model_prompt': string
// redux/settings/targetLanguage
'feature.translate.target_language': string
// redux/ocr/configs/ovocr
'ocr.provider.config.ovocr': OcrOvConfig
// redux/ocr/configs/paddleocr
'ocr.provider.config.paddleocr': OcrPpocrConfig
// redux/ocr/configs/system
'ocr.provider.config.system': OcrSystemConfig
// redux/ocr/configs/tesseract
'ocr.provider.config.tesseract': OcrTesseractConfig
// redux/ocr/imageProviderId
'ocr.settings.image_provider_id': string | null
// redux/shortcuts/shortcuts.exit_fullscreen
@ -624,10 +614,6 @@ export const DefaultPreferences: PreferenceSchemas = {
'feature.selection.trigger_mode': PreferenceTypes.SelectionTriggerMode.Selected,
'feature.translate.model_prompt': TRANSLATE_PROMPT,
'feature.translate.target_language': 'en-us',
'ocr.provider.config.ovocr': BUILTIN_OCR_PROVIDER_CONFIG_MAP.ovocr,
'ocr.provider.config.paddleocr': BUILTIN_OCR_PROVIDER_CONFIG_MAP.paddleocr,
'ocr.provider.config.system': BUILTIN_OCR_PROVIDER_CONFIG_MAP.system,
'ocr.provider.config.tesseract': BUILTIN_OCR_PROVIDER_CONFIG_MAP.tesseract,
'ocr.settings.image_provider_id': null,
'shortcut.app.exit_fullscreen': { editable: false, enabled: true, key: ['Escape'], system: true },
'shortcut.app.search_message': {

View File

@ -36,9 +36,8 @@ export const ocrProviderTable = sqliteTable(
* Since this is a polymorphic field, both frontend and backend must validate
* that the structure matches the expected schema for the corresponding provider type
* before saving.
* This field is nullable because `config` in the `OcrProvider` type is optional.
*/
config: text('config', { mode: 'json' }).$type<OcrProviderConfig>(),
config: text('config', { mode: 'json' }).$type<OcrProviderConfig>().notNull(),
/** Timestamps. May not be useful. */
...createUpdateTimestamps

View File

@ -142,6 +142,9 @@ const CustomLanguageModal = ({ isOpen, editingCustomLanguage, onAdd, onEdit, onC
rules={[
{ required: true, message: t('settings.translate.custom.error.langCode.empty') },
{
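// TODO: use TranslateLanguageCodeSchema here. Modify it when migrating to new UI.
// NOTE: Since any uppercase letters are converted to lowercase before the value is saved to IndexedDB,
// it's safe to replace this pattern with the case-sensitive TranslateLanguageCodeSchema.
// (Confirm first that the schema also accepts codes without a region suffix, which this pattern allows.)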
pattern: /^[a-zA-Z]{2,3}(-[a-zA-Z]{2,3})?$/,
message: t('settings.translate.custom.error.langCode.invalid')
},

View File

@ -69,7 +69,7 @@ const persistedReducer = persistReducer(
{
key: 'cherry-studio',
storage,
version: 164,
version: 163,
blacklist: ['runtime', 'messages', 'messageBlocks', 'tabs'],
migrate
},

View File

@ -30,12 +30,12 @@ import type {
TranslateLanguageCode,
WebSearchProvider
} from '@renderer/types'
import { isBuiltinOcrProvider, isSystemProvider, SystemProviderIds } from '@renderer/types'
import { isSystemProvider, SystemProviderIds } from '@renderer/types'
import { getDefaultGroupName, getLeadingEmoji, runAsyncFunction, uuid } from '@renderer/utils'
import { getDefaultOcrProvider } from '@renderer/utils/ocr'
import { defaultByPassRules } from '@shared/config/constant'
import { BUILTIN_OCR_PROVIDERS } from '@shared/config/ocr'
import { BUILTIN_OCR_PROVIDER_CONFIG_MAP, BUILTIN_OCR_PROVIDERS_MAP } from '@shared/config/ocr'
import { INITIAL_BUILTIN_OCR_PROVIDER_MAP } from '@shared/config/ocr'
import { TRANSLATE_PROMPT } from '@shared/config/prompts'
import { DefaultPreferences } from '@shared/data/preference/preferenceSchemas'
import { UpgradeChannel } from '@shared/data/preference/preferenceTypes'
@ -2235,7 +2235,6 @@ const migrateConfig = {
},
'137': (state: RootState) => {
try {
// @ts-expect-error old migration
state.ocr = {
providers: BUILTIN_OCR_PROVIDERS,
imageProviderId: getDefaultOcrProvider('image').id
@ -2249,7 +2248,7 @@ const migrateConfig = {
},
'138': (state: RootState) => {
try {
addOcrProvider(state, BUILTIN_OCR_PROVIDERS_MAP.system)
addOcrProvider(state, INITIAL_BUILTIN_OCR_PROVIDER_MAP.system)
return state
} catch (error) {
logger.error('migrate 138 error', error as Error)
@ -2428,7 +2427,7 @@ const migrateConfig = {
},
'148': (state: RootState) => {
try {
addOcrProvider(state, BUILTIN_OCR_PROVIDERS_MAP.paddleocr)
addOcrProvider(state, INITIAL_BUILTIN_OCR_PROVIDER_MAP.paddleocr)
return state
} catch (error) {
logger.error('migrate 148 error', error as Error)
@ -2677,7 +2676,7 @@ const migrateConfig = {
},
'163': (state: RootState) => {
try {
addOcrProvider(state, BUILTIN_OCR_PROVIDERS_MAP.ovocr)
addOcrProvider(state, INITIAL_BUILTIN_OCR_PROVIDER_MAP.ovocr)
state.llm.providers.forEach((provider) => {
if (provider.id === 'cherryin') {
provider.anthropicApiHost = 'https://open.cherryin.net'
@ -2688,34 +2687,6 @@ const migrateConfig = {
logger.error('migrate 163 error', error as Error)
return state
}
},
'164': (state: RootState) => {
try {
state.ocr.providers.forEach((p) => {
if (isBuiltinOcrProvider(p)) {
switch (p.id) {
case 'ovocr':
state.ocr.configs.ovocr = p.config ?? BUILTIN_OCR_PROVIDER_CONFIG_MAP.ovocr
break
case 'paddleocr':
state.ocr.configs.paddleocr = p.config ?? BUILTIN_OCR_PROVIDER_CONFIG_MAP.paddleocr
break
case 'system':
state.ocr.configs.system = p.config ?? BUILTIN_OCR_PROVIDER_CONFIG_MAP.system
break
case 'tesseract':
state.ocr.configs.tesseract = p.config ?? BUILTIN_OCR_PROVIDER_CONFIG_MAP.tesseract
break
default:
logger.warn(`Unknown ocr provider ${p.id}. Skipped.`)
}
}
})
return state
} catch (error) {
logger.error('migrate 164 error', error as Error)
return state
}
}
}

View File

@ -1,24 +1,16 @@
import type { PayloadAction } from '@reduxjs/toolkit'
import { createSlice } from '@reduxjs/toolkit'
import type { BuiltinOcrProviderId, OcrProvider, OcrProviderConfig } from '@renderer/types'
import type { OcrProvider } from '@renderer/types'
import { getDefaultOcrProvider } from '@renderer/utils/ocr'
import { BUILTIN_OCR_PROVIDERS } from '@shared/config/ocr'
import { BUILTIN_OCR_PROVIDER_CONFIG_MAP } from '@shared/config/ocr'
export interface OcrState {
providers: OcrProvider[]
configs: Record<BuiltinOcrProviderId, OcrProviderConfig>
imageProviderId: string
}
const initialState: OcrState = {
providers: BUILTIN_OCR_PROVIDERS,
configs: {
tesseract: BUILTIN_OCR_PROVIDER_CONFIG_MAP.tesseract,
system: BUILTIN_OCR_PROVIDER_CONFIG_MAP.system,
paddleocr: BUILTIN_OCR_PROVIDER_CONFIG_MAP.paddleocr,
ovocr: BUILTIN_OCR_PROVIDER_CONFIG_MAP.ovocr
},
imageProviderId: getDefaultOcrProvider('image').id
}
@ -46,18 +38,18 @@ const ocrSlice = createSlice({
Object.assign(state.providers[index], action.payload)
}
},
updateOcrProviderConfig(
state,
action: PayloadAction<{ id: string; update: Omit<Partial<OcrProviderConfig>, 'id'> }>
) {
const index = state.providers.findIndex((provider) => provider.id === action.payload.id)
if (index !== -1) {
if (!state.providers[index].config) {
state.providers[index].config = {}
}
Object.assign(state.providers[index].config, action.payload.update)
}
},
// updateOcrProviderConfig(
// state,
// action: PayloadAction<{ id: string; update: Omit<Partial<OcrProviderConfig>, 'id'> }>
// ) {
// const index = state.providers.findIndex((provider) => provider.id === action.payload.id)
// if (index !== -1) {
// if (!state.providers[index].config) {
// state.providers[index].config = {}
// }
// Object.assign(state.providers[index].config, action.payload.update)
// }
// },
setImageOcrProviderId(state, action: PayloadAction<string>) {
state.imageProviderId = action.payload
}
@ -69,7 +61,7 @@ export const {
addOcrProvider,
removeOcrProvider,
updateOcrProvider,
updateOcrProviderConfig,
// updateOcrProviderConfig,
setImageOcrProviderId
} = ocrSlice.actions

View File

@ -7,6 +7,8 @@ import type { CSSProperties } from 'react'
export * from './file'
export * from './note'
import * as z from 'zod'
import type { StreamTextParams } from './aiCoreTypes'
import type { Chunk } from './chunk'
import type { FileMetadata } from './file'
@ -480,9 +482,14 @@ export type GenerateImageResponse = {
images: string[]
}
// 为了支持自定义语言设置为string别名
/** zh-cn, en-us, etc. */
export type TranslateLanguageCode = string
/**
 * Zod schema for the language codes used by translation features.
 *
 * Accepts lowercase codes made of a 2-3 letter language part, a hyphen, and a
 * 2-3 letter region part, e.g. "zh-cn", "en-us", "fr-fr".
 *
 * Mind the comma in the `{2,3}` quantifiers: `{2-3}` is not a repetition count
 * and would be matched as the literal text "{2-3}", rejecting every real code.
 */
export const TranslateLanguageCodeSchema = z.string().regex(/^[a-z]{2,3}(-[a-z]{2,3})$/)
/** A translation language code such as "zh-cn"; inferred from the schema, so it is `string` at the type level. */
export type TranslateLanguageCode = z.infer<typeof TranslateLanguageCodeSchema>
// langCode应当能够唯一确认一种语言
export type TranslateLanguage = {

View File

@ -2,7 +2,7 @@ import type Tesseract from 'tesseract.js'
import * as z from 'zod'
import type { FileMetadata, ImageFileMetadata, TranslateLanguageCode } from '.'
import { isImageFileMetadata } from '.'
import { isImageFileMetadata, TranslateLanguageCodeSchema } from '.'
export const BuiltinOcrProviderIds = {
tesseract: 'tesseract',
@ -72,33 +72,23 @@ export const isOcrProviderApiConfig = (config: unknown): config is OcrProviderAp
* Extend this type to define provider-specific config types.
*/
export const OcrProviderBaseConfigSchema = z.object({
/** Not used for now. Could safely remove. */
api: OcrProviderApiConfigSchema.optional(),
/** Not used for now. Could safely remove. */
models: z.array(OcrModelSchema).optional(),
/** Not used for now. Could safely remove. */
enabled: z.boolean().optional()
enabled: z.boolean().default(false)
})
export type OcrProviderBaseConfig = z.infer<typeof OcrProviderBaseConfigSchema>
export type OcrProviderConfig =
| OcrApiProviderConfig
| OcrTesseractConfig
| OcrSystemConfig
| OcrPpocrConfig
| OcrOvConfig
export const OcrProviderConfigSchema = OcrProviderBaseConfigSchema.loose()
export type OcrProviderConfig = z.infer<typeof OcrProviderConfigSchema>
export const OcrProviderSchema = z.object({
id: z.string(),
name: z.string(),
capabilities: OcrProviderCapabilityRecordSchema
capabilities: OcrProviderCapabilityRecordSchema,
config: OcrProviderConfigSchema
})
export type OcrProvider = z.infer<typeof OcrProviderSchema> & {
/** @deprecated */
config?: OcrProviderBaseConfig
}
export type OcrProvider = z.infer<typeof OcrProviderSchema>
export const isOcrProvider = (p: unknown): p is OcrProvider => {
return OcrProviderSchema.safeParse(p).success
@ -170,13 +160,18 @@ export type OcrHandler = (file: SupportedOcrFile) => Promise<OcrResult>
export type OcrImageHandler = (file: ImageFileMetadata) => Promise<OcrResult>
// Tesseract Types
export type OcrTesseractConfig = OcrProviderBaseConfig & {
langs?: Partial<Record<TesseractLangCode, boolean>>
}
// ==========================================================
// Tesseract OCR Types
// ==========================================================
/**
 * Config schema for the Tesseract OCR provider: the base provider config
 * extended with an optional `langs` record of boolean flags.
 *
 * NOTE(review): record keys are validated with TranslateLanguageCodeSchema, but
 * the built-in Tesseract defaults use keys such as `chi_sim`, `chi_tra` and
 * `eng`, which are not in the hyphenated "xx-yy" form that schema describes.
 * Confirm whether keys should be validated as Tesseract language codes
 * instead — TODO confirm.
 */
export const OcrTesseractConfigSchema = OcrProviderBaseConfigSchema.extend({
langs: z.record(TranslateLanguageCodeSchema, z.boolean()).optional()
})
/** Tesseract provider config type, inferred from `OcrTesseractConfigSchema`. */
export type OcrTesseractConfig = z.infer<typeof OcrTesseractConfigSchema>
export type OcrTesseractProvider = {
id: 'tesseract'
config: OcrTesseractConfig
} & ImageOcrProvider &
BuiltinOcrProvider
@ -186,13 +181,16 @@ export const isOcrTesseractProvider = (p: OcrProvider): p is OcrTesseractProvide
export type TesseractLangCode = Tesseract.LanguageCode
// System Types
export type OcrSystemConfig = OcrProviderBaseConfig & {
// ==========================================================
// System OCR Types
// ==========================================================
/**
 * Config for the system OCR provider: the base provider config plus an
 * optional list of translation language codes (e.g. "en-us").
 */
export interface OcrSystemConfig extends OcrProviderBaseConfig {
/** Optional list of language codes for this provider. */
langs?: TranslateLanguageCode[]
}
export type OcrSystemProvider = {
id: 'system'
config: OcrSystemConfig
} & ImageOcrProvider &
// PdfOcrProvider &
BuiltinOcrProvider
@ -201,14 +199,23 @@ export const isOcrSystemProvider = (p: OcrProvider): p is OcrSystemProvider => {
return p.id === BuiltinOcrProviderIds.system
}
// PaddleOCR Types
export type OcrPpocrConfig = OcrProviderBaseConfig & {
apiUrl?: string
accessToken?: string
// ==========================================================
// PaddleOCR Types
// ==========================================================
/**
 * Config schema for the PaddleOCR provider: the base provider config extended
 * with two optional string fields.
 */
export const OcrPpocrConfigSchema = OcrProviderBaseConfigSchema.extend({
// presumably the endpoint of the PaddleOCR service — confirm against the caller
apiUrl: z.string().optional(),
// presumably a credential sent with requests to that endpoint — confirm against the caller
accessToken: z.string().optional()
})
/** PaddleOCR provider config type, inferred from `OcrPpocrConfigSchema`. */
export type OcrPpocrConfig = z.infer<typeof OcrPpocrConfigSchema>
/**
 * Type guard: reports whether `config` parses successfully against
 * `OcrPpocrConfigSchema`.
 *
 * @param config - Any value to check.
 * @returns `true` when the schema's `safeParse` succeeds, narrowing `config` to `OcrPpocrConfig`.
 */
export const isOcrPpocrConfig = (config: unknown): config is OcrPpocrConfig => {
  const { success } = OcrPpocrConfigSchema.safeParse(config)
  return success
}
export type OcrPpocrProvider = {
id: 'paddleocr'
config: OcrPpocrConfig
} & ImageOcrProvider &
// PdfOcrProvider &
BuiltinOcrProvider
@ -217,14 +224,23 @@ export const isOcrPpocrProvider = (p: OcrProvider): p is OcrPpocrProvider => {
return p.id === BuiltinOcrProviderIds.paddleocr
}
// OV OCR Types
export type OcrOvConfig = OcrProviderBaseConfig & {
// ==========================================================
// OV OCR Types
// ==========================================================
/**
 * Config schema for the OV OCR provider. It currently adds no fields on top of
 * the base provider config; the commented-out `langs` entries below sketch a
 * possible future option.
 */
export const OcrOvConfigSchema = OcrProviderBaseConfigSchema.extend({
// It's not configurable for now.
// langs?: TranslateLanguageCode[]
// langs: z.array(TranslateLanguageCodeSchema).optional()
})
/** OV OCR provider config type, inferred from `OcrOvConfigSchema`. */
export type OcrOvConfig = z.infer<typeof OcrOvConfigSchema>
/**
 * Type guard: reports whether `config` parses successfully against
 * `OcrOvConfigSchema`.
 *
 * @param config - Any value to check.
 * @returns `true` when the schema's `safeParse` succeeds, narrowing `config` to `OcrOvConfig`.
 */
export const isOcrOvConfig = (config: unknown): config is OcrOvConfig => {
  const { success } = OcrOvConfigSchema.safeParse(config)
  return success
}
export type OcrOvProvider = {
id: 'ovocr'
config: OcrOvConfig
} & ImageOcrProvider &
// PdfOcrProvider &
BuiltinOcrProvider
@ -232,3 +248,18 @@ export type OcrOvProvider = {
export const isOcrOVProvider = (p: OcrProvider): p is OcrOvProvider => {
return p.id === BuiltinOcrProviderIds.ovocr
}
// ==========================================================
// Data-API OCR Types (notable)
// ==========================================================
/**
 * Extra fields merged into `OcrProviderSchema` for list responses.
 * Both are nullable numbers; presumably epoch timestamps — unit not visible here, TODO confirm.
 */
const TimestampExtendShape = {
createdAt: z.number().nullable(),
updatedAt: z.number().nullable()
}
/**
 * Schema for the "list OCR providers" response: an object whose `data` field is
 * an array of providers, each extended with `createdAt` / `updatedAt`.
 */
export const ListOcrProvidersResponseSchema = z.object({
data: z.array(OcrProviderSchema.extend(TimestampExtendShape))
})
/** Response type inferred from `ListOcrProvidersResponseSchema`. */
export type ListOcrProvidersResponse = z.infer<typeof ListOcrProvidersResponseSchema>