Merge branch 'feat/ocr' into feat/ocr-translate

This commit is contained in:
icarus 2025-08-23 15:55:56 +08:00
commit 7d1c775afd
8 changed files with 173 additions and 28 deletions

View File

@ -1,10 +1,8 @@
import { loggerService } from '@logger'
import { BuiltinOcrProviderIds, OcrProvider, OcrResult, SupportedOcrFile } from '@types'
import { BuiltinOcrProviderIds, OcrHandler, OcrProvider, OcrResult, SupportedOcrFile } from '@types'
import { tesseractService } from './tesseract/TesseractService'
type OcrHandler = (file: SupportedOcrFile) => Promise<OcrResult>
const logger = loggerService.withContext('OcrService')
export class OcrService {

View File

@ -1,5 +1,5 @@
import { loggerService } from '@logger'
import * as OcrService from '@renderer/services/OcrService'
import * as OcrService from '@renderer/services/ocr/OcrService'
import { useAppSelector } from '@renderer/store'
import { ImageFileMetadata, isImageFile, SupportedOcrFile } from '@renderer/types'
import { uuid } from '@renderer/utils'

View File

@ -1,14 +0,0 @@
import { OcrProvider, OcrResult, SupportedOcrFile } from '@renderer/types'
// const logger = loggerService.withContext('renderer:OcrService')
/**
 * Run OCR on a file by delegating to `window.api.ocr.ocr`.
 *
 * @param file any supported file
 * @param provider ocr provider that should process the file
 * @returns the ocr result produced for the file
 * @throws {Error}
 */
export const ocr = async (file: SupportedOcrFile, provider: OcrProvider): Promise<OcrResult> => {
  const pendingResult = window.api.ocr.ocr(file, provider)
  return pendingResult
}

View File

@ -0,0 +1,23 @@
import { loggerService } from '@logger'
import { isOcrApiProvider, OcrProvider, OcrResult, SupportedOcrFile } from '@renderer/types'
import { OcrApiClientFactory } from './clients/OcrApiClientFactory'
const logger = loggerService.withContext('renderer:OcrService')
/**
 * Run OCR on a file with the given provider.
 *
 * Providers that pass `isOcrApiProvider` are served by a client obtained from
 * `OcrApiClientFactory`; every other provider is forwarded to `window.api.ocr.ocr`.
 *
 * @param file any supported file
 * @param provider ocr provider
 * @returns ocr result
 * @throws {Error}
 */
export const ocr = async (file: SupportedOcrFile, provider: OcrProvider): Promise<OcrResult> => {
  logger.info(`ocr file ${file.path}`)

  // No usable API config on the provider: hand the request to `window.api`.
  if (!isOcrApiProvider(provider)) {
    return window.api.ocr.ocr(file, provider)
  }

  const apiClient = OcrApiClientFactory.create(provider)
  return apiClient.ocr(file)
}

View File

@ -0,0 +1,28 @@
import { loggerService } from '@logger'
import { OcrApiProvider } from '@renderer/types'
import { OcrBaseApiClient } from './OcrBaseApiClient'
import { OcrExampleApiClient } from './OcrExampleApiClient'
const logger = loggerService.withContext('OcrApiClientFactory')
export class OcrApiClientFactory {
  /**
   * Build the API client instance that serves the given OCR provider.
   *
   * Every provider currently resolves to an `OcrExampleApiClient`.
   *
   * @param provider provider the client is created for
   * @returns the client instance for that provider
   */
  static create(provider: OcrApiProvider): OcrBaseApiClient {
    const { id, config } = provider
    logger.debug(`Creating ApiClient for provider:`, { id, config })

    // Extend other clients here
    const client: OcrBaseApiClient = new OcrExampleApiClient(provider)
    return client
  }
}

View File

@ -0,0 +1,43 @@
import { OcrApiProvider, OcrHandler } from '@renderer/types'
/**
 * Base class for OCR clients that talk to a remote API.
 *
 * The constructor reads the host and the API key from the provider's
 * `config.api` once and stores them on the instance. Concrete clients provide
 * the `ocr` handler.
 */
export abstract class OcrBaseApiClient {
  public provider: OcrApiProvider
  protected host: string
  protected apiKey: string

  constructor(provider: OcrApiProvider) {
    this.provider = provider
    this.host = this.getHost()
    // Note: with several configured keys this call already advances the rotation.
    this.apiKey = this.getApiKey()
  }

  /** Runs OCR on a file; supplied by each concrete client. */
  abstract ocr: OcrHandler

  // copy from BaseApiClient
  /**
   * @returns the `apiHost` from the provider's API config
   */
  public getHost(): string {
    return this.provider.config.api.apiHost
  }

  // copy from BaseApiClient
  /**
   * Pick the API key to use from the provider's comma-separated `apiKey` string.
   *
   * Entries are trimmed and blank entries (e.g. from a trailing or doubled
   * comma) are ignored so an empty string never enters the rotation. With a
   * single key that key is returned and nothing is stored. With several keys
   * the key following the last used one is returned, wrapping around, and is
   * recorded in `window.keyv` under `ocr_provider:<id>:last_used_key`.
   *
   * @returns the selected key, or `''` when no non-blank key is configured
   */
  public getApiKey(): string {
    const keys = this.provider.config.api.apiKey
      .split(',')
      .map((key) => key.trim())
      .filter((key) => key !== '')
    const keyName = `ocr_provider:${this.provider.id}:last_used_key`

    // Nothing usable configured: keep returning an empty key as before.
    if (keys.length === 0) {
      return ''
    }

    if (keys.length === 1) {
      return keys[0]
    }

    const lastUsedKey = window.keyv.get(keyName)
    if (!lastUsedKey) {
      window.keyv.set(keyName, keys[0])
      return keys[0]
    }

    // A stored key that is no longer configured yields index -1, so the
    // rotation restarts at the first key.
    const currentIndex = keys.indexOf(lastUsedKey)
    const nextIndex = (currentIndex + 1) % keys.length
    const nextKey = keys[nextIndex]
    window.keyv.set(keyName, nextKey)

    return nextKey
  }
}

View File

@ -0,0 +1,15 @@
import { OcrApiProvider, SupportedOcrFile } from '@renderer/types'
import { OcrBaseApiClient } from './OcrBaseApiClient'
export type OcrExampleProvider = OcrApiProvider
/**
 * Example client built on `OcrBaseApiClient`.
 *
 * Construction is inherited unchanged from the base class. `ocr` does not
 * contact any service; it resolves to a placeholder text containing the
 * file's path.
 */
export class OcrExampleApiClient extends OcrBaseApiClient {
  public ocr = async (file: SupportedOcrFile) => {
    const text = `Example output: ${file.path}`
    return { text }
  }
}

View File

@ -1,4 +1,4 @@
import { FileMetadata, ImageFileMetadata, isImageFile, Model } from '.'
import { FileMetadata, ImageFileMetadata, isImageFile } from '.'
export const BuiltinOcrProviderIds = {
tesseract: 'tesseract'
@ -23,22 +23,70 @@ export const isOcrProviderCapability = (cap: string): cap is OcrProviderCapabili
export type OcrProviderCapabilityRecord = Partial<Record<OcrProviderCapability, boolean>>
// OCR models and providers share the same type definition.
// A provider can offer capabilities to process multiple file types,
// while a model belonging to that provider may be limited to processing only one specific file type.
export type OcrModelCapabilityRecord = OcrProviderCapabilityRecord
/**
 * Describes a single OCR model.
 */
export interface OcrModel {
id: string
name: string
/** Presumably the `id` of the provider this model belongs to — confirm against callers. */
providerId: string
/** Capability flags for this model; uses the same record type as provider capabilities. */
capabilities: OcrModelCapabilityRecord
}
/**
* Extend this type to define provider-specific config types.
*/
export type OcrProviderApiConfig = {
apiKey: string
apiHost: string
apiVersion?: string
}
/**
 * Runtime type guard for `OcrProviderApiConfig`.
 *
 * Accepts any non-null object whose `apiKey` and `apiHost` are strings and
 * whose `apiVersion` is absent, `undefined`, or a string.
 *
 * @param config value to check
 * @returns `true` when `config` has the shape of an `OcrProviderApiConfig`
 */
export const isOcrProviderApiConfig = (config: unknown): config is OcrProviderApiConfig => {
  if (typeof config !== 'object' || config === null) {
    return false
  }
  if (!('apiKey' in config) || typeof config.apiKey !== 'string') {
    return false
  }
  if (!('apiHost' in config) || typeof config.apiHost !== 'string') {
    return false
  }
  // `apiVersion?: string` also admits an explicit `undefined`, so only reject
  // a value that is present and is not a string.
  return !('apiVersion' in config) || config.apiVersion === undefined || typeof config.apiVersion === 'string'
}
/**
* Reserved for future use: model-based OCR and API-based OCR may each need a different API client.
*
* Extend this type to define provider-specific config types.
*/
export type OcrProviderConfig = {
/** Not used for now. Could safely remove. */
api?: OcrProviderApiConfig
/** Not used for now. Could safely remove. */
models?: OcrModel[]
/** Not used for now. Could safely remove. */
enabled?: boolean
}
export type OcrProvider = {
id: string
name: string
capabilities: OcrProviderCapabilityRecord
config?: {
// for future. Model based ocr, api based ocr. May different api client.
api?: {
apiKey: string
apiHost: string
apiVersion?: string
}
models?: Model[]
enabled?: boolean
config?: OcrProviderConfig
}
/**
 * An `OcrProvider` whose `config` is required and whose `config.api`
 * (an `OcrProviderApiConfig`) is required as well.
 */
export type OcrApiProvider = OcrProvider & {
config: OcrProviderConfig & {
api: OcrProviderApiConfig
}
}
/**
 * Type guard: tells whether a provider carries an API config that passes
 * `isOcrProviderApiConfig`.
 *
 * @param p provider to inspect
 * @returns `true` when `p.config.api` exists and is a valid API config
 */
export const isOcrApiProvider = (p: OcrProvider): p is OcrApiProvider => {
  const api = p.config?.api
  if (!api) {
    return false
  }
  return isOcrProviderApiConfig(api)
}
export type BuiltinOcrProvider = OcrProvider & {
id: BuiltinOcrProviderId
}
@ -71,3 +119,7 @@ export const isSupportedOcrFile = (file: FileMetadata): file is SupportedOcrFile
export type OcrResult = {
text: string
}
/** Function that takes any supported OCR file and resolves to an `OcrResult`. */
export type OcrHandler = (file: SupportedOcrFile) => Promise<OcrResult>
/** Function that takes an image file (`ImageFileMetadata`) and resolves to an `OcrResult`. */
export type OcrImageHandler = (file: ImageFileMetadata) => Promise<OcrResult>