refactor(ocr): restructure ocr service and repository layers

- Extract database operations to new OcrProviderRepository
- Improve service initialization and provider management
- Add better error handling and logging
- Update API handlers to use new service methods
This commit is contained in:
icarus 2025-10-20 19:35:39 +08:00
parent ad67d2558a
commit 4fd3300ed0
3 changed files with 507 additions and 145 deletions

View File

@ -213,7 +213,7 @@ export const apiHandlers: ApiImplementation = {
'/ocr/providers': {
GET: async ({ query }) => {
return ocrService.listProviders(query.registered)
return ocrService.listProviders(query)
},
POST: async ({ body }) => {
return ocrService.createProvider(body)
@ -228,13 +228,13 @@ export const apiHandlers: ApiImplementation = {
if (params.id !== body.id) {
throw new Error('Provider ID in path does not match ID in body')
}
return ocrService.patchProvider(body)
return ocrService.updateProvider(params.id, body)
},
PUT: async ({ params, body }) => {
if (params.id !== body.id) {
throw new Error('Provider ID in path does not match ID in body')
}
return ocrService.putProvider(body)
return ocrService.replaceProvider(body)
},
DELETE: async ({ params }) => {
return ocrService.deleteProvider(params.id)

View File

@ -0,0 +1,260 @@
import { dbService } from '@data/db/DbService'
import { ocrProviderTable } from '@data/db/schemas/ocr/provider'
import { loggerService } from '@logger'
import type {
CreateOcrProviderRequest,
CreateOcrProviderResponse,
DbOcrProvider,
ListOcrProvidersQuery,
ListOcrProvidersResponse,
OcrProviderId,
PatchOcrProviderRequest,
PatchOcrProviderResponse,
PutOcrProviderRequest,
PutOcrProviderResponse
} from '@types'
import { BuiltinOcrProviderIds, isDbOcrProvider } from '@types'
import dayjs from 'dayjs'
import { eq } from 'drizzle-orm'
import { merge } from 'lodash'
// Module-level logger obtained from loggerService with the 'OcrProviderRepository' context.
const logger = loggerService.withContext('OcrProviderRepository')
/**
 * Data access layer for OCR providers.
 *
 * Performs every read and write against `ocrProviderTable` and validates rows with
 * `isDbOcrProvider` before they are persisted. Each public method logs a failure
 * together with the affected provider id and then rethrows the original error.
 */
export class OcrProviderRepository {
  /**
   * List all OCR providers stored in the database.
   *
   * @param query - Accepted so the signature matches the list request. `query.registered`
   *   is deliberately not applied here: whether a provider has a runtime handler is not
   *   something this class can see, so the rows are always returned unfiltered.
   * @returns Every row of the provider table.
   * @throws Rethrows any database error after logging it.
   */
  public async findAll(query?: ListOcrProvidersQuery): Promise<ListOcrProvidersResponse> {
    // Intentionally unused, see the parameter documentation.
    void query
    try {
      const providers = await dbService.getDb().select().from(ocrProviderTable)
      return { data: providers }
    } catch (error) {
      logger.error('Failed to find all OCR providers', error as Error)
      throw error
    }
  }

  /**
   * Get a single OCR provider by id.
   *
   * @param id - Id of the provider to load.
   * @returns The stored provider row.
   * @throws Error `OCR provider <id> not found` when no row matches; database errors are rethrown.
   */
  public async findById(id: OcrProviderId): Promise<DbOcrProvider> {
    try {
      const provider = await this.findOne(id)
      if (!provider) {
        throw new Error(`OCR provider ${id} not found`)
      }
      return provider
    } catch (error) {
      logger.error(`Failed to find OCR provider ${id}`, error as Error)
      throw error
    }
  }

  /**
   * Check whether a provider row with the given id exists.
   *
   * @param id - Id to look up.
   * @returns `true` when a row with that id is present.
   * @throws Rethrows any database error after logging it.
   */
  public async exists(id: OcrProviderId): Promise<boolean> {
    try {
      // Only the id column is selected; the row content is irrelevant here.
      const providers = await dbService
        .getDb()
        .select({ id: ocrProviderTable.id })
        .from(ocrProviderTable)
        .where(eq(ocrProviderTable.id, id))
        .limit(1)
      return providers.length > 0
    } catch (error) {
      logger.error(`Failed to check if OCR provider ${id} exists`, error as Error)
      throw error
    }
  }

  /**
   * Insert a new OCR provider.
   *
   * `createdAt` and `updatedAt` are both set to the current time.
   *
   * @param data - Provider definition to store.
   * @returns The inserted row.
   * @throws Error when a provider with the same id already exists or the data fails
   *   `isDbOcrProvider`; database errors are rethrown.
   */
  public async create(data: CreateOcrProviderRequest): Promise<CreateOcrProviderResponse> {
    try {
      if (await this.exists(data.id)) {
        throw new Error(`OCR provider ${data.id} already exists`)
      }

      const timestamp = dayjs().valueOf()
      const newProvider = {
        ...data,
        createdAt: timestamp,
        updatedAt: timestamp
      } satisfies DbOcrProvider

      // Runtime validation on top of the compile-time `satisfies` check.
      if (!isDbOcrProvider(newProvider)) {
        throw new Error('Invalid OCR provider data')
      }

      const [created] = await dbService.getDb().insert(ocrProviderTable).values(newProvider).returning()
      logger.info(`Created OCR provider: ${data.id}`)
      return { data: created }
    } catch (error) {
      logger.error(`Failed to create OCR provider ${data.id}`, error as Error)
      throw error
    }
  }

  /**
   * Partially update an OCR provider.
   *
   * `data` is deep-merged (lodash `merge`) over the stored row and `updatedAt` is refreshed.
   * The stored `id` and `createdAt` always win over whatever `data` carries.
   *
   * @param id - Id of the provider to update.
   * @param data - Fields to merge into the stored provider.
   * @returns The updated row.
   * @throws Error when the provider does not exist (including when it disappears between
   *   the read and the write) or the merged data fails `isDbOcrProvider`.
   */
  public async update(id: OcrProviderId, data: Partial<PatchOcrProviderRequest>): Promise<PatchOcrProviderResponse> {
    try {
      const existing = await this.findById(id)

      const newProvider = {
        ...merge({}, existing, data),
        // A patch must never re-key the row or rewrite its creation time,
        // even if the payload happens to contain those fields.
        id: existing.id,
        createdAt: existing.createdAt,
        updatedAt: dayjs().valueOf()
      } satisfies DbOcrProvider

      if (!isDbOcrProvider(newProvider)) {
        throw new Error('Invalid OCR provider data')
      }

      const [updated] = await dbService
        .getDb()
        .update(ocrProviderTable)
        .set(newProvider)
        .where(eq(ocrProviderTable.id, id))
        .returning()

      // No returned row means nothing matched the WHERE clause any more.
      if (!updated) {
        throw new Error(`OCR provider ${id} not found`)
      }

      logger.info(`Updated OCR provider: ${id}`)
      return { data: updated }
    } catch (error) {
      logger.error(`Failed to update OCR provider ${id}`, error as Error)
      throw error
    }
  }

  /**
   * Replace an OCR provider, creating it when it does not exist yet (upsert).
   *
   * An existing row keeps its `createdAt`; `updatedAt` is always set to the current time.
   *
   * @param data - Complete provider definition.
   * @returns The stored row.
   * @throws Error when `data.id` is a built-in provider id or the data fails `isDbOcrProvider`;
   *   database errors are rethrown.
   */
  public async replace(data: PutOcrProviderRequest): Promise<PutOcrProviderResponse> {
    try {
      if (BuiltinOcrProviderIds.some((pid) => pid === data.id)) {
        throw new Error('Built-in OCR providers cannot be modified with PUT method.')
      }

      const timestamp = dayjs().valueOf()
      // A single lookup answers both "does it exist?" and "what was its createdAt?".
      const current = await this.findOne(data.id)

      const newProvider: DbOcrProvider = {
        ...data,
        createdAt: current ? current.createdAt : timestamp,
        updatedAt: timestamp
      }

      if (!isDbOcrProvider(newProvider)) {
        throw new Error('Invalid OCR provider data')
      }

      const [saved] = await dbService
        .getDb()
        .insert(ocrProviderTable)
        .values(newProvider)
        .onConflictDoUpdate({
          target: ocrProviderTable.id,
          set: newProvider
        })
        .returning()

      logger.info(`Replaced OCR provider: ${data.id}`)
      return { data: saved }
    } catch (error) {
      logger.error(`Failed to replace OCR provider ${data.id}`, error as Error)
      throw error
    }
  }

  /**
   * Delete an OCR provider.
   *
   * @param id - Id of the provider to delete.
   * @throws Error when `id` is a built-in provider id or no row with that id exists;
   *   database errors are rethrown.
   */
  public async delete(id: OcrProviderId): Promise<void> {
    try {
      if (BuiltinOcrProviderIds.some((pid) => pid === id)) {
        throw new Error('Built-in OCR providers cannot be deleted.')
      }

      // Delete and existence check in one statement: the returned rows tell us
      // whether anything was actually removed.
      const deleted = await dbService
        .getDb()
        .delete(ocrProviderTable)
        .where(eq(ocrProviderTable.id, id))
        .returning({ id: ocrProviderTable.id })

      if (deleted.length === 0) {
        throw new Error(`OCR provider ${id} not found`)
      }

      logger.info(`Deleted OCR provider: ${id}`)
    } catch (error) {
      logger.error(`Failed to delete OCR provider ${id}`, error as Error)
      throw error
    }
  }

  /**
   * Make sure every entry of `BUILTIN_OCR_PROVIDERS` has a row in the database.
   *
   * Providers that already exist are left untouched; missing ones are created
   * through {@link OcrProviderRepository.create}.
   *
   * @throws Rethrows the first error raised while loading the configuration or creating a row.
   */
  public async initializeBuiltInProviders(): Promise<void> {
    try {
      // Loaded lazily via dynamic import rather than at module load time.
      const { BUILTIN_OCR_PROVIDERS } = await import('@shared/config/ocr')

      logger.info('Initializing built-in OCR providers')

      for (const provider of BUILTIN_OCR_PROVIDERS) {
        const exists = await this.exists(provider.id)
        if (!exists) {
          logger.info(`Creating built-in OCR provider: ${provider.id}`)
          await this.create(provider)
        } else {
          logger.debug(`Built-in OCR provider already exists: ${provider.id}`)
        }
      }

      logger.info(`Initialized ${BUILTIN_OCR_PROVIDERS.length} built-in OCR providers`)
    } catch (error) {
      logger.error('Failed to initialize built-in OCR providers', error as Error)
      throw error
    }
  }

  /**
   * Load one provider row, or `undefined` when no row has the given id.
   * Unlike `findById`, a missing row is not treated as an error and nothing is logged.
   */
  private async findOne(id: OcrProviderId): Promise<DbOcrProvider | undefined> {
    const [provider] = await dbService
      .getDb()
      .select()
      .from(ocrProviderTable)
      .where(eq(ocrProviderTable.id, id))
      .limit(1)
    return provider
  }
}
// Module-level instance of the repository, exported for use by other modules.
export const ocrProviderRepository = new OcrProviderRepository()

View File

@ -1,12 +1,14 @@
import { dbService } from '@data/db/DbService'
import { ocrProviderTable } from '@data/db/schemas/ocr/provider'
import { loggerService } from '@logger'
import { ocrProviderRepository } from '@main/data/repositories/OcrProviderRepository'
import type {
CreateOcrProviderRequest,
CreateOcrProviderResponse,
DbOcrProvider,
ListOcrProvidersQuery,
ListOcrProvidersResponse,
OcrParams,
OcrProvider,
OcrProviderId,
OcrResult,
PatchOcrProviderRequest,
PatchOcrProviderResponse,
@ -14,10 +16,7 @@ import type {
PutOcrProviderResponse,
SupportedOcrFile
} from '@types'
import { BuiltinOcrProviderIdMap, BuiltinOcrProviderIds, isDbOcrProvider } from '@types'
import dayjs from 'dayjs'
import { eq } from 'drizzle-orm'
import { merge } from 'lodash'
import { BuiltinOcrProviderIdMap } from '@types'
import type { OcrBaseService } from './builtin/OcrBaseService'
import { ovOcrService } from './builtin/OvOcrService'
@ -27,12 +26,47 @@ import { tesseractService } from './builtin/TesseractService'
const logger = loggerService.withContext('OcrService')
export class OcrService {
private registry: Map<string, OcrBaseService> = new Map()
/**
* Business logic layer for OCR operations
* Handles OCR provider registration, orchestration, and core OCR functionality
*/
class OcrService {
private registry: Map<OcrProviderId, OcrBaseService> = new Map()
private initialized: boolean = false
constructor() {
// TODO: Ensure builtin providers are in db.
// Register built-in providers
this.registerBuiltinProviders()
}
/**
 * Initialization run that is currently in flight, shared by concurrent callers.
 * `null` while no run is pending.
 */
private initializing: Promise<void> | null = null

/**
 * Ensure the service is initialized.
 *
 * The first caller starts `initializeBuiltinProviders()`; callers that arrive while it is
 * still running await the same promise instead of starting a second run. After a
 * successful run this method returns immediately. A failed run rejects for everyone
 * awaiting it and is retried on the next call.
 *
 * @throws Rethrows whatever `initializeBuiltinProviders()` rejects with.
 */
private async ensureInitialized(): Promise<void> {
  if (this.initialized) {
    return
  }

  if (this.initializing === null) {
    this.initializing = (async () => {
      try {
        await this.initializeBuiltinProviders()
        this.initialized = true
      } finally {
        // Clear the slot on success and on failure so that a failed run can be retried.
        this.initializing = null
      }
    })()
  }

  await this.initializing
}
/**
 * Seed the built-in OCR providers.
 *
 * Delegates to `ocrProviderRepository.initializeBuiltInProviders()` and logs the outcome.
 *
 * @throws Rethrows the repository error after logging it.
 */
private async initializeBuiltinProviders(): Promise<void> {
  try {
    // The repository is responsible for making sure the built-in rows exist.
    await ocrProviderRepository.initializeBuiltInProviders()
    logger.info('OCR service initialized with built-in providers')
  } catch (cause) {
    logger.error('Failed to initialize OCR service', cause as Error)
    throw cause
  }
}
/**
* Register built-in providers (sync)
*/
private registerBuiltinProviders(): void {
this.register(BuiltinOcrProviderIdMap.tesseract, tesseractService)
if (systemOcrService) {
@ -46,158 +80,226 @@ export class OcrService {
}
}
private register(providerId: string, service: OcrBaseService): void {
/**
* Register an OCR provider service
*/
private register(providerId: OcrProviderId, service: OcrBaseService): void {
if (this.registry.has(providerId)) {
logger.warn(`Provider ${providerId} has existing handler. Overwrited.`)
logger.warn(`Provider ${providerId} already registered. Overwriting.`)
}
this.registry.set(providerId, service)
logger.info(`Registered OCR provider: ${providerId}`)
}
// @ts-expect-error not used for now, but just keep it.
private unregister(providerId: string): void {
this.registry.delete(providerId)
// Not sure when it will be needed.
/**
* Unregister an OCR provider service
*/
// private unregister(providerId: OcrProviderId): void {
// if (this.registry.delete(providerId)) {
// logger.info(`Unregistered OCR provider: ${providerId}`)
// }
// }
/**
 * List the ids of all providers that currently have a handler in the registry.
 *
 * @returns A new array with the registry keys.
 */
public getRegisteredProviderIds(): OcrProviderId[] {
  return [...this.registry.keys()]
}
public async listProviders(registered?: boolean): Promise<ListOcrProvidersResponse> {
const providers = await dbService.getDb().select().from(ocrProviderTable)
if (registered) {
const registeredKeys = Array.from(this.registry.keys())
return { data: providers.filter((p) => registeredKeys.includes(p.id)) }
} else {
return { data: providers }
}
/**
 * Tell whether the registry holds an entry for the given provider id.
 *
 * @param providerId - Id to look up in the registry.
 * @returns `true` when the registry has an entry under `providerId`.
 */
public isProviderRegistered(providerId: OcrProviderId): boolean {
  const registered = this.registry.has(providerId)
  return registered
}
public async getProvider(providerId: string) {
const providers = await dbService
.getDb()
.select()
.from(ocrProviderTable)
.where(eq(ocrProviderTable.id, providerId))
.limit(1)
if (providers.length === 0) {
throw new Error(`OCR provider ${providerId} not found`)
}
return { data: providers[0] }
}
/**
* Get list of OCR providers
*/
public async listProviders(query?: ListOcrProvidersQuery): Promise<ListOcrProvidersResponse> {
try {
await this.ensureInitialized()
const result = await ocrProviderRepository.findAll(query)
public async patchProvider(update: PatchOcrProviderRequest): Promise<PatchOcrProviderResponse> {
const providers = await dbService
.getDb()
.select()
.from(ocrProviderTable)
.where(eq(ocrProviderTable.id, update.id))
.limit(1)
if (providers.length == 0) {
throw new Error(`OCR provider ${update.id} not found`)
}
const found = providers[0]
const newProvider = { ...merge({}, found, update), updatedAt: dayjs().valueOf() } satisfies DbOcrProvider
if (!isDbOcrProvider(newProvider)) {
throw new Error('Invalid OCR provider data')
}
const [updated] = await dbService
.getDb()
.update(ocrProviderTable)
.set(newProvider)
.where(eq(ocrProviderTable.id, update.id))
.returning()
return { data: updated }
}
public async createProvider(create: CreateOcrProviderRequest): Promise<CreateOcrProviderResponse> {
const providers = await dbService
.getDb()
.select()
.from(ocrProviderTable)
.where(eq(ocrProviderTable.id, create.id))
.limit(1)
if (providers.length > 0) {
throw new Error(`OCR provider ${create.id} already exists`)
}
const timestamp = dayjs().valueOf()
const newProvider = {
...create,
createdAt: timestamp,
updatedAt: timestamp
} satisfies DbOcrProvider
if (!isDbOcrProvider(newProvider)) {
throw new Error('Invalid OCR provider data')
}
const [created] = await dbService.getDb().insert(ocrProviderTable).values(newProvider).returning()
return { data: created }
}
public async putProvider(provider: PutOcrProviderRequest): Promise<PutOcrProviderResponse> {
if (BuiltinOcrProviderIds.some((pid) => pid === provider.id)) {
throw new Error('Builtin OCR providers cannot be modified with PUT method.')
}
const providers = await dbService
.getDb()
.select()
.from(ocrProviderTable)
.where(eq(ocrProviderTable.id, provider.id))
.limit(1)
const timestamp = dayjs().valueOf()
if (providers.length === 0) {
const newProvider = {
...provider,
createdAt: timestamp,
updatedAt: timestamp
} satisfies DbOcrProvider
if (!isDbOcrProvider(newProvider)) {
throw new Error('Invalid OCR provider data')
if (query?.registered) {
// Filter by registered providers
const registeredIds = this.getRegisteredProviderIds()
result.data = result.data.filter((provider) => registeredIds.includes(provider.id))
}
const [created] = await dbService.getDb().insert(ocrProviderTable).values(newProvider).returning()
return { data: created }
}
const existed = providers[0]
const newProvider = {
...provider,
updatedAt: timestamp,
createdAt: existed.createdAt
} satisfies DbOcrProvider
if (!isDbOcrProvider(newProvider)) {
throw new Error('Invalid OCR provider data')
logger.debug(`Listed ${result.data.length} OCR providers`)
return result
} catch (error) {
logger.error('Failed to list OCR providers', error as Error)
throw error
}
const [updated] = await dbService
.getDb()
.update(ocrProviderTable)
.set(newProvider)
.where(eq(ocrProviderTable.id, provider.id))
.returning()
return { data: updated }
}
public async deleteProvider(providerId: string): Promise<void> {
if (BuiltinOcrProviderIds.some((pid) => pid === providerId)) {
throw new Error('Builtin OCR providers cannot be deleted.')
/**
* Get OCR provider by ID
*/
public async getProvider(providerId: OcrProviderId): Promise<{ data: DbOcrProvider }> {
try {
await this.ensureInitialized()
const provider = await ocrProviderRepository.findById(providerId)
logger.debug(`Retrieved OCR provider: ${providerId}`)
return { data: provider }
} catch (error) {
logger.error(`Failed to get OCR provider ${providerId}`, error as Error)
throw error
}
const providers = await dbService
.getDb()
.select()
.from(ocrProviderTable)
.where(eq(ocrProviderTable.id, providerId))
.limit(1)
if (providers.length === 0) {
throw new Error(`OCR provider ${providerId} not found`)
}
await dbService.getDb().delete(ocrProviderTable).where(eq(ocrProviderTable.id, providerId))
}
/**
 * Store a new OCR provider.
 *
 * Awaits `ensureInitialized()` and then hands `data` to `ocrProviderRepository.create`.
 *
 * @param data - Provider definition to create.
 * @returns The repository's response for the created provider.
 * @throws Rethrows any error from initialization or the repository after logging it.
 */
public async createProvider(data: CreateOcrProviderRequest): Promise<CreateOcrProviderResponse> {
  try {
    await this.ensureInitialized()
    const created = await ocrProviderRepository.create(data)
    logger.info(`Created OCR provider: ${data.id}`)
    return created
  } catch (err) {
    logger.error(`Failed to create OCR provider ${data.id}`, err as Error)
    throw err
  }
}
/**
 * Apply a partial update to an OCR provider.
 *
 * Awaits `ensureInitialized()` and then forwards to `ocrProviderRepository.update`.
 *
 * @param id - Id of the provider to update.
 * @param data - Fields to change.
 * @returns The repository's response for the updated provider.
 * @throws Rethrows any error from initialization or the repository after logging it.
 */
public async updateProvider(id: OcrProviderId, data: Partial<PatchOcrProviderRequest>): Promise<PatchOcrProviderResponse> {
  try {
    await this.ensureInitialized()
    const patched = await ocrProviderRepository.update(id, data)
    logger.info(`Updated OCR provider: ${id}`)
    return patched
  } catch (err) {
    logger.error(`Failed to update OCR provider ${id}`, err as Error)
    throw err
  }
}
/**
 * Replace an OCR provider with the given definition (full update).
 *
 * Awaits `ensureInitialized()` and then forwards to `ocrProviderRepository.replace`.
 *
 * @param data - Complete provider definition.
 * @returns The repository's response for the stored provider.
 * @throws Rethrows any error from initialization or the repository after logging it.
 */
public async replaceProvider(data: PutOcrProviderRequest): Promise<PutOcrProviderResponse> {
  try {
    await this.ensureInitialized()
    const stored = await ocrProviderRepository.replace(data)
    logger.info(`Replaced OCR provider: ${data.id}`)
    return stored
  } catch (err) {
    logger.error(`Failed to replace OCR provider ${data.id}`, err as Error)
    throw err
  }
}
/**
 * Remove an OCR provider.
 *
 * Awaits `ensureInitialized()` and then forwards to `ocrProviderRepository.delete`.
 *
 * @param id - Id of the provider to delete.
 * @throws Rethrows any error from initialization or the repository after logging it.
 */
public async deleteProvider(id: OcrProviderId): Promise<void> {
  try {
    await this.ensureInitialized()
    await ocrProviderRepository.delete(id)
    logger.info(`Deleted OCR provider: ${id}`)
  } catch (err) {
    logger.error(`Failed to delete OCR provider ${id}`, err as Error)
    throw err
  }
}
/**
* Perform OCR on a file using the specified provider
*/
public async ocr(file: SupportedOcrFile, params: OcrParams): Promise<OcrResult> {
const service = this.registry.get(params.providerId)
if (!service) {
throw new Error(`Provider ${params.providerId} is not registered`)
try {
await this.ensureInitialized()
const service = this.registry.get(params.providerId)
if (!service) {
throw new Error(`Provider ${params.providerId} is not registered`)
}
// Validate that the provider exists in database
await this.getProvider(params.providerId)
logger.debug(`Performing OCR with provider: ${params.providerId}`)
const result = await service.ocr(file)
logger.info(`OCR completed successfully with provider: ${params.providerId}`)
return result
} catch (error) {
logger.error(`OCR failed with provider ${params.providerId}`, error as Error)
throw error
}
return service.ocr(file)
}
/**
 * Report whether a provider can be used right now.
 *
 * A provider counts as available when the registry holds a handler for it and
 * `getProvider` resolves for the same id. Any error raised along the way is
 * logged at debug level and reported as "not available" instead of being thrown.
 *
 * @param providerId - Id of the provider to check.
 * @returns `true` when both checks pass, otherwise `false`.
 */
public async isProviderAvailable(providerId: OcrProviderId): Promise<boolean> {
  try {
    const handler = this.registry.get(providerId)
    if (handler) {
      // A rejected lookup lands in the catch block below.
      await this.getProvider(providerId)
      // Further availability checks would go here.
      return true
    }
    return false
  } catch (err) {
    logger.debug(`Provider ${providerId} is not available`, err as Error)
    return false
  }
}
/**
 * Report whether the registry holds a handler for `provider`.
 *
 * Only the in-memory registry is consulted. The method stays `async` so existing
 * call sites that `await` it keep working unchanged.
 *
 * @param provider - Provider whose `id` is looked up in the registry.
 * @returns `true` when a handler is registered under `provider.id`.
 */
private async _isProviderAvailable(provider: OcrProvider): Promise<boolean> {
  // `Map#get` does not throw, so no try/catch is needed around the lookup.
  return this.registry.get(provider.id) !== undefined
}
/**
 * Get the providers that can currently be used.
 *
 * Only providers with the `image` capability are considered; the method takes no
 * file argument yet and may be redesigned for specific file types in the future.
 *
 * @returns Providers from `listProviders()` that have `capabilities.image` set and
 *   pass `_isProviderAvailable`.
 * @throws Rethrows any error after logging it.
 */
public async getAvailableProvidersForFile(): Promise<DbOcrProvider[]> {
  try {
    const { data: allProviders } = await this.listProviders()

    // Capability filter first, then the per-provider availability check.
    const imageCapable = allProviders.filter((candidate: OcrProvider) => candidate.capabilities.image)

    const usable: DbOcrProvider[] = []
    for (const candidate of imageCapable) {
      const available = await this._isProviderAvailable(candidate)
      if (available) {
        usable.push(candidate)
      }
    }

    logger.debug(`Found ${usable.length} available providers for file`)
    return usable
  } catch (err) {
    logger.error('Failed to get available providers for file', err as Error)
    throw err
  }
}
/**
 * Release the resources held by the service.
 *
 * Empties the handler registry and logs that the service was disposed.
 */
public dispose(): void {
  // Drop every registered handler.
  this.registry.clear()

  logger.info('OCR service disposed')
}
}