Merge branch 'feat/ocr' into feat/ocr-translate

This commit is contained in:
icarus 2025-08-23 13:48:25 +08:00
commit bee0cefd5c
4 changed files with 24 additions and 16 deletions

View File

@ -197,12 +197,6 @@ export enum FeedUrl {
GITHUB_LATEST = 'https://github.com/CherryHQ/cherry-studio/releases/latest/download' GITHUB_LATEST = 'https://github.com/CherryHQ/cherry-studio/releases/latest/download'
} }
export const tesseractLangs = ['chi_sim', 'chi_tra', 'eng']
export enum TesseractLangsDownloadUrl {
CN = 'https://gitcode.com/beyondkmp/tessdata/releases/download/4.1.0/',
GLOBAL = 'https://github.com/tesseract-ocr/tessdata/raw/main/'
}
export enum UpgradeChannel { export enum UpgradeChannel {
LATEST = 'latest', // 最新稳定版本 LATEST = 'latest', // 最新稳定版本
RC = 'rc', // 公测版本 RC = 'rc', // 公测版本

View File

@ -1,13 +1,19 @@
import { BuiltinOcrProviderIds, FileMetadata, OcrProvider, OcrResult, SupportedOcrFile } from '@types' import { loggerService } from '@logger'
import { BuiltinOcrProviderIds, OcrProvider, OcrResult, SupportedOcrFile } from '@types'
import { tesseractService } from './tesseract/TesseractService' import { tesseractService } from './tesseract/TesseractService'
/**
 * Signature of an OCR handler: takes a file and resolves to an OcrResult.
 * In this diff the parameter type changes from FileMetadata (left/old column)
 * to SupportedOcrFile (right/new column).
 */
type OcrHandler = (file: FileMetadata) => Promise<OcrResult> type OcrHandler = (file: SupportedOcrFile) => Promise<OcrResult>
// Logger obtained from loggerService with the 'OcrService' context (line added by this commit).
const logger = loggerService.withContext('OcrService')
export class OcrService { export class OcrService {
private registry: Map<string, OcrHandler> = new Map() private registry: Map<string, OcrHandler> = new Map()
/**
 * Stores `handler` in the registry map under `providerId`.
 *
 * The three un-duplicated lines below exist only on the new side of the diff:
 * they log a warning when the id already has a handler. The handler is then
 * set unconditionally, so an existing entry is replaced.
 *
 * @param providerId - Key under which the handler is stored.
 * @param handler - OCR handler to associate with the provider id.
 */
register(providerId: string, handler: OcrHandler): void { register(providerId: string, handler: OcrHandler): void {
// Warn only; registration still proceeds and replaces the previous handler.
if (this.registry.has(providerId)) {
// NOTE(review): "Overwrited" in the message below is a typo for "Overwritten" — fix in the real source file.
logger.warn(`Provider ${providerId} has existing handler. Overwrited.`)
}
this.registry.set(providerId, handler) this.registry.set(providerId, handler)
} }

View File

@ -1,7 +1,7 @@
import { loggerService } from '@logger' import { loggerService } from '@logger'
import { getIpCountry } from '@main/utils/ipService' import { getIpCountry } from '@main/utils/ipService'
import { MB, TesseractLangsDownloadUrl } from '@shared/config/constant' import { MB } from '@shared/config/constant'
import { FileMetadata, ImageFileMetadata, isImageFile, OcrResult } from '@types' import { ImageFileMetadata, isImageFile, OcrResult, SupportedOcrFile } from '@types'
import { app } from 'electron' import { app } from 'electron'
import fs from 'fs' import fs from 'fs'
import path from 'path' import path from 'path'
@ -114,13 +114,21 @@ const logger = loggerService.withContext('TesseractService')
// 'yi-us': 'yid' // 'yi-us': 'yid'
// } // }
// Module-level configuration for the Tesseract service. These declarations appear
// only on the new side of the diff; the same language list and enum are removed
// from the shared constants file earlier in this commit.

// Size limit for OCR input, in MB: imageOcr compares the file size against MB_SIZE_THRESHOLD * MB.
const MB_SIZE_THRESHOLD = 50
// Language codes passed to createWorker in getWorker.
const tesseractLangs = ['chi_sim', 'chi_tra', 'eng']
// Base URLs for Tesseract language data.
// NOTE(review): where these are consumed is not visible in this chunk — presumably
// _getLangPath picks one based on getIpCountry; confirm.
enum TesseractLangsDownloadUrl {
// Hosted on gitcode.com.
CN = 'https://gitcode.com/beyondkmp/tessdata/releases/download/4.1.0/',
// Hosted on github.com.
GLOBAL = 'https://github.com/tesseract-ocr/tessdata/raw/main/'
}
export class TesseractService { export class TesseractService {
private worker: Tesseract.Worker | null = null private worker: Tesseract.Worker | null = null
async getWorker(): Promise<Tesseract.Worker> { async getWorker(): Promise<Tesseract.Worker> {
if (!this.worker) { if (!this.worker) {
// for now, only support limited languages // for now, only support limited languages
this.worker = await createWorker(['chi_sim', 'chi_tra', 'eng'], undefined, { this.worker = await createWorker(tesseractLangs, undefined, {
langPath: await this._getLangPath(), langPath: await this._getLangPath(),
cachePath: await this._getCacheDir(), cachePath: await this._getCacheDir(),
gzip: false, gzip: false,
@ -133,15 +141,15 @@ export class TesseractService {
/**
 * Runs OCR on a single image file and returns the recognized text.
 *
 * Steps: obtain the worker via getWorker(), stat the file, reject it when the
 * size exceeds the limit, read the file into a buffer, and pass the buffer to
 * worker.recognize().
 *
 * The diff only changes the size check: the left/old column hard-codes 50,
 * the right/new column uses MB_SIZE_THRESHOLD for both the comparison and the
 * error message.
 *
 * @param file - Image file metadata; only `file.path` is read here.
 * @returns An object whose `text` is `result.data.text` from the recognition result.
 * @throws Error when the file size is greater than the limit; errors from
 *   fs.promises.stat / readFile or the worker are not caught here.
 */
async imageOcr(file: ImageFileMetadata): Promise<OcrResult> { async imageOcr(file: ImageFileMetadata): Promise<OcrResult> {
// NOTE(review): the worker is acquired before the size check, so an oversized
// file still goes through getWorker() first — consider validating first.
const worker = await this.getWorker() const worker = await this.getWorker()
const stat = await fs.promises.stat(file.path) const stat = await fs.promises.stat(file.path)
// Strictly greater-than: a file of exactly the limit is accepted.
if (stat.size > 50 * MB) { if (stat.size > MB_SIZE_THRESHOLD * MB) {
throw new Error('This image is too large (max 50MB)') throw new Error(`This image is too large (max ${MB_SIZE_THRESHOLD}MB)`)
} }
const buffer = await fs.promises.readFile(file.path) const buffer = await fs.promises.readFile(file.path)
const result = await worker.recognize(buffer) const result = await worker.recognize(buffer)
return { text: result.data.text } return { text: result.data.text }
} }
async ocr(file: FileMetadata): Promise<OcrResult> { async ocr(file: SupportedOcrFile): Promise<OcrResult> {
if (!isImageFile(file)) { if (!isImageFile(file)) {
throw new Error('Only image files are supported currently') throw new Error('Only image files are supported currently')
} }

View File

@ -21,7 +21,7 @@ export const isOcrProviderCapability = (cap: string): cap is OcrProviderCapabili
return Object.hasOwn(OcrProviderCapabilities, cap) return Object.hasOwn(OcrProviderCapabilities, cap)
} }
/**
 * Boolean flags keyed by OcrProviderCapability.
 * Left/old column: every capability key is required (plain Record).
 * Right/new column: wrapped in Partial, so any key may be absent.
 */
export type OcrProviderCapabilityRecord = Record<OcrProviderCapability, boolean> export type OcrProviderCapabilityRecord = Partial<Record<OcrProviderCapability, boolean>>
export type OcrProvider = { export type OcrProvider = {
id: string id: string
@ -59,7 +59,7 @@ export type ImageOcrProvider = OcrProvider & {
} }
/**
 * Type guard that narrows an OcrProvider to ImageOcrProvider based on
 * `p.capabilities.image`.
 *
 * Left/old column returns the flag as-is; right/new column compares it with
 * `=== true`, so the result is always a strict boolean.
 * NOTE(review): presumably needed because the capability record became
 * Partial in this commit and `image` may be undefined — confirm against the
 * OcrProvider definition.
 *
 * @param p - Provider to test.
 * @returns true only when the image capability flag is exactly `true` (new side).
 */
export const isImageOcrProvider = (p: OcrProvider): p is ImageOcrProvider => { export const isImageOcrProvider = (p: OcrProvider): p is ImageOcrProvider => {
return p.capabilities.image return p.capabilities.image === true
} }
/**
 * File type accepted by OCR entry points; currently just an alias of
 * ImageFileMetadata (unchanged by this diff).
 */
export type SupportedOcrFile = ImageFileMetadata export type SupportedOcrFile = ImageFileMetadata