refactor(ocr): 修改tesseractOcr返回完整识别结果而非仅文本

返回完整识别结果以便后续处理使用更多OCR信息，同时简化imageOcr中的条件判断逻辑
This commit is contained in:
icarus 2025-08-22 20:28:33 +08:00
parent 239c9c7205
commit cf0e6a8f73

View File

@@ -23,7 +23,7 @@ const logger = loggerService.withContext('main:OcrService')
* @returns ocr result
* @throws {Error}
*/
const tesseractOcr = async (file: ImageFileMetadata | string): Promise<string> => {
const tesseractOcr = async (file: ImageFileMetadata | string): Promise<Tesseract.RecognizeResult> => {
try {
const worker = await getTesseractWorker()
let ret: Tesseract.RecognizeResult
@@ -37,7 +37,7 @@ const tesseractOcr = async (file: ImageFileMetadata | string): Promise<string> =
const buffer = await readFile(file.path)
ret = await worker.recognize(buffer)
}
return ret.data.text
return ret
} catch (e) {
logger.error('Failed to ocr with tesseract.', e as Error)
throw e
@@ -53,13 +53,11 @@ const tesseractOcr = async (file: ImageFileMetadata | string): Promise<string> =
*/
const imageOcr = async (file: ImageFileMetadata, provider: ImageOcrProvider): Promise<OcrResult> => {
if (isBuiltinOcrProvider(provider)) {
let text: string
switch (provider.id) {
case 'tesseract':
text = await tesseractOcr(file)
return { text }
default:
throw new Error(`Unsupported built-in ocr provider: ${provider.id}`)
if (provider.id === 'tesseract') {
const result = await tesseractOcr(file)
return { text: result.data.text }
} else {
throw new Error(`Unsupported built-in ocr provider: ${provider.id}`)
}
}
throw new Error(`Provider ${provider.id} is not supported.`)