fix(ocr): 添加图像大小检查并优化错误处理

检查图像文件大小是否超过50MB限制
使用buffer读取文件替代直接路径识别
简化错误处理逻辑,直接抛出原始错误
This commit is contained in:
icarus 2025-08-22 16:52:02 +08:00
parent 6a467ceca4
commit 1864419e70

View File

@ -1,4 +1,5 @@
import { loggerService } from '@logger'
import { MB } from '@shared/config/constant'
import {
ImageFileMetadata,
ImageOcrProvider,
@ -9,6 +10,8 @@ import {
OcrResult,
SupportedOcrFile
} from '@types'
import { statSync } from 'fs'
import { readFile } from 'fs/promises'
import { getTesseractWorker } from './TesseractService'
@ -23,12 +26,16 @@ const logger = loggerService.withContext('main:OcrService')
/**
 * Runs Tesseract OCR on an image file and resolves with the recognized text.
 *
 * NOTE(review): this block looks like a diff hunk whose +/- markers were stripped,
 * so pre-change and post-change lines appear side by side. Read as plain TypeScript
 * it declares `ret` twice and has statements after a `throw` — confirm against the
 * committed file before treating this as compilable code.
 *
 * @param file - Image metadata; only `file.path` is read here.
 * @returns The `data.text` field of the worker's recognition result.
 * @throws Any failure inside the `try` block is logged and then thrown to the caller.
 */
const tesseractOcr = async (file: ImageFileMetadata): Promise<string> => {
try {
const worker = await getTesseractWorker()
// NOTE(review): presumably the pre-change line — recognition directly from the file path.
const ret = await worker.recognize(file.path)
// Size guard: `statSync` is a synchronous filesystem call, so it blocks until the
// stat completes. The limit is `50 * MB`; presumably MB is bytes-per-megabyte — confirm.
const stat = statSync(file.path)
if (stat.size > 50 * MB) {
throw new Error('This image is too large (max 50MB)')
}
// Read the file contents into memory and pass the buffer to the worker instead of the path.
const buffer = await readFile(file.path)
const ret = await worker.recognize(buffer)
return ret.data.text
} catch (e) {
// NOTE(review): presumably the pre-change handling — logs, then throws a new generic
// Error carrying only `message`, so the original error is not what the caller receives.
const message = 'Failed to ocr with tesseract.'
logger.error(message, e as Error)
throw new Error(message)
// Post-change handling: log with the same message, then rethrow the caught value
// unchanged. This also covers the size-limit error thrown inside the `try` above.
logger.error('Failed to ocr with tesseract.', e as Error)
throw e
}
}