fix(ocr): 添加图像大小检查并优化错误处理

检查图像文件大小是否超过50MB限制
使用buffer读取文件替代直接路径识别
简化错误处理逻辑，直接抛出原始错误
This commit is contained in:
icarus 2025-08-22 16:52:02 +08:00
parent 6a467ceca4
commit 1864419e70

View File

@@ -1,4 +1,5 @@
import { loggerService } from '@logger' import { loggerService } from '@logger'
import { MB } from '@shared/config/constant'
import { import {
ImageFileMetadata, ImageFileMetadata,
ImageOcrProvider, ImageOcrProvider,
@@ -9,6 +10,8 @@ import {
OcrResult, OcrResult,
SupportedOcrFile SupportedOcrFile
} from '@types' } from '@types'
import { statSync } from 'fs'
import { readFile } from 'fs/promises'
import { getTesseractWorker } from './TesseractService' import { getTesseractWorker } from './TesseractService'
@@ -23,12 +26,16 @@ const logger = loggerService.withContext('main:OcrService')
const tesseractOcr = async (file: ImageFileMetadata): Promise<string> => { const tesseractOcr = async (file: ImageFileMetadata): Promise<string> => {
try { try {
const worker = await getTesseractWorker() const worker = await getTesseractWorker()
const ret = await worker.recognize(file.path) const stat = statSync(file.path)
if (stat.size > 50 * MB) {
throw new Error('This image is too large (max 50MB)')
}
const buffer = await readFile(file.path)
const ret = await worker.recognize(buffer)
return ret.data.text return ret.data.text
} catch (e) { } catch (e) {
const message = 'Failed to ocr with tesseract.' logger.error('Failed to ocr with tesseract.', e as Error)
logger.error(message, e as Error) throw e
throw new Error(message)
} }
} }