mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2026-01-06 21:35:52 +08:00
fix(ocr): 添加图像大小检查并优化错误处理
检查图像文件大小是否超过50MB限制;使用buffer读取文件替代直接路径识别;简化错误处理逻辑,直接抛出原始错误。
This commit is contained in:
parent
6a467ceca4
commit
1864419e70
@ -1,4 +1,5 @@
|
|||||||
import { loggerService } from '@logger'
|
import { loggerService } from '@logger'
|
||||||
|
import { MB } from '@shared/config/constant'
|
||||||
import {
|
import {
|
||||||
ImageFileMetadata,
|
ImageFileMetadata,
|
||||||
ImageOcrProvider,
|
ImageOcrProvider,
|
||||||
@ -9,6 +10,8 @@ import {
|
|||||||
OcrResult,
|
OcrResult,
|
||||||
SupportedOcrFile
|
SupportedOcrFile
|
||||||
} from '@types'
|
} from '@types'
|
||||||
|
import { statSync } from 'fs'
|
||||||
|
import { readFile } from 'fs/promises'
|
||||||
|
|
||||||
import { getTesseractWorker } from './TesseractService'
|
import { getTesseractWorker } from './TesseractService'
|
||||||
|
|
||||||
@ -23,12 +26,16 @@ const logger = loggerService.withContext('main:OcrService')
|
|||||||
const tesseractOcr = async (file: ImageFileMetadata): Promise<string> => {
|
const tesseractOcr = async (file: ImageFileMetadata): Promise<string> => {
|
||||||
try {
|
try {
|
||||||
const worker = await getTesseractWorker()
|
const worker = await getTesseractWorker()
|
||||||
const ret = await worker.recognize(file.path)
|
const stat = statSync(file.path)
|
||||||
|
if (stat.size > 50 * MB) {
|
||||||
|
throw new Error('This image is too large (max 50MB)')
|
||||||
|
}
|
||||||
|
const buffer = await readFile(file.path)
|
||||||
|
const ret = await worker.recognize(buffer)
|
||||||
return ret.data.text
|
return ret.data.text
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
const message = 'Failed to ocr with tesseract.'
|
logger.error('Failed to ocr with tesseract.', e as Error)
|
||||||
logger.error(message, e as Error)
|
throw e
|
||||||
throw new Error(message)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user