mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-24 18:50:56 +08:00
feat(OCR服务): 支持base64字符串作为OCR输入
扩展tesseractOcr函数以接受base64字符串或图像文件作为输入
This commit is contained in:
parent
1864419e70
commit
af05960cd3
@@ -19,19 +19,24 @@ const logger = loggerService.withContext('main:OcrService')
|
||||
|
||||
/**
|
||||
* ocr by tesseract
|
||||
* @param file image file
|
||||
* @param file image file or base64 string
|
||||
* @returns ocr result
|
||||
* @throws {Error}
|
||||
*/
|
||||
const tesseractOcr = async (file: ImageFileMetadata): Promise<string> => {
|
||||
const tesseractOcr = async (file: ImageFileMetadata | string): Promise<string> => {
|
||||
try {
|
||||
const worker = await getTesseractWorker()
|
||||
const stat = statSync(file.path)
|
||||
if (stat.size > 50 * MB) {
|
||||
throw new Error('This image is too large (max 50MB)')
|
||||
let ret: Tesseract.RecognizeResult
|
||||
if (typeof file === 'string') {
|
||||
ret = await worker.recognize(file)
|
||||
} else {
|
||||
const stat = statSync(file.path)
|
||||
if (stat.size > 50 * MB) {
|
||||
throw new Error('This image is too large (max 50MB)')
|
||||
}
|
||||
const buffer = await readFile(file.path)
|
||||
ret = await worker.recognize(buffer)
|
||||
}
|
||||
const buffer = await readFile(file.path)
|
||||
const ret = await worker.recognize(buffer)
|
||||
return ret.data.text
|
||||
} catch (e) {
|
||||
logger.error('Failed to ocr with tesseract.', e as Error)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user