mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2026-01-07 13:59:28 +08:00
feat(OCR服务): 支持base64字符串作为OCR输入
扩展tesseractOcr函数以接受base64字符串或图像文件作为输入
This commit is contained in:
parent
1864419e70
commit
af05960cd3
@ -19,19 +19,24 @@ const logger = loggerService.withContext('main:OcrService')
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* ocr by tesseract
|
* ocr by tesseract
|
||||||
* @param file image file
|
* @param file image file or base64 string
|
||||||
* @returns ocr result
|
* @returns ocr result
|
||||||
* @throws {Error}
|
* @throws {Error}
|
||||||
*/
|
*/
|
||||||
const tesseractOcr = async (file: ImageFileMetadata): Promise<string> => {
|
const tesseractOcr = async (file: ImageFileMetadata | string): Promise<string> => {
|
||||||
try {
|
try {
|
||||||
const worker = await getTesseractWorker()
|
const worker = await getTesseractWorker()
|
||||||
const stat = statSync(file.path)
|
let ret: Tesseract.RecognizeResult
|
||||||
if (stat.size > 50 * MB) {
|
if (typeof file === 'string') {
|
||||||
throw new Error('This image is too large (max 50MB)')
|
ret = await worker.recognize(file)
|
||||||
|
} else {
|
||||||
|
const stat = statSync(file.path)
|
||||||
|
if (stat.size > 50 * MB) {
|
||||||
|
throw new Error('This image is too large (max 50MB)')
|
||||||
|
}
|
||||||
|
const buffer = await readFile(file.path)
|
||||||
|
ret = await worker.recognize(buffer)
|
||||||
}
|
}
|
||||||
const buffer = await readFile(file.path)
|
|
||||||
const ret = await worker.recognize(buffer)
|
|
||||||
return ret.data.text
|
return ret.data.text
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
logger.error('Failed to ocr with tesseract.', e as Error)
|
logger.error('Failed to ocr with tesseract.', e as Error)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user