mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2026-01-05 12:29:44 +08:00
refactor(ocr): 修改tesseractOcr返回完整识别结果而非仅文本
返回完整识别结果，以便后续处理使用更多 OCR 信息，同时简化 imageOcr 中的条件判断逻辑。
This commit is contained in:
parent
fbe4db0e50
commit
5241f11eeb
@ -23,7 +23,7 @@ const logger = loggerService.withContext('main:OcrService')
|
|||||||
* @returns ocr result
|
* @returns ocr result
|
||||||
* @throws {Error}
|
* @throws {Error}
|
||||||
*/
|
*/
|
||||||
const tesseractOcr = async (file: ImageFileMetadata | string): Promise<string> => {
|
const tesseractOcr = async (file: ImageFileMetadata | string): Promise<Tesseract.RecognizeResult> => {
|
||||||
try {
|
try {
|
||||||
const worker = await getTesseractWorker()
|
const worker = await getTesseractWorker()
|
||||||
let ret: Tesseract.RecognizeResult
|
let ret: Tesseract.RecognizeResult
|
||||||
@ -37,7 +37,7 @@ const tesseractOcr = async (file: ImageFileMetadata | string): Promise<string> =
|
|||||||
const buffer = await readFile(file.path)
|
const buffer = await readFile(file.path)
|
||||||
ret = await worker.recognize(buffer)
|
ret = await worker.recognize(buffer)
|
||||||
}
|
}
|
||||||
return ret.data.text
|
return ret
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
logger.error('Failed to ocr with tesseract.', e as Error)
|
logger.error('Failed to ocr with tesseract.', e as Error)
|
||||||
throw e
|
throw e
|
||||||
@ -53,13 +53,11 @@ const tesseractOcr = async (file: ImageFileMetadata | string): Promise<string> =
|
|||||||
*/
|
*/
|
||||||
const imageOcr = async (file: ImageFileMetadata, provider: ImageOcrProvider): Promise<OcrResult> => {
|
const imageOcr = async (file: ImageFileMetadata, provider: ImageOcrProvider): Promise<OcrResult> => {
|
||||||
if (isBuiltinOcrProvider(provider)) {
|
if (isBuiltinOcrProvider(provider)) {
|
||||||
let text: string
|
if (provider.id === 'tesseract') {
|
||||||
switch (provider.id) {
|
const result = await tesseractOcr(file)
|
||||||
case 'tesseract':
|
return { text: result.data.text }
|
||||||
text = await tesseractOcr(file)
|
} else {
|
||||||
return { text }
|
throw new Error(`Unsupported built-in ocr provider: ${provider.id}`)
|
||||||
default:
|
|
||||||
throw new Error(`Unsupported built-in ocr provider: ${provider.id}`)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
throw new Error(`Provider ${provider.id} is not supported.`)
|
throw new Error(`Provider ${provider.id} is not supported.`)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user