mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-24 18:50:56 +08:00
refactor(ocr): 修改tesseractOcr返回完整识别结果而非仅文本
返回完整识别结果以便后续处理使用更多OCR信息,同时简化imageOcr中的条件判断逻辑
This commit is contained in:
parent
239c9c7205
commit
cf0e6a8f73
@ -23,7 +23,7 @@ const logger = loggerService.withContext('main:OcrService')
|
||||
* @returns ocr result
|
||||
* @throws {Error}
|
||||
*/
|
||||
const tesseractOcr = async (file: ImageFileMetadata | string): Promise<string> => {
|
||||
const tesseractOcr = async (file: ImageFileMetadata | string): Promise<Tesseract.RecognizeResult> => {
|
||||
try {
|
||||
const worker = await getTesseractWorker()
|
||||
let ret: Tesseract.RecognizeResult
|
||||
@ -37,7 +37,7 @@ const tesseractOcr = async (file: ImageFileMetadata | string): Promise<string> =
|
||||
const buffer = await readFile(file.path)
|
||||
ret = await worker.recognize(buffer)
|
||||
}
|
||||
return ret.data.text
|
||||
return ret
|
||||
} catch (e) {
|
||||
logger.error('Failed to ocr with tesseract.', e as Error)
|
||||
throw e
|
||||
@ -53,13 +53,11 @@ const tesseractOcr = async (file: ImageFileMetadata | string): Promise<string> =
|
||||
*/
|
||||
const imageOcr = async (file: ImageFileMetadata, provider: ImageOcrProvider): Promise<OcrResult> => {
|
||||
if (isBuiltinOcrProvider(provider)) {
|
||||
let text: string
|
||||
switch (provider.id) {
|
||||
case 'tesseract':
|
||||
text = await tesseractOcr(file)
|
||||
return { text }
|
||||
default:
|
||||
throw new Error(`Unsupported built-in ocr provider: ${provider.id}`)
|
||||
if (provider.id === 'tesseract') {
|
||||
const result = await tesseractOcr(file)
|
||||
return { text: result.data.text }
|
||||
} else {
|
||||
throw new Error(`Unsupported built-in ocr provider: ${provider.id}`)
|
||||
}
|
||||
}
|
||||
throw new Error(`Provider ${provider.id} is not supported.`)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user