mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-24 18:50:56 +08:00
feat(ocr): 添加OCR功能支持
实现基于Tesseract的OCR功能，包括文件类型检查、服务接口和IPC通信；新增OCR相关类型定义和服务实现。
This commit is contained in:
parent
6011805527
commit
9d438be48f
@ -281,5 +281,8 @@ export enum IpcChannel {
|
||||
TRACE_ADD_STREAM_MESSAGE = 'trace:addStreamMessage',
|
||||
|
||||
// CodeTools
|
||||
CodeTools_Run = 'code-tools:run'
|
||||
CodeTools_Run = 'code-tools:run',
|
||||
|
||||
// OCR
|
||||
OCR_ocr = 'ocr:ocr'
|
||||
}
|
||||
|
||||
@ -30,6 +30,7 @@ import { openTraceWindow, setTraceWindowTitle } from './services/NodeTraceServic
|
||||
import NotificationService from './services/NotificationService'
|
||||
import * as NutstoreService from './services/NutstoreService'
|
||||
import ObsidianVaultService from './services/ObsidianVaultService'
|
||||
import { ipcOcr } from './services/ocr/OcrService'
|
||||
import { proxyManager } from './services/ProxyManager'
|
||||
import { pythonService } from './services/PythonService'
|
||||
import { FileServiceManager } from './services/remotefile/FileServiceManager'
|
||||
@ -709,4 +710,7 @@ export function registerIpc(mainWindow: BrowserWindow, app: Electron.App) {
|
||||
|
||||
// CodeTools
|
||||
ipcMain.handle(IpcChannel.CodeTools_Run, codeToolsService.run)
|
||||
|
||||
// OCR
|
||||
ipcMain.handle(IpcChannel.OCR_ocr, ipcOcr)
|
||||
}
|
||||
|
||||
79
src/main/services/ocr/OcrService.ts
Normal file
79
src/main/services/ocr/OcrService.ts
Normal file
@ -0,0 +1,79 @@
|
||||
import { loggerService } from '@logger'
|
||||
import { ImageFileMetadata, isImageFile } from '@types'
|
||||
import {
|
||||
ImageOcrProvider,
|
||||
isBuiltinOcrProvider,
|
||||
isImageOcrProvider,
|
||||
OcrProvider,
|
||||
OcrResult,
|
||||
SupportedOcrFile
|
||||
} from 'src/renderer/src/types/ocr'
|
||||
|
||||
import { getTesseractWorker } from './TesseractService'
|
||||
|
||||
const logger = loggerService.withContext('OcrService')
|
||||
|
||||
/**
 * Recognize the text of an image with the shared Tesseract worker.
 * @param file image file metadata; its `path` is what the worker recognizes
 * @returns the text recognized by Tesseract
 * @throws {Error} a generic error when recognition fails; the underlying failure is logged, not rethrown
 */
const tesseractOcr = async (file: ImageFileMetadata): Promise<string> => {
  const failureMessage = 'Failed to ocr with tesseract.'
  try {
    const tesseract = await getTesseractWorker()
    const { data } = await tesseract.recognize(file.path)
    return data.text
  } catch (cause) {
    logger.error(failureMessage, cause as Error)
    throw new Error(failureMessage)
  }
}
|
||||
|
||||
/**
 * Run OCR on an image file with the given provider.
 * Only built-in providers are handled, and of those only `tesseract`.
 * @param file image file
 * @param provider ocr provider that supports image ocr
 * @returns ocr result wrapping the recognized text
 * @throws {Error} when the provider is not built-in, or is a built-in provider without an implementation here
 */
const imageOcr = async (file: ImageFileMetadata, provider: ImageOcrProvider): Promise<OcrResult> => {
  // Reject everything that is not built-in up front, so the dispatch below stays flat.
  if (!isBuiltinOcrProvider(provider)) {
    throw new Error(`Provider ${provider.id} is not supported.`)
  }

  switch (provider.id) {
    case 'tesseract': {
      const text = await tesseractOcr(file)
      return { text }
    }
    default:
      throw new Error(`Unsupported built-in ocr provider: ${provider.id}`)
  }
}
|
||||
|
||||
/**
 * Run OCR on a file, dispatching on the file kind and the provider capability.
 * Currently the only supported pairing is an image file with an image-capable provider.
 * @param file any supported file
 * @param provider ocr provider
 * @returns ocr result
 * @throws {Error} when the file is not an image, the provider cannot ocr images, or the image ocr itself fails
 */
export const ocr = async (file: SupportedOcrFile, provider: OcrProvider): Promise<OcrResult> => {
  // The provider check is only evaluated for image files, matching the short-circuit of the `&&` form.
  if (!isImageFile(file) || !isImageOcrProvider(provider)) {
    throw new Error(`File type and provider capability is not matched, otherwise one of them is not supported.`)
  }
  return imageOcr(file, provider)
}
|
||||
|
||||
/**
 * IPC entry point for {@link ocr}: drops the ipc event and forwards the remaining arguments.
 * @param _ ipc event (unused)
 * @param args the `file` and `provider` arguments of {@link ocr}, in that order
 * @returns ocr result
 * @throws {Error} whatever {@link ocr} throws
 */
export const ipcOcr = async (_: Electron.IpcMainInvokeEvent, ...args: Parameters<typeof ocr>) => {
  const [file, provider] = args
  return ocr(file, provider)
}
|
||||
123
src/main/services/ocr/TesseractService.ts
Normal file
123
src/main/services/ocr/TesseractService.ts
Normal file
@ -0,0 +1,123 @@
|
||||
import Tesseract, { createWorker } from 'tesseract.js'
|
||||
|
||||
let worker: Tesseract.Worker | null = null
|
||||
|
||||
// const languageCodeMap: Record<string, string> = {
|
||||
// 'af-za': 'afr',
|
||||
// 'am-et': 'amh',
|
||||
// 'ar-sa': 'ara',
|
||||
// 'as-in': 'asm',
|
||||
// 'az-az': 'aze',
|
||||
// 'az-cyrl-az': 'aze_cyrl',
|
||||
// 'be-by': 'bel',
|
||||
// 'bn-bd': 'ben',
|
||||
// 'bo-cn': 'bod',
|
||||
// 'bs-ba': 'bos',
|
||||
// 'bg-bg': 'bul',
|
||||
// 'ca-es': 'cat',
|
||||
// 'ceb-ph': 'ceb',
|
||||
// 'cs-cz': 'ces',
|
||||
// 'zh-cn': 'chi_sim',
|
||||
// 'zh-tw': 'chi_tra',
|
||||
// 'chr-us': 'chr',
|
||||
// 'cy-gb': 'cym',
|
||||
// 'da-dk': 'dan',
|
||||
// 'de-de': 'deu',
|
||||
// 'dz-bt': 'dzo',
|
||||
// 'el-gr': 'ell',
|
||||
// 'en-us': 'eng',
|
||||
// 'enm-gb': 'enm',
|
||||
// 'eo-world': 'epo',
|
||||
// 'et-ee': 'est',
|
||||
// 'eu-es': 'eus',
|
||||
// 'fa-ir': 'fas',
|
||||
// 'fi-fi': 'fin',
|
||||
// 'fr-fr': 'fra',
|
||||
// 'frk-de': 'frk',
|
||||
// 'frm-fr': 'frm',
|
||||
// 'ga-ie': 'gle',
|
||||
// 'gl-es': 'glg',
|
||||
// 'grc-gr': 'grc',
|
||||
// 'gu-in': 'guj',
|
||||
// 'ht-ht': 'hat',
|
||||
// 'he-il': 'heb',
|
||||
// 'hi-in': 'hin',
|
||||
// 'hr-hr': 'hrv',
|
||||
// 'hu-hu': 'hun',
|
||||
// 'iu-ca': 'iku',
|
||||
// 'id-id': 'ind',
|
||||
// 'is-is': 'isl',
|
||||
// 'it-it': 'ita',
|
||||
// 'ita-it': 'ita_old',
|
||||
// 'jv-id': 'jav',
|
||||
// 'ja-jp': 'jpn',
|
||||
// 'kn-in': 'kan',
|
||||
// 'ka-ge': 'kat',
|
||||
// 'kat-ge': 'kat_old',
|
||||
// 'kk-kz': 'kaz',
|
||||
// 'km-kh': 'khm',
|
||||
// 'ky-kg': 'kir',
|
||||
// 'ko-kr': 'kor',
|
||||
// 'ku-tr': 'kur',
|
||||
// 'lo-la': 'lao',
|
||||
// 'la-va': 'lat',
|
||||
// 'lv-lv': 'lav',
|
||||
// 'lt-lt': 'lit',
|
||||
// 'ml-in': 'mal',
|
||||
// 'mr-in': 'mar',
|
||||
// 'mk-mk': 'mkd',
|
||||
// 'mt-mt': 'mlt',
|
||||
// 'ms-my': 'msa',
|
||||
// 'my-mm': 'mya',
|
||||
// 'ne-np': 'nep',
|
||||
// 'nl-nl': 'nld',
|
||||
// 'no-no': 'nor',
|
||||
// 'or-in': 'ori',
|
||||
// 'pa-in': 'pan',
|
||||
// 'pl-pl': 'pol',
|
||||
// 'pt-pt': 'por',
|
||||
// 'ps-af': 'pus',
|
||||
// 'ro-ro': 'ron',
|
||||
// 'ru-ru': 'rus',
|
||||
// 'sa-in': 'san',
|
||||
// 'si-lk': 'sin',
|
||||
// 'sk-sk': 'slk',
|
||||
// 'sl-si': 'slv',
|
||||
// 'es-es': 'spa',
|
||||
// 'spa-es': 'spa_old',
|
||||
// 'sq-al': 'sqi',
|
||||
// 'sr-rs': 'srp',
|
||||
// 'sr-latn-rs': 'srp_latn',
|
||||
// 'sw-tz': 'swa',
|
||||
// 'sv-se': 'swe',
|
||||
// 'syr-sy': 'syr',
|
||||
// 'ta-in': 'tam',
|
||||
// 'te-in': 'tel',
|
||||
// 'tg-tj': 'tgk',
|
||||
// 'tl-ph': 'tgl',
|
||||
// 'th-th': 'tha',
|
||||
// 'ti-er': 'tir',
|
||||
// 'tr-tr': 'tur',
|
||||
// 'ug-cn': 'uig',
|
||||
// 'uk-ua': 'ukr',
|
||||
// 'ur-pk': 'urd',
|
||||
// 'uz-uz': 'uzb',
|
||||
// 'uz-cyrl-uz': 'uzb_cyrl',
|
||||
// 'vi-vn': 'vie',
|
||||
// 'yi-us': 'yid'
|
||||
// }
|
||||
|
||||
// Creation that is currently in flight, shared so concurrent callers do not each spawn a worker.
let pendingWorker: Promise<Tesseract.Worker> | null = null

/**
 * Get the shared Tesseract worker, creating it on first use.
 *
 * Callers that arrive while the worker is still being created all wait on the
 * same creation promise, so only one worker is ever spawned. Without this,
 * every concurrent first call would see `worker === null`, create its own
 * worker, and all but the last one would be leaked.
 *
 * @returns the shared worker, also stored in the module-level `worker`
 * @throws whatever `createWorker` rejects with; a failed creation is not cached, so the next call retries
 */
export const getTesseractWorker = async (): Promise<Tesseract.Worker> => {
  if (worker) {
    return worker
  }

  if (!pendingWorker) {
    // for now, only support limited languages
    pendingWorker = createWorker(['chi_sim', 'chi_tra', 'eng'])
      .then((created) => {
        worker = created
        return created
      })
      .finally(() => {
        // Clear the in-flight marker on success and on failure, so a failed creation can be retried.
        pendingWorker = null
      })
  }

  return pendingWorker
}
|
||||
|
||||
/**
 * Terminate the shared Tesseract worker, if one exists.
 *
 * The module-level `worker` reference is cleared BEFORE termination is awaited.
 * This way `getTesseractWorker` never hands out a worker that is in the middle
 * of shutting down, and a rejected `terminate()` cannot leave a broken worker
 * cached forever.
 *
 * @returns resolves once the worker has terminated; resolves immediately when there is no worker
 * @throws whatever `worker.terminate()` rejects with (the reference is cleared regardless)
 */
export const disposeTesseractWorker = async (): Promise<void> => {
  const current = worker
  if (!current) {
    return
  }

  // Detach first: anything asking for a worker from now on gets a fresh one.
  worker = null
  await current.terminate()
}
|
||||
@ -25,6 +25,7 @@ import {
|
||||
} from '@types'
|
||||
import { contextBridge, ipcRenderer, OpenDialogOptions, shell, webUtils } from 'electron'
|
||||
import { Notification } from 'src/renderer/src/types/notification'
|
||||
import { OcrProvider, OcrResult, SupportedOcrFile } from 'src/renderer/src/types/ocr'
|
||||
import { CreateDirectoryOptions } from 'webdav'
|
||||
|
||||
import type { ActionItem } from '../renderer/src/types/selectionTypes'
|
||||
@ -406,6 +407,10 @@ const api = {
|
||||
env: Record<string, string>,
|
||||
options?: { autoUpdateToLatest?: boolean }
|
||||
) => ipcRenderer.invoke(IpcChannel.CodeTools_Run, cliTool, model, directory, env, options)
|
||||
},
|
||||
ocr: {
|
||||
ocr: (file: SupportedOcrFile, provider: OcrProvider): Promise<OcrResult> =>
|
||||
ipcRenderer.invoke(IpcChannel.OCR_ocr, file, provider)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -75,3 +75,7 @@ export type SupportedOcrFile = FileMetadata & {
|
||||
/**
 * Type guard narrowing a file to `SupportedOcrFile`.
 * The decision is delegated entirely to `isSupportedOcrFileType` applied to the file's `type`.
 * @param file file metadata to check
 * @returns `true` when `isSupportedOcrFileType` accepts `file.type`
 */
export const isSupportedOcrFile = (file: FileMetadata): file is SupportedOcrFile =>
  isSupportedOcrFileType(file.type)
|
||||
|
||||
/**
 * Result of an OCR run; `text` holds the recognized text.
 */
export type OcrResult = {
|
||||
text: string
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user