From 9d438be48fe6f3c77633f81da1d20847c8f5c129 Mon Sep 17 00:00:00 2001
From: icarus
Date: Fri, 22 Aug 2025 15:30:02 +0800
Subject: [PATCH] =?UTF-8?q?feat(ocr):=20=E6=B7=BB=E5=8A=A0OCR=E5=8A=9F?=
 =?UTF-8?q?=E8=83=BD=E6=94=AF=E6=8C=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

实现基于Tesseract的OCR功能,包括文件类型检查、服务接口和IPC通信
新增OCR相关类型定义和服务实现
---
 packages/shared/IpcChannel.ts             |   5 +-
 src/main/ipc.ts                           |   4 +
 src/main/services/ocr/OcrService.ts       |  79 ++++++++++++++
 src/main/services/ocr/TesseractService.ts | 143 ++++++++++++++++++++++
 src/preload/index.ts                      |   5 +
 src/renderer/src/types/ocr.ts             |   4 +
 6 files changed, 239 insertions(+), 1 deletion(-)
 create mode 100644 src/main/services/ocr/OcrService.ts
 create mode 100644 src/main/services/ocr/TesseractService.ts

diff --git a/packages/shared/IpcChannel.ts b/packages/shared/IpcChannel.ts
index 56ebfb3d58..f35db50bc6 100644
--- a/packages/shared/IpcChannel.ts
+++ b/packages/shared/IpcChannel.ts
@@ -281,5 +281,8 @@ export enum IpcChannel {
   TRACE_ADD_STREAM_MESSAGE = 'trace:addStreamMessage',
 
   // CodeTools
-  CodeTools_Run = 'code-tools:run'
+  CodeTools_Run = 'code-tools:run',
+
+  // OCR
+  OCR_ocr = 'ocr:ocr'
 }
diff --git a/src/main/ipc.ts b/src/main/ipc.ts
index 8689ab2c3b..8765ca0025 100644
--- a/src/main/ipc.ts
+++ b/src/main/ipc.ts
@@ -30,6 +30,7 @@ import { openTraceWindow, setTraceWindowTitle } from './services/NodeTraceServic
 import NotificationService from './services/NotificationService'
 import * as NutstoreService from './services/NutstoreService'
 import ObsidianVaultService from './services/ObsidianVaultService'
+import { ipcOcr } from './services/ocr/OcrService'
 import { proxyManager } from './services/ProxyManager'
 import { pythonService } from './services/PythonService'
 import { FileServiceManager } from './services/remotefile/FileServiceManager'
@@ -709,4 +710,7 @@ export function registerIpc(mainWindow: BrowserWindow, app: Electron.App) {
 
   // CodeTools
   ipcMain.handle(IpcChannel.CodeTools_Run, codeToolsService.run)
+
+  // OCR
+  ipcMain.handle(IpcChannel.OCR_ocr, ipcOcr)
 }
diff --git a/src/main/services/ocr/OcrService.ts b/src/main/services/ocr/OcrService.ts
new file mode 100644
index 0000000000..36f3ebf54e
--- /dev/null
+++ b/src/main/services/ocr/OcrService.ts
@@ -0,0 +1,79 @@
+import { loggerService } from '@logger'
+import { ImageFileMetadata, isImageFile } from '@types'
+import {
+  ImageOcrProvider,
+  isBuiltinOcrProvider,
+  isImageOcrProvider,
+  OcrProvider,
+  OcrResult,
+  SupportedOcrFile
+} from 'src/renderer/src/types/ocr'
+
+import { getTesseractWorker } from './TesseractService'
+
+const logger = loggerService.withContext('OcrService')
+
+/**
+ * ocr by tesseract
+ * @param file image file
+ * @returns ocr result
+ * @throws {Error}
+ */
+const tesseractOcr = async (file: ImageFileMetadata): Promise<string> => {
+  try {
+    const worker = await getTesseractWorker()
+    const ret = await worker.recognize(file.path)
+    return ret.data.text
+  } catch (e) {
+    const message = 'Failed to ocr with tesseract.'
+    logger.error(message, e as Error)
+    throw new Error(message)
+  }
+}
+
+/**
+ * ocr image file
+ * @param file image file
+ * @param provider ocr provider that supports image ocr
+ * @returns ocr result
+ * @throws {Error}
+ */
+const imageOcr = async (file: ImageFileMetadata, provider: ImageOcrProvider): Promise<OcrResult> => {
+  if (isBuiltinOcrProvider(provider)) {
+    let text: string
+    switch (provider.id) {
+      case 'tesseract':
+        text = await tesseractOcr(file)
+        return { text }
+      default:
+        throw new Error(`Unsupported built-in ocr provider: ${provider.id}`)
+    }
+  }
+  throw new Error(`Provider ${provider.id} is not supported.`)
+}
+
+/**
+ * ocr a file
+ * @param file any supported file
+ * @param provider ocr provider
+ * @returns ocr result
+ * @throws {Error}
+ */
+export const ocr = async (file: SupportedOcrFile, provider: OcrProvider): Promise<OcrResult> => {
+  if (isImageFile(file) && isImageOcrProvider(provider)) {
+    return imageOcr(file, provider)
+  }
+  throw new Error(`File type and provider capability is not matched, otherwise one of them is not supported.`)
+}
+
+/**
+ * ocr a file
+ * @param _ ipc event
+ * @param file any supported file
+ * @param provider ocr provider
+ * @returns ocr result
+ * @throws {Error}
+ */
+export const ipcOcr = async (_: Electron.IpcMainInvokeEvent, ...args: Parameters<typeof ocr>) => {
+  return ocr(...args)
+}
diff --git a/src/main/services/ocr/TesseractService.ts b/src/main/services/ocr/TesseractService.ts
new file mode 100644
index 0000000000..6cd98b9bf5
--- /dev/null
+++ b/src/main/services/ocr/TesseractService.ts
@@ -0,0 +1,143 @@
+import Tesseract, { createWorker } from 'tesseract.js'
+
+// creation promise of the shared worker; caching the promise lets concurrent callers share one worker
+let workerPromise: Promise<Tesseract.Worker> | null = null
+
+// const languageCodeMap: Record<string, string> = {
+//   'af-za': 'afr',
+//   'am-et': 'amh',
+//   'ar-sa': 'ara',
+//   'as-in': 'asm',
+//   'az-az': 'aze',
+//   'az-cyrl-az': 'aze_cyrl',
+//   'be-by': 'bel',
+//   'bn-bd': 'ben',
+//   'bo-cn': 'bod',
+//   'bs-ba': 'bos',
+//   'bg-bg': 'bul',
+//   'ca-es': 'cat',
+//   'ceb-ph': 'ceb',
+//   'cs-cz': 'ces',
+//   'zh-cn': 'chi_sim',
+//   'zh-tw': 'chi_tra',
+//   'chr-us': 'chr',
+//   'cy-gb': 'cym',
+//   'da-dk': 'dan',
+//   'de-de': 'deu',
+//   'dz-bt': 'dzo',
+//   'el-gr': 'ell',
+//   'en-us': 'eng',
+//   'enm-gb': 'enm',
+//   'eo-world': 'epo',
+//   'et-ee': 'est',
+//   'eu-es': 'eus',
+//   'fa-ir': 'fas',
+//   'fi-fi': 'fin',
+//   'fr-fr': 'fra',
+//   'frk-de': 'frk',
+//   'frm-fr': 'frm',
+//   'ga-ie': 'gle',
+//   'gl-es': 'glg',
+//   'grc-gr': 'grc',
+//   'gu-in': 'guj',
+//   'ht-ht': 'hat',
+//   'he-il': 'heb',
+//   'hi-in': 'hin',
+//   'hr-hr': 'hrv',
+//   'hu-hu': 'hun',
+//   'iu-ca': 'iku',
+//   'id-id': 'ind',
+//   'is-is': 'isl',
+//   'it-it': 'ita',
+//   'ita-it': 'ita_old',
+//   'jv-id': 'jav',
+//   'ja-jp': 'jpn',
+//   'kn-in': 'kan',
+//   'ka-ge': 'kat',
+//   'kat-ge': 'kat_old',
+//   'kk-kz': 'kaz',
+//   'km-kh': 'khm',
+//   'ky-kg': 'kir',
+//   'ko-kr': 'kor',
+//   'ku-tr': 'kur',
+//   'la-la': 'lao',
+//   'la-va': 'lat',
+//   'lv-lv': 'lav',
+//   'lt-lt': 'lit',
+//   'ml-in': 'mal',
+//   'mr-in': 'mar',
+//   'mk-mk': 'mkd',
+//   'mt-mt': 'mlt',
+//   'ms-my': 'msa',
+//   'my-mm': 'mya',
+//   'ne-np': 'nep',
+//   'nl-nl': 'nld',
+//   'no-no': 'nor',
+//   'or-in': 'ori',
+//   'pa-in': 'pan',
+//   'pl-pl': 'pol',
+//   'pt-pt': 'por',
+//   'ps-af': 'pus',
+//   'ro-ro': 'ron',
+//   'ru-ru': 'rus',
+//   'sa-in': 'san',
+//   'si-lk': 'sin',
+//   'sk-sk': 'slk',
+//   'sl-si': 'slv',
+//   'es-es': 'spa',
+//   'spa-es': 'spa_old',
+//   'sq-al': 'sqi',
+//   'sr-rs': 'srp',
+//   'sr-latn-rs': 'srp_latn',
+//   'sw-tz': 'swa',
+//   'sv-se': 'swe',
+//   'syr-sy': 'syr',
+//   'ta-in': 'tam',
+//   'te-in': 'tel',
+//   'tg-tj': 'tgk',
+//   'tl-ph': 'tgl',
+//   'th-th': 'tha',
+//   'ti-er': 'tir',
+//   'tr-tr': 'tur',
+//   'ug-cn': 'uig',
+//   'uk-ua': 'ukr',
+//   'ur-pk': 'urd',
+//   'uz-uz': 'uzb',
+//   'uz-cyrl-uz': 'uzb_cyrl',
+//   'vi-vn': 'vie',
+//   'yi-us': 'yid'
+// }
+
+/**
+ * get the shared tesseract worker, creating it on first use
+ * @returns the shared worker
+ * @throws {Error} if the worker cannot be created; the next call retries
+ */
+export const getTesseractWorker = async (): Promise<Tesseract.Worker> => {
+  if (!workerPromise) {
+    // for now, only support limited languages
+    const pending: Promise<Tesseract.Worker> = createWorker(['chi_sim', 'chi_tra', 'eng']).catch((e: unknown) => {
+      // do not cache a failed creation, so that the next call can retry
+      if (workerPromise === pending) {
+        workerPromise = null
+      }
+      throw e
+    })
+    workerPromise = pending
+  }
+  return workerPromise
+}
+
+/**
+ * terminate the shared tesseract worker if there is one
+ */
+export const disposeTesseractWorker = async () => {
+  if (workerPromise) {
+    const pending = workerPromise
+    // clear the cache first so that later callers create a fresh worker
+    workerPromise = null
+    // a failed creation leaves nothing to terminate
+    const worker = await pending.catch(() => null)
+    await worker?.terminate()
+  }
+}
diff --git a/src/preload/index.ts b/src/preload/index.ts
index 1059826224..b7d18edd70 100644
--- a/src/preload/index.ts
+++ b/src/preload/index.ts
@@ -25,6 +25,7 @@ import {
 } from '@types'
 import { contextBridge, ipcRenderer, OpenDialogOptions, shell, webUtils } from 'electron'
 import { Notification } from 'src/renderer/src/types/notification'
+import { OcrProvider, OcrResult, SupportedOcrFile } from 'src/renderer/src/types/ocr'
 import { CreateDirectoryOptions } from 'webdav'
 
 import type { ActionItem } from '../renderer/src/types/selectionTypes'
@@ -406,6 +407,10 @@ const api = {
       env: Record<string, string>,
       options?: { autoUpdateToLatest?: boolean }
     ) => ipcRenderer.invoke(IpcChannel.CodeTools_Run, cliTool, model, directory, env, options)
+  },
+  ocr: {
+    ocr: (file: SupportedOcrFile, provider: OcrProvider): Promise<OcrResult> =>
+      ipcRenderer.invoke(IpcChannel.OCR_ocr, file, provider)
   }
 }
 
diff --git a/src/renderer/src/types/ocr.ts b/src/renderer/src/types/ocr.ts
index 923c27aeda..42a6d4aaae 100644
--- a/src/renderer/src/types/ocr.ts
+++ b/src/renderer/src/types/ocr.ts
@@ -75,3 +75,7 @@ export type SupportedOcrFile = FileMetadata & {
 export const isSupportedOcrFile = (file: FileMetadata): file is SupportedOcrFile => {
   return isSupportedOcrFileType(file.type)
 }
+
+export type OcrResult = {
+  text: string
+}