diff --git a/packages/shared/IpcChannel.ts b/packages/shared/IpcChannel.ts index 93679f5faa..be037f6669 100644 --- a/packages/shared/IpcChannel.ts +++ b/packages/shared/IpcChannel.ts @@ -337,6 +337,7 @@ export enum IpcChannel { // OCR OCR_ocr = 'ocr:ocr', + OCR_ListProviders = 'ocr:list-providers', // OVMS Ovms_AddModel = 'ovms:add-model', diff --git a/src/main/ipc.ts b/src/main/ipc.ts index 55d60fa203..f1a4de6a59 100644 --- a/src/main/ipc.ts +++ b/src/main/ipc.ts @@ -875,6 +875,7 @@ export function registerIpc(mainWindow: BrowserWindow, app: Electron.App) { ipcMain.handle(IpcChannel.OCR_ocr, (_, file: SupportedOcrFile, provider: OcrProvider) => ocrService.ocr(file, provider) ) + ipcMain.handle(IpcChannel.OCR_ListProviders, () => ocrService.listProviderIds()) // OVMS ipcMain.handle(IpcChannel.Ovms_AddModel, (_, modelName: string, modelId: string, modelSource: string, task: string) => diff --git a/src/main/services/ocr/OcrService.ts b/src/main/services/ocr/OcrService.ts index 471d31edce..b2943e30ec 100644 --- a/src/main/services/ocr/OcrService.ts +++ b/src/main/services/ocr/OcrService.ts @@ -2,6 +2,7 @@ import { loggerService } from '@logger' import { isLinux } from '@main/constant' import { BuiltinOcrProviderIds, OcrHandler, OcrProvider, OcrResult, SupportedOcrFile } from '@types' +import { ovOcrService } from './builtin/OvOcrService' import { ppocrService } from './builtin/PpocrService' import { systemOcrService } from './builtin/SystemOcrService' import { tesseractService } from './builtin/TesseractService' @@ -22,6 +23,10 @@ export class OcrService { this.registry.delete(providerId) } + public listProviderIds(): string[] { + return Array.from(this.registry.keys()) + } + public async ocr(file: SupportedOcrFile, provider: OcrProvider): Promise { const handler = this.registry.get(provider.id) if (!handler) { @@ -39,3 +44,5 @@ ocrService.register(BuiltinOcrProviderIds.tesseract, tesseractService.ocr.bind(t !isLinux && ocrService.register(BuiltinOcrProviderIds.system, systemOcrService.ocr.bind(systemOcrService)) ocrService.register(BuiltinOcrProviderIds.paddleocr, ppocrService.ocr.bind(ppocrService)) + +ovOcrService.isAvailable() && ocrService.register(BuiltinOcrProviderIds.ovocr, ovOcrService.ocr.bind(ovOcrService)) diff --git a/src/main/services/ocr/builtin/OvOcrService.ts b/src/main/services/ocr/builtin/OvOcrService.ts new file mode 100644 index 0000000000..1650ca8832 --- /dev/null +++ b/src/main/services/ocr/builtin/OvOcrService.ts @@ -0,0 +1,128 @@ +import { loggerService } from '@logger' +import { isWin } from '@main/constant' +import { isImageFileMetadata, OcrOvConfig, OcrResult, SupportedOcrFile } from '@types' +import { exec } from 'child_process' +import * as fs from 'fs' +import * as os from 'os' +import * as path from 'path' +import { promisify } from 'util' + +import { OcrBaseService } from './OcrBaseService' + +const logger = loggerService.withContext('OvOcrService') +const execAsync = promisify(exec) + +const PATH_BAT_FILE = path.join(os.homedir(), '.cherrystudio', 'ovms', 'ovocr', 'run.npu.bat') + +export class OvOcrService extends OcrBaseService { + constructor() { + super() + } + + public isAvailable(): boolean { + return ( + isWin && + os.cpus()[0].model.toLowerCase().includes('intel') && + os.cpus()[0].model.toLowerCase().includes('ultra') && + fs.existsSync(PATH_BAT_FILE) + ) + } + + private getOvOcrPath(): string { + return path.join(os.homedir(), '.cherrystudio', 'ovms', 'ovocr') + } + + private getImgDir(): string { + return path.join(this.getOvOcrPath(), 'img') + } + + private getOutputDir(): string { + return path.join(this.getOvOcrPath(), 'output') + } + + private async clearDirectory(dirPath: string): Promise { + if (fs.existsSync(dirPath)) { + const files = await fs.promises.readdir(dirPath) + for (const file of files) { + const filePath = path.join(dirPath, file) + const stats = await fs.promises.stat(filePath) + if (stats.isDirectory()) { + await this.clearDirectory(filePath) + await fs.promises.rmdir(filePath) + } else { + await fs.promises.unlink(filePath) + } + } + } else { + // If the directory does not exist, create it + await fs.promises.mkdir(dirPath, { recursive: true }) + } + } + + private async copyFileToImgDir(sourceFilePath: string, targetFileName: string): Promise { + const imgDir = this.getImgDir() + const targetFilePath = path.join(imgDir, targetFileName) + await fs.promises.copyFile(sourceFilePath, targetFilePath) + } + + private async runOcrBatch(): Promise { + const ovOcrPath = this.getOvOcrPath() + + try { + // Execute run.bat in the ov-ocr directory + await execAsync(`"${PATH_BAT_FILE}"`, { + cwd: ovOcrPath, + timeout: 60000 // 60 second timeout + }) + } catch (error) { + logger.error(`Error running ovocr batch: ${error}`) + throw new Error(`Failed to run OCR batch: ${error}`) + } + } + + private async ocrImage(filePath: string, options?: OcrOvConfig): Promise { + logger.info(`OV OCR called on ${filePath} with options ${JSON.stringify(options)}`) + + try { + // 1. Clear img directory and output directory + await this.clearDirectory(this.getImgDir()) + await this.clearDirectory(this.getOutputDir()) + + // 2. Copy file to img directory + const fileName = path.basename(filePath) + await this.copyFileToImgDir(filePath, fileName) + logger.info(`File copied to img directory: ${fileName}`) + + // 3. Run run.bat + logger.info('Running OV OCR batch process...') + await this.runOcrBatch() + + // 4. Check that output/[basename].txt file exists + const baseNameWithoutExt = path.basename(fileName, path.extname(fileName)) + const outputFilePath = path.join(this.getOutputDir(), `${baseNameWithoutExt}.txt`) + if (!fs.existsSync(outputFilePath)) { + throw new Error(`OV OCR output file not found at: ${outputFilePath}`) + } + + // 5. Read output/[basename].txt file content + const ocrText = await fs.promises.readFile(outputFilePath, 'utf-8') + logger.info(`OV OCR text extracted: ${ocrText.substring(0, 100)}...`) + + // 6. Return result + return { text: ocrText } + } catch (error) { + logger.error(`Error during OV OCR process: ${error}`) + throw error + } + } + + public ocr = async (file: SupportedOcrFile, options?: OcrOvConfig): Promise => { + if (isImageFileMetadata(file)) { + return this.ocrImage(file.path, options) + } else { + throw new Error('Unsupported file type, currently only image files are supported') + } + } +} + +export const ovOcrService = new OvOcrService() diff --git a/src/preload/index.ts b/src/preload/index.ts index 34656092b2..9004560045 100644 --- a/src/preload/index.ts +++ b/src/preload/index.ts @@ -480,7 +480,8 @@ const api = { }, ocr: { ocr: (file: SupportedOcrFile, provider: OcrProvider): Promise => - ipcRenderer.invoke(IpcChannel.OCR_ocr, file, provider) + ipcRenderer.invoke(IpcChannel.OCR_ocr, file, provider), + listProviders: (): Promise => ipcRenderer.invoke(IpcChannel.OCR_ListProviders) }, cherryai: { generateSignature: (params: { method: string; path: string; query: string; body: Record }) => diff --git a/src/renderer/src/config/ocr.ts b/src/renderer/src/config/ocr.ts index 02f0995a82..6eb52f1ad3 100644 --- a/src/renderer/src/config/ocr.ts +++ b/src/renderer/src/config/ocr.ts @@ -1,6 +1,7 @@ import { BuiltinOcrProvider, BuiltinOcrProviderId, + OcrOvProvider, OcrPpocrProvider, OcrProviderCapability, OcrSystemProvider, @@ -50,10 +51,23 @@ const ppocrOcr: OcrPpocrProvider = { } } as const +const ovOcr: OcrOvProvider = { + id: 'ovocr', + name: 'Intel OV(NPU) OCR', + config: { + langs: isWin ? ['en-us', 'zh-cn'] : undefined + }, + capabilities: { + image: true + // pdf: true + } +} as const satisfies OcrOvProvider + export const BUILTIN_OCR_PROVIDERS_MAP = { tesseract, system: systemOcr, - paddleocr: ppocrOcr + paddleocr: ppocrOcr, + ovocr: ovOcr } as const satisfies Record export const BUILTIN_OCR_PROVIDERS: BuiltinOcrProvider[] = Object.values(BUILTIN_OCR_PROVIDERS_MAP) diff --git a/src/renderer/src/hooks/useOcrProvider.tsx b/src/renderer/src/hooks/useOcrProvider.tsx index 0a40f047ff..bd597e2267 100644 --- a/src/renderer/src/hooks/useOcrProvider.tsx +++ b/src/renderer/src/hooks/useOcrProvider.tsx @@ -1,4 +1,5 @@ import { loggerService } from '@logger' +import IntelLogo from '@renderer/assets/images/providers/intel.png' import PaddleocrLogo from '@renderer/assets/images/providers/paddleocr.png' import TesseractLogo from '@renderer/assets/images/providers/Tesseract.js.png' import { BUILTIN_OCR_PROVIDERS_MAP, DEFAULT_OCR_PROVIDER } from '@renderer/config/ocr' @@ -83,6 +84,8 @@ export const useOcrProviders = () => { return case 'paddleocr': return + case 'ovocr': + return } } return diff --git a/src/renderer/src/i18n/label.ts b/src/renderer/src/i18n/label.ts index c2ac69f1be..51edc964b6 100644 --- a/src/renderer/src/i18n/label.ts +++ b/src/renderer/src/i18n/label.ts @@ -340,12 +340,14 @@ export const getBuiltInMcpServerDescriptionLabel = (key: string): string => { const builtinOcrProviderKeyMap = { system: 'ocr.builtin.system', tesseract: '', - paddleocr: '' + paddleocr: '', + ovocr: '' } as const satisfies Record export const getBuiltinOcrProviderLabel = (key: BuiltinOcrProviderId) => { if (key === 'tesseract') return 'Tesseract' else if (key == 'paddleocr') return 'PaddleOCR' + else if (key == 'ovocr') return 'Intel OV(NPU) OCR' else return getLabel(builtinOcrProviderKeyMap, key) } diff --git a/src/renderer/src/i18n/locales/en-us.json b/src/renderer/src/i18n/locales/en-us.json index 035420b61c..98cce3a51e 100644 --- a/src/renderer/src/i18n/locales/en-us.json +++ b/src/renderer/src/i18n/locales/en-us.json @@ -2049,6 +2049,7 @@ "provider": { "cannot_remove_builtin": "Cannot delete built-in provider", "existing": "The provider already exists", + "get_providers": "Failed to get available providers", "not_found": "OCR provider does not exist", "update_failed": "Failed to update configuration" }, diff --git a/src/renderer/src/i18n/locales/zh-cn.json b/src/renderer/src/i18n/locales/zh-cn.json index 071ff09c83..8e5e4ebe38 100644 --- a/src/renderer/src/i18n/locales/zh-cn.json +++ b/src/renderer/src/i18n/locales/zh-cn.json @@ -2049,6 +2049,7 @@ "provider": { "cannot_remove_builtin": "不能删除内置提供商", "existing": "提供商已存在", + "get_providers": "获取可用提供商失败", "not_found": "OCR 提供商不存在", "update_failed": "更新配置失败" }, diff --git a/src/renderer/src/i18n/locales/zh-tw.json b/src/renderer/src/i18n/locales/zh-tw.json index fea60d0fde..0b88424662 100644 --- a/src/renderer/src/i18n/locales/zh-tw.json +++ b/src/renderer/src/i18n/locales/zh-tw.json @@ -2048,8 +2048,9 @@ "error": { "provider": { "cannot_remove_builtin": "不能刪除內建提供者", - "existing": "提供商已存在", - "not_found": "OCR 提供商不存在", + "existing": "提供者已存在", + "get_providers": "取得可用提供者失敗", + "not_found": "OCR 提供者不存在", "update_failed": "更新配置失敗" }, "unknown": "OCR過程發生錯誤" diff --git a/src/renderer/src/i18n/translate/el-gr.json b/src/renderer/src/i18n/translate/el-gr.json index 370f3ce658..c1d289316d 100644 --- a/src/renderer/src/i18n/translate/el-gr.json +++ b/src/renderer/src/i18n/translate/el-gr.json @@ -2041,6 +2041,7 @@ "provider": { "cannot_remove_builtin": "Δεν είναι δυνατή η διαγραφή του ενσωματωμένου παρόχου", "existing": "Ο πάροχος υπηρεσιών υπάρχει ήδη", + "get_providers": "Αποτυχία λήψης διαθέσιμων παρόχων", "not_found": "Ο πάροχος OCR δεν υπάρχει", "update_failed": "Αποτυχία ενημέρωσης της διαμόρφωσης" }, diff --git a/src/renderer/src/i18n/translate/es-es.json b/src/renderer/src/i18n/translate/es-es.json index 0c923c9735..5825c72fd2 100644 --- a/src/renderer/src/i18n/translate/es-es.json +++ b/src/renderer/src/i18n/translate/es-es.json @@ -2041,6 +2041,7 @@ "provider": { "cannot_remove_builtin": "No se puede eliminar el proveedor integrado", "existing": "El proveedor ya existe", + "get_providers": "Error al obtener proveedores disponibles", "not_found": "El proveedor de OCR no existe", "update_failed": "Actualización de la configuración fallida" }, diff --git a/src/renderer/src/i18n/translate/fr-fr.json b/src/renderer/src/i18n/translate/fr-fr.json index 9e36984cbf..e82bbe99e8 100644 --- a/src/renderer/src/i18n/translate/fr-fr.json +++ b/src/renderer/src/i18n/translate/fr-fr.json @@ -2041,6 +2041,7 @@ "provider": { "cannot_remove_builtin": "Impossible de supprimer le fournisseur intégré", "existing": "Le fournisseur existe déjà", + "get_providers": "Échec de l'obtention des fournisseurs disponibles", "not_found": "Le fournisseur OCR n'existe pas", "update_failed": "Échec de la mise à jour de la configuration" }, diff --git a/src/renderer/src/i18n/translate/ja-jp.json b/src/renderer/src/i18n/translate/ja-jp.json index d9db953aef..5600675de2 100644 --- a/src/renderer/src/i18n/translate/ja-jp.json +++ b/src/renderer/src/i18n/translate/ja-jp.json @@ -2041,6 +2041,7 @@ "provider": { "cannot_remove_builtin": "組み込みプロバイダーは削除できません", "existing": "プロバイダーはすでに存在します", + "get_providers": "利用可能なプロバイダーの取得に失敗しました", "not_found": "OCRプロバイダーが存在しません", "update_failed": "更新構成に失敗しました" }, diff --git a/src/renderer/src/i18n/translate/pt-pt.json b/src/renderer/src/i18n/translate/pt-pt.json index 8f46afda0c..c732b90f1a 100644 --- a/src/renderer/src/i18n/translate/pt-pt.json +++ b/src/renderer/src/i18n/translate/pt-pt.json @@ -2041,6 +2041,7 @@ "provider": { "cannot_remove_builtin": "Não é possível excluir o provedor integrado", "existing": "O provedor já existe", + "get_providers": "Falha ao obter provedores disponíveis", "not_found": "O provedor OCR não existe", "update_failed": "Falha ao atualizar a configuração" }, diff --git a/src/renderer/src/i18n/translate/ru-ru.json b/src/renderer/src/i18n/translate/ru-ru.json index 8bf1b823c5..394cc9163a 100644 --- a/src/renderer/src/i18n/translate/ru-ru.json +++ b/src/renderer/src/i18n/translate/ru-ru.json @@ -2041,6 +2041,7 @@ "provider": { "cannot_remove_builtin": "Не удается удалить встроенного поставщика", "existing": "Поставщик уже существует", + "get_providers": "Не удалось получить доступных поставщиков", "not_found": "Поставщик OCR отсутствует", "update_failed": "Обновление конфигурации не удалось" }, diff --git a/src/renderer/src/pages/settings/DocProcessSettings/OcrImageSettings.tsx b/src/renderer/src/pages/settings/DocProcessSettings/OcrImageSettings.tsx index 622d1349d4..9050088d56 100644 --- a/src/renderer/src/pages/settings/DocProcessSettings/OcrImageSettings.tsx +++ b/src/renderer/src/pages/settings/DocProcessSettings/OcrImageSettings.tsx @@ -1,11 +1,14 @@ +import { Alert, Skeleton } from '@heroui/react' import { loggerService } from '@logger' import { ErrorTag } from '@renderer/components/Tags/ErrorTag' import { isMac, isWin } from '@renderer/config/constant' import { useOcrProviders } from '@renderer/hooks/useOcrProvider' import { BuiltinOcrProviderIds, ImageOcrProvider, isImageOcrProvider, OcrProvider } from '@renderer/types' +import { getErrorMessage } from '@renderer/utils' import { Select } from 'antd' -import { useEffect, useMemo } from 'react' +import { useCallback, useEffect, useMemo } from 'react' import { useTranslation } from 'react-i18next' +import useSWRImmutable from 'swr/immutable' import { SettingRow, SettingRowTitle } from '..' @@ -18,10 +21,16 @@ type Props = { const OcrImageSettings = ({ setProvider }: Props) => { const { t } = useTranslation() const { providers, imageProvider, getOcrProviderName, setImageProviderId } = useOcrProviders() + const fetcher = useCallback(() => { + return window.api.ocr.listProviders() + }, []) + + const { data: validProviders, isLoading, error } = useSWRImmutable('ocr/providers', fetcher) const imageProviders = providers.filter((p) => isImageOcrProvider(p)) // 挂载时更新外部状态 + // FIXME: Just keep the imageProvider always valid, so we don't need update it in this component. useEffect(() => { setProvider(imageProvider) }, [imageProvider, setProvider]) @@ -40,12 +49,17 @@ const OcrImageSettings = ({ setProvider }: Props) => { const platformSupport = isMac || isWin const options = useMemo(() => { + if (!validProviders) return [] const platformFilter = platformSupport ? () => true : (p: ImageOcrProvider) => p.id !== BuiltinOcrProviderIds.system - return imageProviders.filter(platformFilter).map((p) => ({ - value: p.id, - label: getOcrProviderName(p) - })) - }, [getOcrProviderName, imageProviders, platformSupport]) + const validFilter = (p: ImageOcrProvider) => validProviders.includes(p.id) + return imageProviders + .filter(platformFilter) + .filter(validFilter) + .map((p) => ({ + value: p.id, + label: getOcrProviderName(p) + })) + }, [getOcrProviderName, imageProviders, platformSupport, validProviders]) const isSystem = imageProvider.id === BuiltinOcrProviderIds.system @@ -55,12 +69,23 @@ const OcrImageSettings = ({ setProvider }: Props) => { {t('settings.tool.ocr.image_provider')}
{!platformSupport && isSystem && } - setImageProvider(id)} + options={options} + /> + )} + {error && ( + + )} +
diff --git a/src/renderer/src/pages/settings/DocProcessSettings/OcrOVSettings.tsx b/src/renderer/src/pages/settings/DocProcessSettings/OcrOVSettings.tsx new file mode 100644 index 0000000000..5075766086 --- /dev/null +++ b/src/renderer/src/pages/settings/DocProcessSettings/OcrOVSettings.tsx @@ -0,0 +1,32 @@ +import { useOcrProvider } from '@renderer/hooks/useOcrProvider' +import { BuiltinOcrProviderIds, isOcrOVProvider } from '@renderer/types' +import { Flex, Tag } from 'antd' +import { useTranslation } from 'react-i18next' + +import { SettingRow, SettingRowTitle } from '..' + +export const OcrOVSettings = () => { + const { t } = useTranslation() + const { provider } = useOcrProvider(BuiltinOcrProviderIds.ovocr) + + if (!isOcrOVProvider(provider)) { + throw new Error('Not OV OCR provider.') + } + + return ( + <> + + + + {t('settings.tool.ocr.common.langs')} + + +
+ 🇬🇧 {t('languages.english')} + 🇨🇳 {t('languages.chinese')} + 🇭🇰 {t('languages.chinese-traditional')} +
+
+ + ) +} diff --git a/src/renderer/src/pages/settings/DocProcessSettings/OcrProviderSettings.tsx b/src/renderer/src/pages/settings/DocProcessSettings/OcrProviderSettings.tsx index 120e5a9e48..482ff2b9e8 100644 --- a/src/renderer/src/pages/settings/DocProcessSettings/OcrProviderSettings.tsx +++ b/src/renderer/src/pages/settings/DocProcessSettings/OcrProviderSettings.tsx @@ -8,6 +8,7 @@ import { Divider, Flex } from 'antd' import styled from 'styled-components' import { SettingGroup, SettingTitle } from '..' +import { OcrOVSettings } from './OcrOVSettings' import { OcrPpocrSettings } from './OcrPpocrSettings' import { OcrSystemSettings } from './OcrSystemSettings' import { OcrTesseractSettings } from './OcrTesseractSettings' @@ -35,6 +36,8 @@ const OcrProviderSettings = ({ provider }: Props) => { return case 'paddleocr': return + case 'ovocr': + return default: return null } diff --git a/src/renderer/src/store/migrate.ts b/src/renderer/src/store/migrate.ts index 362396ca2d..5cca66b47a 100644 --- a/src/renderer/src/store/migrate.ts +++ b/src/renderer/src/store/migrate.ts @@ -2667,6 +2667,15 @@ const migrateConfig = { logger.error('migrate 162 error', error as Error) return state } + }, + '163': (state: RootState) => { + try { + addOcrProvider(state, BUILTIN_OCR_PROVIDERS_MAP.ovocr) + return state + } catch (error) { + logger.error('migrate 163 error', error as Error) + return state + } } } diff --git a/src/renderer/src/types/ocr.ts b/src/renderer/src/types/ocr.ts index d67cba958d..765c4e411c 100644 --- a/src/renderer/src/types/ocr.ts +++ b/src/renderer/src/types/ocr.ts @@ -5,7 +5,8 @@ import { FileMetadata, ImageFileMetadata, isImageFileMetadata, TranslateLanguage export const BuiltinOcrProviderIds = { tesseract: 'tesseract', system: 'system', - paddleocr: 'paddleocr' + paddleocr: 'paddleocr', + ovocr: 'ovocr' } as const export type BuiltinOcrProviderId = keyof typeof BuiltinOcrProviderIds @@ -188,3 +189,19 @@ export type OcrPpocrProvider = { export const isOcrPpocrProvider = (p: OcrProvider): p is OcrPpocrProvider => { return p.id === BuiltinOcrProviderIds.paddleocr } + +// OV OCR Types +export type OcrOvConfig = OcrProviderBaseConfig & { + langs?: TranslateLanguageCode[] +} + +export type OcrOvProvider = { + id: 'ovocr' + config: OcrOvConfig +} & ImageOcrProvider & + // PdfOcrProvider & + BuiltinOcrProvider + +export const isOcrOVProvider = (p: OcrProvider): p is OcrOvProvider => { + return p.id === BuiltinOcrProviderIds.ovocr +}