diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7a007e4e91..2d60f3e75c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -79,6 +79,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} RENDERER_VITE_AIHUBMIX_SECRET: ${{ vars.RENDERER_VITE_AIHUBMIX_SECRET }} NODE_OPTIONS: --max-old-space-size=8192 + MAIN_VITE_MINERU_API_KEY: ${{ vars.MAIN_VITE_MINERU_API_KEY }} - name: Build Mac if: matrix.os == 'macos-latest' @@ -95,6 +96,7 @@ jobs: RENDERER_VITE_AIHUBMIX_SECRET: ${{ vars.RENDERER_VITE_AIHUBMIX_SECRET }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} NODE_OPTIONS: --max-old-space-size=8192 + MAIN_VITE_MINERU_API_KEY: ${{ vars.MAIN_VITE_MINERU_API_KEY }} - name: Build Windows if: matrix.os == 'windows-latest' @@ -105,6 +107,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} RENDERER_VITE_AIHUBMIX_SECRET: ${{ vars.RENDERER_VITE_AIHUBMIX_SECRET }} NODE_OPTIONS: --max-old-space-size=8192 + MAIN_VITE_MINERU_API_KEY: ${{ vars.MAIN_VITE_MINERU_API_KEY }} - name: Release uses: ncipollo/release-action@v1 diff --git a/electron-builder.yml b/electron-builder.yml index 1303a4a3c8..4bcf025c26 100644 --- a/electron-builder.yml +++ b/electron-builder.yml @@ -53,6 +53,8 @@ files: - '!node_modules/pdf-parse/lib/pdf.js/{v1.9.426,v1.10.88,v2.0.550}' - '!node_modules/mammoth/{mammoth.browser.js,mammoth.browser.min.js}' - '!node_modules/selection-hook/prebuilds/**/*' # we rebuild .node, don't use prebuilds + - '!node_modules/pdfjs-dist/web/**/*' + - '!node_modules/pdfjs-dist/legacy/web/*' - '!node_modules/selection-hook/node_modules' # we don't need what in the node_modules dir - '!node_modules/selection-hook/src' # we don't need source files - '!**/*.{h,iobj,ipdb,tlog,recipe,vcxproj,vcxproj.filters,Makefile,*.Makefile}' # filter .node build files diff --git a/electron.vite.config.ts b/electron.vite.config.ts index 770a47d479..2b4c5e6b92 100644 --- a/electron.vite.config.ts +++ b/electron.vite.config.ts @@ -20,7 +20,7 @@ export default 
defineConfig({ }, build: { rollupOptions: { - external: ['@libsql/client', 'bufferutil', 'utf-8-validate'], + external: ['@libsql/client', 'bufferutil', 'utf-8-validate', '@cherrystudio/mac-system-ocr'], output: { // 彻底禁用代码分割 - 返回 null 强制单文件打包 manualChunks: undefined, diff --git a/package.json b/package.json index 35a85bf162..468ef5138e 100644 --- a/package.json +++ b/package.json @@ -58,6 +58,7 @@ "prepare": "husky" }, "dependencies": { + "@cherrystudio/pdf-to-img-napi": "^0.0.1", "@libsql/client": "0.14.0", "@libsql/win32-x64-msvc": "^0.4.7", "@strongtz/win32-arm64-msvc": "^0.4.7", @@ -66,6 +67,7 @@ "node-stream-zip": "^1.15.0", "notion-helper": "^1.3.22", "os-proxy-config": "^1.1.2", + "pdfjs-dist": "4.10.38", "selection-hook": "^1.0.3", "turndown": "7.2.0" }, @@ -101,6 +103,7 @@ "@kangfenmao/keyv-storage": "^0.1.0", "@langchain/community": "^0.3.36", "@langchain/ollama": "^0.2.1", + "@mistralai/mistralai": "^1.6.0", "@modelcontextprotocol/sdk": "^1.11.4", "@mozilla/readability": "^0.6.0", "@notionhq/client": "^2.2.15", @@ -225,6 +228,9 @@ "word-extractor": "^1.0.4", "zipread": "^1.3.3" }, + "optionalDependencies": { + "@cherrystudio/mac-system-ocr": "^0.2.2" + }, "resolutions": { "pdf-parse@npm:1.1.1": "patch:pdf-parse@npm%3A1.1.1#~/.yarn/patches/pdf-parse-npm-1.1.1-04a6109b2a.patch", "@langchain/openai@npm:^0.3.16": "patch:@langchain/openai@npm%3A0.3.16#~/.yarn/patches/@langchain-openai-npm-0.3.16-e525b59526.patch", diff --git a/packages/shared/IpcChannel.ts b/packages/shared/IpcChannel.ts index 8118065278..7dd60bab06 100644 --- a/packages/shared/IpcChannel.ts +++ b/packages/shared/IpcChannel.ts @@ -118,6 +118,7 @@ export enum IpcChannel { KnowledgeBase_Remove = 'knowledge-base:remove', KnowledgeBase_Search = 'knowledge-base:search', KnowledgeBase_Rerank = 'knowledge-base:rerank', + KnowledgeBase_Check_Quota = 'knowledge-base:check-quota', //file File_Open = 'file:open', @@ -128,9 +129,10 @@ export enum IpcChannel { File_Clear = 'file:clear', File_Read = 
'file:read', File_Delete = 'file:delete', + File_DeleteDir = 'file:deleteDir', File_Get = 'file:get', File_SelectFolder = 'file:selectFolder', - File_Create = 'file:create', + File_CreateTempFile = 'file:createTempFile', File_Write = 'file:write', File_WriteWithId = 'file:writeWithId', File_SaveImage = 'file:saveImage', @@ -143,6 +145,12 @@ export enum IpcChannel { File_GetPdfInfo = 'file:getPdfInfo', Fs_Read = 'fs:read', + // file service + FileService_Upload = 'file-service:upload', + FileService_List = 'file-service:list', + FileService_Delete = 'file-service:delete', + FileService_Retrieve = 'file-service:retrieve', + Export_Word = 'export:word', Shortcuts_Update = 'shortcuts:update', diff --git a/packages/shared/config/types.ts b/packages/shared/config/types.ts index 48a76c4778..28bb4acf65 100644 --- a/packages/shared/config/types.ts +++ b/packages/shared/config/types.ts @@ -1,6 +1,11 @@ +import { ProcessingStatus } from '@types' + export type LoaderReturn = { entriesAdded: number uniqueId: string uniqueIds: string[] loaderType: string + status?: ProcessingStatus + message?: string + messageSource?: 'preprocess' | 'embedding' } diff --git a/scripts/after-pack.js b/scripts/after-pack.js index a764642308..4b18d2dacd 100644 --- a/scripts/after-pack.js +++ b/scripts/after-pack.js @@ -23,6 +23,9 @@ exports.default = async function (context) { const node_modules_path = path.join(context.appOutDir, 'resources', 'app.asar.unpacked', 'node_modules') const _arch = arch === Arch.arm64 ? 
['linux-arm64-gnu', 'linux-arm64-musl'] : ['linux-x64-gnu', 'linux-x64-musl'] keepPackageNodeFiles(node_modules_path, '@libsql', _arch) + + // 删除 macOS 专用的 OCR 包 + removeMacOnlyPackages(node_modules_path) } if (platform === 'windows') { @@ -35,6 +38,8 @@ exports.default = async function (context) { keepPackageNodeFiles(node_modules_path, '@strongtz', ['win32-x64-msvc']) keepPackageNodeFiles(node_modules_path, '@libsql', ['win32-x64-msvc']) } + + removeMacOnlyPackages(node_modules_path) } if (platform === 'windows') { @@ -43,6 +48,22 @@ exports.default = async function (context) { } } +/** + * 删除 macOS 专用的包 + * @param {string} nodeModulesPath + */ +function removeMacOnlyPackages(nodeModulesPath) { + const macOnlyPackages = ['@cherrystudio/mac-system-ocr'] + + macOnlyPackages.forEach((packageName) => { + const packagePath = path.join(nodeModulesPath, packageName) + if (fs.existsSync(packagePath)) { + fs.rmSync(packagePath, { recursive: true, force: true }) + console.log(`[After Pack] Removed macOS-only package: ${packageName}`) + } + }) +} + /** * 使用指定架构的 node_modules 文件 * @param {*} nodeModulesPath diff --git a/src/main/ipc.ts b/src/main/ipc.ts index 0176a27525..a9c5169096 100644 --- a/src/main/ipc.ts +++ b/src/main/ipc.ts @@ -7,7 +7,7 @@ import { getBinaryPath, isBinaryExists, runInstallScript } from '@main/utils/pro import { handleZoomFactor } from '@main/utils/zoom' import { UpgradeChannel } from '@shared/config/constant' import { IpcChannel } from '@shared/IpcChannel' -import { Shortcut, ThemeMode } from '@types' +import { FileMetadata, Provider, Shortcut, ThemeMode } from '@types' import { BrowserWindow, dialog, ipcMain, session, shell, systemPreferences, webContents } from 'electron' import log from 'electron-log' import { Notification } from 'src/renderer/src/types/notification' @@ -17,8 +17,8 @@ import BackupManager from './services/BackupManager' import { configManager } from './services/ConfigManager' import CopilotService from './services/CopilotService' 
import { ExportService } from './services/ExportService' -import FileService from './services/FileService' import FileStorage from './services/FileStorage' +import FileService from './services/FileSystemService' import KnowledgeService from './services/KnowledgeService' import mcpService from './services/MCPService' import NotificationService from './services/NotificationService' @@ -26,6 +26,7 @@ import * as NutstoreService from './services/NutstoreService' import ObsidianVaultService from './services/ObsidianVaultService' import { ProxyConfig, proxyManager } from './services/ProxyManager' import { pythonService } from './services/PythonService' +import { FileServiceManager } from './services/remotefile/FileServiceManager' import { searchService } from './services/SearchService' import { SelectionService } from './services/SelectionService' import { registerShortcuts, unregisterAllShortcuts } from './services/ShortcutService' @@ -377,9 +378,10 @@ export function registerIpc(mainWindow: BrowserWindow, app: Electron.App) { ipcMain.handle(IpcChannel.File_Clear, fileManager.clear) ipcMain.handle(IpcChannel.File_Read, fileManager.readFile) ipcMain.handle(IpcChannel.File_Delete, fileManager.deleteFile) + ipcMain.handle('file:deleteDir', fileManager.deleteDir) ipcMain.handle(IpcChannel.File_Get, fileManager.getFile) ipcMain.handle(IpcChannel.File_SelectFolder, fileManager.selectFolder) - ipcMain.handle(IpcChannel.File_Create, fileManager.createTempFile) + ipcMain.handle(IpcChannel.File_CreateTempFile, fileManager.createTempFile) ipcMain.handle(IpcChannel.File_Write, fileManager.writeFile) ipcMain.handle(IpcChannel.File_WriteWithId, fileManager.writeFileWithId) ipcMain.handle(IpcChannel.File_SaveImage, fileManager.saveImage) @@ -391,6 +393,27 @@ export function registerIpc(mainWindow: BrowserWindow, app: Electron.App) { ipcMain.handle(IpcChannel.File_Copy, fileManager.copyFile) ipcMain.handle(IpcChannel.File_BinaryImage, fileManager.binaryImage) + // file service + 
ipcMain.handle(IpcChannel.FileService_Upload, async (_, provider: Provider, file: FileMetadata) => { + const service = FileServiceManager.getInstance().getService(provider) + return await service.uploadFile(file) + }) + + ipcMain.handle(IpcChannel.FileService_List, async (_, provider: Provider) => { + const service = FileServiceManager.getInstance().getService(provider) + return await service.listFiles() + }) + + ipcMain.handle(IpcChannel.FileService_Delete, async (_, provider: Provider, fileId: string) => { + const service = FileServiceManager.getInstance().getService(provider) + return await service.deleteFile(fileId) + }) + + ipcMain.handle(IpcChannel.FileService_Retrieve, async (_, provider: Provider, fileId: string) => { + const service = FileServiceManager.getInstance().getService(provider) + return await service.retrieveFile(fileId) + }) + // fs ipcMain.handle(IpcChannel.Fs_Read, FileService.readFile) @@ -420,6 +443,7 @@ export function registerIpc(mainWindow: BrowserWindow, app: Electron.App) { ipcMain.handle(IpcChannel.KnowledgeBase_Remove, KnowledgeService.remove) ipcMain.handle(IpcChannel.KnowledgeBase_Search, KnowledgeService.search) ipcMain.handle(IpcChannel.KnowledgeBase_Rerank, KnowledgeService.rerank) + ipcMain.handle(IpcChannel.KnowledgeBase_Check_Quota, KnowledgeService.checkQuota) // window ipcMain.handle(IpcChannel.Windows_SetMinimumSize, (_, width: number, height: number) => { diff --git a/src/main/knowledage/loader/index.ts b/src/main/knowledage/loader/index.ts index ba66b33e3d..783e62881a 100644 --- a/src/main/knowledage/loader/index.ts +++ b/src/main/knowledage/loader/index.ts @@ -4,7 +4,7 @@ import { JsonLoader, LocalPathLoader, RAGApplication, TextLoader } from '@cherry import type { AddLoaderReturn } from '@cherrystudio/embedjs-interfaces' import { WebLoader } from '@cherrystudio/embedjs-loader-web' import { LoaderReturn } from '@shared/config/types' -import { FileType, KnowledgeBaseParams } from '@types' +import { FileMetadata, 
KnowledgeBaseParams } from '@types' import Logger from 'electron-log' import { DraftsExportLoader } from './draftsExportLoader' @@ -39,7 +39,7 @@ const FILE_LOADER_MAP: Record = { export async function addOdLoader( ragApplication: RAGApplication, - file: FileType, + file: FileMetadata, base: KnowledgeBaseParams, forceReload: boolean ): Promise { @@ -65,7 +65,7 @@ export async function addOdLoader( export async function addFileLoader( ragApplication: RAGApplication, - file: FileType, + file: FileMetadata, base: KnowledgeBaseParams, forceReload: boolean ): Promise { diff --git a/src/main/ocr/BaseOcrProvider.ts b/src/main/ocr/BaseOcrProvider.ts new file mode 100644 index 0000000000..1bc7ce8530 --- /dev/null +++ b/src/main/ocr/BaseOcrProvider.ts @@ -0,0 +1,122 @@ +import fs from 'node:fs' +import path from 'node:path' + +import { windowService } from '@main/services/WindowService' +import { getFileExt } from '@main/utils/file' +import { FileMetadata, OcrProvider } from '@types' +import { app } from 'electron' +import { TypedArray } from 'pdfjs-dist/types/src/display/api' + +export default abstract class BaseOcrProvider { + protected provider: OcrProvider + public storageDir = path.join(app.getPath('userData'), 'Data', 'Files') + + constructor(provider: OcrProvider) { + if (!provider) { + throw new Error('OCR provider is not set') + } + this.provider = provider + } + abstract parseFile(sourceId: string, file: FileMetadata): Promise<{ processedFile: FileMetadata; quota?: number }> + + /** + * 检查文件是否已经被预处理过 + * 统一检测方法:如果 Data/Files/{file.id} 是目录,说明已被预处理 + * @param file 文件信息 + * @returns 如果已处理返回处理后的文件信息,否则返回null + */ + public async checkIfAlreadyProcessed(file: FileMetadata): Promise { + try { + // 检查 Data/Files/{file.id} 是否是目录 + const preprocessDirPath = path.join(this.storageDir, file.id) + + if (fs.existsSync(preprocessDirPath)) { + const stats = await fs.promises.stat(preprocessDirPath) + + // 如果是目录,说明已经被预处理过 + if (stats.isDirectory()) { + // 查找目录中的处理结果文件 + const files 
= await fs.promises.readdir(preprocessDirPath) + + // 查找主要的处理结果文件(.md 或 .txt) + const processedFile = files.find((fileName) => fileName.endsWith('.md') || fileName.endsWith('.txt')) + + if (processedFile) { + const processedFilePath = path.join(preprocessDirPath, processedFile) + const processedStats = await fs.promises.stat(processedFilePath) + const ext = getFileExt(processedFile) + + return { + ...file, + name: file.name.replace(file.ext, ext), + path: processedFilePath, + ext: ext, + size: processedStats.size, + created_at: processedStats.birthtime.toISOString() + } + } + } + } + + return null + } catch (error) { + // 如果检查过程中出现错误,返回null表示未处理 + return null + } + } + + /** + * 辅助方法:延迟执行 + */ + public delay = (ms: number): Promise => { + return new Promise((resolve) => setTimeout(resolve, ms)) + } + + public async readPdf( + source: string | URL | TypedArray, + passwordCallback?: (fn: (password: string) => void, reason: string) => string + ) { + const { getDocument } = await import('pdfjs-dist/legacy/build/pdf.mjs') + const documentLoadingTask = getDocument(source) + if (passwordCallback) { + documentLoadingTask.onPassword = passwordCallback + } + + const document = await documentLoadingTask.promise + return document + } + + public async sendOcrProgress(sourceId: string, progress: number): Promise { + const mainWindow = windowService.getMainWindow() + mainWindow?.webContents.send('file-ocr-progress', { + itemId: sourceId, + progress: progress + }) + } + + /** + * 将文件移动到附件目录 + * @param fileId 文件id + * @param filePaths 需要移动的文件路径数组 + * @returns 移动后的文件路径数组 + */ + public moveToAttachmentsDir(fileId: string, filePaths: string[]): string[] { + const attachmentsPath = path.join(this.storageDir, fileId) + if (!fs.existsSync(attachmentsPath)) { + fs.mkdirSync(attachmentsPath, { recursive: true }) + } + + const movedPaths: string[] = [] + + for (const filePath of filePaths) { + if (fs.existsSync(filePath)) { + const fileName = path.basename(filePath) + const destPath = 
path.join(attachmentsPath, fileName) + fs.copyFileSync(filePath, destPath) + fs.unlinkSync(filePath) // 删除原文件,实现"移动" + movedPaths.push(destPath) + } + } + return movedPaths + } +} diff --git a/src/main/ocr/DefaultOcrProvider.ts b/src/main/ocr/DefaultOcrProvider.ts new file mode 100644 index 0000000000..83c8d51c91 --- /dev/null +++ b/src/main/ocr/DefaultOcrProvider.ts @@ -0,0 +1,12 @@ +import { FileMetadata, OcrProvider } from '@types' + +import BaseOcrProvider from './BaseOcrProvider' + +export default class DefaultOcrProvider extends BaseOcrProvider { + constructor(provider: OcrProvider) { + super(provider) + } + public parseFile(): Promise<{ processedFile: FileMetadata }> { + throw new Error('Method not implemented.') + } +} diff --git a/src/main/ocr/MacSysOcrProvider.ts b/src/main/ocr/MacSysOcrProvider.ts new file mode 100644 index 0000000000..df281eb60b --- /dev/null +++ b/src/main/ocr/MacSysOcrProvider.ts @@ -0,0 +1,128 @@ +import { isMac } from '@main/constant' +import { FileMetadata, OcrProvider } from '@types' +import Logger from 'electron-log' +import * as fs from 'fs' +import * as path from 'path' +import { TextItem } from 'pdfjs-dist/types/src/display/api' + +import BaseOcrProvider from './BaseOcrProvider' + +export default class MacSysOcrProvider extends BaseOcrProvider { + private readonly MIN_TEXT_LENGTH = 1000 + private MacOCR: any + + private async initMacOCR() { + if (!isMac) { + throw new Error('MacSysOcrProvider is only available on macOS') + } + if (!this.MacOCR) { + try { + // @ts-ignore This module is optional and only installed/available on macOS. Runtime checks prevent execution on other platforms. + const module = await import('@cherrystudio/mac-system-ocr') + this.MacOCR = module.default + } catch (error) { + Logger.error('[OCR] Failed to load mac-system-ocr:', error) + throw error + } + } + return this.MacOCR + } + + private getRecognitionLevel(level?: number) { + return level === 0 ? 
this.MacOCR.RECOGNITION_LEVEL_FAST : this.MacOCR.RECOGNITION_LEVEL_ACCURATE + } + + constructor(provider: OcrProvider) { + super(provider) + } + + private async processPages( + results: any, + totalPages: number, + sourceId: string, + writeStream: fs.WriteStream + ): Promise { + await this.initMacOCR() + // TODO: 下个版本后面使用批处理,以及p-queue来优化 + for (let i = 0; i < totalPages; i++) { + // Convert pages to buffers + const pageNum = i + 1 + const pageBuffer = await results.getPage(pageNum) + + // Process batch + const ocrResult = await this.MacOCR.recognizeFromBuffer(pageBuffer, { + ocrOptions: { + recognitionLevel: this.getRecognitionLevel(this.provider.options?.recognitionLevel), + minConfidence: this.provider.options?.minConfidence || 0.5 + } + }) + + // Write results in order + writeStream.write(ocrResult.text + '\n') + + // Update progress + await this.sendOcrProgress(sourceId, (pageNum / totalPages) * 100) + } + } + + public async isScanPdf(buffer: Buffer): Promise { + const doc = await this.readPdf(new Uint8Array(buffer)) + const pageLength = doc.numPages + let counts = 0 + const pagesToCheck = Math.min(pageLength, 10) + for (let i = 0; i < pagesToCheck; i++) { + const page = await doc.getPage(i + 1) + const pageData = await page.getTextContent() + const pageText = pageData.items.map((item) => (item as TextItem).str).join('') + counts += pageText.length + if (counts >= this.MIN_TEXT_LENGTH) { + return false + } + } + return true + } + + public async parseFile(sourceId: string, file: FileMetadata): Promise<{ processedFile: FileMetadata }> { + Logger.info(`[OCR] Starting OCR process for file: ${file.name}`) + if (file.ext === '.pdf') { + try { + const { pdf } = await import('@cherrystudio/pdf-to-img-napi') + const pdfBuffer = await fs.promises.readFile(file.path) + const results = await pdf(pdfBuffer, { + scale: 2 + }) + const totalPages = results.length + + const baseDir = path.dirname(file.path) + const baseName = path.basename(file.path, path.extname(file.path)) + 
const txtFileName = `${baseName}.txt` + const txtFilePath = path.join(baseDir, txtFileName) + + const writeStream = fs.createWriteStream(txtFilePath) + await this.processPages(results, totalPages, sourceId, writeStream) + + await new Promise((resolve, reject) => { + writeStream.end(() => { + Logger.info(`[OCR] OCR process completed successfully for ${file.origin_name}`) + resolve() + }) + writeStream.on('error', reject) + }) + const movedPaths = this.moveToAttachmentsDir(file.id, [txtFilePath]) + return { + processedFile: { + ...file, + name: txtFileName, + path: movedPaths[0], + ext: '.txt', + size: fs.statSync(movedPaths[0]).size + } + } + } catch (error) { + Logger.error('[OCR] Error during OCR process:', error) + throw error + } + } + return { processedFile: file } + } +} diff --git a/src/main/ocr/OcrProvider.ts b/src/main/ocr/OcrProvider.ts new file mode 100644 index 0000000000..07587f01e0 --- /dev/null +++ b/src/main/ocr/OcrProvider.ts @@ -0,0 +1,26 @@ +import { FileMetadata, OcrProvider as Provider } from '@types' + +import BaseOcrProvider from './BaseOcrProvider' +import OcrProviderFactory from './OcrProviderFactory' + +export default class OcrProvider { + private sdk: BaseOcrProvider + constructor(provider: Provider) { + this.sdk = OcrProviderFactory.create(provider) + } + public async parseFile( + sourceId: string, + file: FileMetadata + ): Promise<{ processedFile: FileMetadata; quota?: number }> { + return this.sdk.parseFile(sourceId, file) + } + + /** + * 检查文件是否已经被预处理过 + * @param file 文件信息 + * @returns 如果已处理返回处理后的文件信息,否则返回null + */ + public async checkIfAlreadyProcessed(file: FileMetadata): Promise { + return this.sdk.checkIfAlreadyProcessed(file) + } +} diff --git a/src/main/ocr/OcrProviderFactory.ts b/src/main/ocr/OcrProviderFactory.ts new file mode 100644 index 0000000000..96d95a63ad --- /dev/null +++ b/src/main/ocr/OcrProviderFactory.ts @@ -0,0 +1,20 @@ +import { isMac } from '@main/constant' +import { OcrProvider } from '@types' +import Logger from 
'electron-log' + +import BaseOcrProvider from './BaseOcrProvider' +import DefaultOcrProvider from './DefaultOcrProvider' +import MacSysOcrProvider from './MacSysOcrProvider' +export default class OcrProviderFactory { + static create(provider: OcrProvider): BaseOcrProvider { + switch (provider.id) { + case 'system': + if (!isMac) { + Logger.warn('[OCR] System OCR provider is only available on macOS') + } + return new MacSysOcrProvider(provider) + default: + return new DefaultOcrProvider(provider) + } + } +} diff --git a/src/main/preprocess/BasePreprocessProvider.ts b/src/main/preprocess/BasePreprocessProvider.ts new file mode 100644 index 0000000000..016e4d10d0 --- /dev/null +++ b/src/main/preprocess/BasePreprocessProvider.ts @@ -0,0 +1,126 @@ +import fs from 'node:fs' +import path from 'node:path' + +import { windowService } from '@main/services/WindowService' +import { getFileExt } from '@main/utils/file' +import { FileMetadata, PreprocessProvider } from '@types' +import { app } from 'electron' +import { TypedArray } from 'pdfjs-dist/types/src/display/api' + +export default abstract class BasePreprocessProvider { + protected provider: PreprocessProvider + protected userId?: string + public storageDir = path.join(app.getPath('userData'), 'Data', 'Files') + + constructor(provider: PreprocessProvider, userId?: string) { + if (!provider) { + throw new Error('Preprocess provider is not set') + } + this.provider = provider + this.userId = userId + } + abstract parseFile(sourceId: string, file: FileMetadata): Promise<{ processedFile: FileMetadata; quota?: number }> + + abstract checkQuota(): Promise + + /** + * 检查文件是否已经被预处理过 + * 统一检测方法:如果 Data/Files/{file.id} 是目录,说明已被预处理 + * @param file 文件信息 + * @returns 如果已处理返回处理后的文件信息,否则返回null + */ + public async checkIfAlreadyProcessed(file: FileMetadata): Promise { + try { + // 检查 Data/Files/{file.id} 是否是目录 + const preprocessDirPath = path.join(this.storageDir, file.id) + + if (fs.existsSync(preprocessDirPath)) { + const stats = 
await fs.promises.stat(preprocessDirPath) + + // 如果是目录,说明已经被预处理过 + if (stats.isDirectory()) { + // 查找目录中的处理结果文件 + const files = await fs.promises.readdir(preprocessDirPath) + + // 查找主要的处理结果文件(.md 或 .txt) + const processedFile = files.find((fileName) => fileName.endsWith('.md') || fileName.endsWith('.txt')) + + if (processedFile) { + const processedFilePath = path.join(preprocessDirPath, processedFile) + const processedStats = await fs.promises.stat(processedFilePath) + const ext = getFileExt(processedFile) + + return { + ...file, + name: file.name.replace(file.ext, ext), + path: processedFilePath, + ext: ext, + size: processedStats.size, + created_at: processedStats.birthtime.toISOString() + } + } + } + } + + return null + } catch (error) { + // 如果检查过程中出现错误,返回null表示未处理 + return null + } + } + + /** + * 辅助方法:延迟执行 + */ + public delay = (ms: number): Promise => { + return new Promise((resolve) => setTimeout(resolve, ms)) + } + + public async readPdf( + source: string | URL | TypedArray, + passwordCallback?: (fn: (password: string) => void, reason: string) => string + ) { + const { getDocument } = await import('pdfjs-dist/legacy/build/pdf.mjs') + const documentLoadingTask = getDocument(source) + if (passwordCallback) { + documentLoadingTask.onPassword = passwordCallback + } + + const document = await documentLoadingTask.promise + return document + } + + public async sendPreprocessProgress(sourceId: string, progress: number): Promise { + const mainWindow = windowService.getMainWindow() + mainWindow?.webContents.send('file-preprocess-progress', { + itemId: sourceId, + progress: progress + }) + } + + /** + * 将文件移动到附件目录 + * @param fileId 文件id + * @param filePaths 需要移动的文件路径数组 + * @returns 移动后的文件路径数组 + */ + public moveToAttachmentsDir(fileId: string, filePaths: string[]): string[] { + const attachmentsPath = path.join(this.storageDir, fileId) + if (!fs.existsSync(attachmentsPath)) { + fs.mkdirSync(attachmentsPath, { recursive: true }) + } + + const movedPaths: string[] = [] 
+ + for (const filePath of filePaths) { + if (fs.existsSync(filePath)) { + const fileName = path.basename(filePath) + const destPath = path.join(attachmentsPath, fileName) + fs.copyFileSync(filePath, destPath) + fs.unlinkSync(filePath) // 删除原文件,实现"移动" + movedPaths.push(destPath) + } + } + return movedPaths + } +} diff --git a/src/main/preprocess/DefaultPreprocessProvider.ts b/src/main/preprocess/DefaultPreprocessProvider.ts new file mode 100644 index 0000000000..3899a3d25a --- /dev/null +++ b/src/main/preprocess/DefaultPreprocessProvider.ts @@ -0,0 +1,16 @@ +import { FileMetadata, PreprocessProvider } from '@types' + +import BasePreprocessProvider from './BasePreprocessProvider' + +export default class DefaultPreprocessProvider extends BasePreprocessProvider { + constructor(provider: PreprocessProvider) { + super(provider) + } + public parseFile(): Promise<{ processedFile: FileMetadata }> { + throw new Error('Method not implemented.') + } + + public checkQuota(): Promise { + throw new Error('Method not implemented.') + } +} diff --git a/src/main/preprocess/Doc2xPreprocessProvider.ts b/src/main/preprocess/Doc2xPreprocessProvider.ts new file mode 100644 index 0000000000..ad311a5b83 --- /dev/null +++ b/src/main/preprocess/Doc2xPreprocessProvider.ts @@ -0,0 +1,329 @@ +import fs from 'node:fs' +import path from 'node:path' + +import { FileMetadata, PreprocessProvider } from '@types' +import AdmZip from 'adm-zip' +import axios, { AxiosRequestConfig } from 'axios' +import Logger from 'electron-log' + +import BasePreprocessProvider from './BasePreprocessProvider' + +type ApiResponse = { + code: string + data: T + message?: string +} + +type PreuploadResponse = { + uid: string + url: string +} + +type StatusResponse = { + status: string + progress: number +} + +type ParsedFileResponse = { + status: string + url: string +} + +export default class Doc2xPreprocessProvider extends BasePreprocessProvider { + constructor(provider: PreprocessProvider) { + super(provider) + } + + 
private async validateFile(filePath: string): Promise { + const pdfBuffer = await fs.promises.readFile(filePath) + + const doc = await this.readPdf(new Uint8Array(pdfBuffer)) + + // 文件页数小于1000页 + if (doc.numPages >= 1000) { + throw new Error(`PDF page count (${doc.numPages}) exceeds the limit of 1000 pages`) + } + // 文件大小小于300MB + if (pdfBuffer.length >= 300 * 1024 * 1024) { + const fileSizeMB = Math.round(pdfBuffer.length / (1024 * 1024)) + throw new Error(`PDF file size (${fileSizeMB}MB) exceeds the limit of 300MB`) + } + } + + public async parseFile(sourceId: string, file: FileMetadata): Promise<{ processedFile: FileMetadata }> { + try { + Logger.info(`Preprocess processing started: ${file.path}`) + + // 步骤1: 准备上传 + const { uid, url } = await this.preupload() + Logger.info(`Preprocess preupload completed: uid=${uid}`) + + await this.validateFile(file.path) + + // 步骤2: 上传文件 + await this.putFile(file.path, url) + + // 步骤3: 等待处理完成 + await this.waitForProcessing(sourceId, uid) + Logger.info(`Preprocess parsing completed successfully for: ${file.path}`) + + // 步骤4: 导出文件 + const { path: outputPath } = await this.exportFile(file, uid) + + // 步骤5: 创建处理后的文件信息 + return { + processedFile: this.createProcessedFileInfo(file, outputPath) + } + } catch (error) { + Logger.error( + `Preprocess processing failed for ${file.path}: ${error instanceof Error ? 
error.message : String(error)}` + ) + throw error + } + } + + private createProcessedFileInfo(file: FileMetadata, outputPath: string): FileMetadata { + const outputFilePath = `${outputPath}/${file.name.split('.').slice(0, -1).join('.')}.md` + return { + ...file, + name: file.name.replace('.pdf', '.md'), + path: outputFilePath, + ext: '.md', + size: fs.statSync(outputFilePath).size + } + } + + /** + * 导出文件 + * @param file 文件信息 + * @param uid 预上传响应的uid + * @returns 导出文件的路径 + */ + public async exportFile(file: FileMetadata, uid: string): Promise<{ path: string }> { + Logger.info(`Exporting file: ${file.path}`) + + // 步骤1: 转换文件 + await this.convertFile(uid, file.path) + Logger.info(`File conversion completed for: ${file.path}`) + + // 步骤2: 等待导出并获取URL + const exportUrl = await this.waitForExport(uid) + + // 步骤3: 下载并解压文件 + return this.downloadFile(exportUrl, file) + } + + /** + * 等待处理完成 + * @param sourceId 源文件ID + * @param uid 预上传响应的uid + */ + private async waitForProcessing(sourceId: string, uid: string): Promise { + while (true) { + await this.delay(1000) + const { status, progress } = await this.getStatus(uid) + await this.sendPreprocessProgress(sourceId, progress) + Logger.info(`Preprocess processing status: ${status}, progress: ${progress}%`) + + if (status === 'success') { + return + } else if (status === 'failed') { + throw new Error('Preprocess processing failed') + } + } + } + + /** + * 等待导出完成 + * @param uid 预上传响应的uid + * @returns 导出文件的url + */ + private async waitForExport(uid: string): Promise { + while (true) { + await this.delay(1000) + const { status, url } = await this.getParsedFile(uid) + Logger.info(`Export status: ${status}`) + + if (status === 'success' && url) { + return url + } else if (status === 'failed') { + throw new Error('Export failed') + } + } + } + + /** + * 预上传文件 + * @returns 预上传响应的url和uid + */ + private async preupload(): Promise { + const config = this.createAuthConfig() + const endpoint = `${this.provider.apiHost}/api/v2/parse/preupload` 
+ + try { + const { data } = await axios.post>(endpoint, null, config) + + if (data.code === 'success' && data.data) { + return data.data + } else { + throw new Error(`API returned error: ${data.message || JSON.stringify(data)}`) + } + } catch (error) { + Logger.error(`Failed to get preupload URL: ${error instanceof Error ? error.message : String(error)}`) + throw new Error('Failed to get preupload URL') + } + } + + /** + * 上传文件 + * @param filePath 文件路径 + * @param url 预上传响应的url + */ + private async putFile(filePath: string, url: string): Promise { + try { + const fileStream = fs.createReadStream(filePath) + const response = await axios.put(url, fileStream) + + if (response.status !== 200) { + throw new Error(`HTTP status ${response.status}: ${response.statusText}`) + } + } catch (error) { + Logger.error(`Failed to upload file ${filePath}: ${error instanceof Error ? error.message : String(error)}`) + throw new Error('Failed to upload file') + } + } + + private async getStatus(uid: string): Promise { + const config = this.createAuthConfig() + const endpoint = `${this.provider.apiHost}/api/v2/parse/status?uid=${uid}` + + try { + const response = await axios.get>(endpoint, config) + + if (response.data.code === 'success' && response.data.data) { + return response.data.data + } else { + throw new Error(`API returned error: ${response.data.message || JSON.stringify(response.data)}`) + } + } catch (error) { + Logger.error(`Failed to get status for uid ${uid}: ${error instanceof Error ? 
error.message : String(error)}`) + throw new Error('Failed to get processing status') + } + } + + /** + * Preprocess文件 + * @param uid 预上传响应的uid + * @param filePath 文件路径 + */ + private async convertFile(uid: string, filePath: string): Promise { + const fileName = path.basename(filePath).split('.')[0] + const config = { + ...this.createAuthConfig(), + headers: { + ...this.createAuthConfig().headers, + 'Content-Type': 'application/json' + } + } + + const payload = { + uid, + to: 'md', + formula_mode: 'normal', + filename: fileName + } + + const endpoint = `${this.provider.apiHost}/api/v2/convert/parse` + + try { + const response = await axios.post>(endpoint, payload, config) + + if (response.data.code !== 'success') { + throw new Error(`API returned error: ${response.data.message || JSON.stringify(response.data)}`) + } + } catch (error) { + Logger.error(`Failed to convert file ${filePath}: ${error instanceof Error ? error.message : String(error)}`) + throw new Error('Failed to convert file') + } + } + + /** + * 获取解析后的文件信息 + * @param uid 预上传响应的uid + * @returns 解析后的文件信息 + */ + private async getParsedFile(uid: string): Promise { + const config = this.createAuthConfig() + const endpoint = `${this.provider.apiHost}/api/v2/convert/parse/result?uid=${uid}` + + try { + const response = await axios.get>(endpoint, config) + + if (response.status === 200 && response.data.data) { + return response.data.data + } else { + throw new Error(`HTTP status ${response.status}: ${response.statusText}`) + } + } catch (error) { + Logger.error( + `Failed to get parsed file for uid ${uid}: ${error instanceof Error ? 
error.message : String(error)}` + ) + throw new Error('Failed to get parsed file information') + } + } + + /** + * 下载文件 + * @param url 导出文件的url + * @param file 文件信息 + * @returns 下载文件的路径 + */ + private async downloadFile(url: string, file: FileMetadata): Promise<{ path: string }> { + const dirPath = this.storageDir + // 使用统一的存储路径:Data/Files/{file.id}/ + const extractPath = path.join(dirPath, file.id) + const zipPath = path.join(dirPath, `${file.id}.zip`) + + // 确保目录存在 + fs.mkdirSync(dirPath, { recursive: true }) + fs.mkdirSync(extractPath, { recursive: true }) + + Logger.info(`Downloading to export path: ${zipPath}`) + + try { + // 下载文件 + const response = await axios.get(url, { responseType: 'arraybuffer' }) + fs.writeFileSync(zipPath, response.data) + + // 确保提取目录存在 + if (!fs.existsSync(extractPath)) { + fs.mkdirSync(extractPath, { recursive: true }) + } + + // 解压文件 + const zip = new AdmZip(zipPath) + zip.extractAllTo(extractPath, true) + Logger.info(`Extracted files to: ${extractPath}`) + + // 删除临时ZIP文件 + fs.unlinkSync(zipPath) + + return { path: extractPath } + } catch (error) { + Logger.error(`Failed to download and extract file: ${error instanceof Error ? 
error.message : String(error)}`) + throw new Error('Failed to download and extract file') + } + } + + private createAuthConfig(): AxiosRequestConfig { + return { + headers: { + Authorization: `Bearer ${this.provider.apiKey}` + } + } + } + + public checkQuota(): Promise { + throw new Error('Method not implemented.') + } +} diff --git a/src/main/preprocess/MineruPreprocessProvider.ts b/src/main/preprocess/MineruPreprocessProvider.ts new file mode 100644 index 0000000000..a0a9c65417 --- /dev/null +++ b/src/main/preprocess/MineruPreprocessProvider.ts @@ -0,0 +1,399 @@ +import fs from 'node:fs' +import path from 'node:path' + +import { FileMetadata, PreprocessProvider } from '@types' +import AdmZip from 'adm-zip' +import axios from 'axios' +import Logger from 'electron-log' + +import BasePreprocessProvider from './BasePreprocessProvider' + +type ApiResponse = { + code: number + data: T + msg?: string + trace_id?: string +} + +type BatchUploadResponse = { + batch_id: string + file_urls: string[] +} + +type ExtractProgress = { + extracted_pages: number + total_pages: number + start_time: string +} + +type ExtractFileResult = { + file_name: string + state: 'done' | 'waiting-file' | 'pending' | 'running' | 'converting' | 'failed' + err_msg: string + full_zip_url?: string + extract_progress?: ExtractProgress +} + +type ExtractResultResponse = { + batch_id: string + extract_result: ExtractFileResult[] +} + +type QuotaResponse = { + code: number + data: { + user_left_quota: number + total_left_quota: number + } + msg?: string + trace_id?: string +} + +export default class MineruPreprocessProvider extends BasePreprocessProvider { + constructor(provider: PreprocessProvider, userId?: string) { + super(provider, userId) + // todo:免费期结束后删除 + this.provider.apiKey = this.provider.apiKey || import.meta.env.MAIN_VITE_MINERU_API_KEY + } + + public async parseFile( + sourceId: string, + file: FileMetadata + ): Promise<{ processedFile: FileMetadata; quota: number }> { + try { + 
Logger.info(`MinerU preprocess processing started: ${file.path}`) + await this.validateFile(file.path) + + // 1. 获取上传URL并上传文件 + const batchId = await this.uploadFile(file) + Logger.info(`MinerU file upload completed: batch_id=${batchId}`) + + // 2. 等待处理完成并获取结果 + const extractResult = await this.waitForCompletion(sourceId, batchId, file.origin_name) + Logger.info(`MinerU processing completed for batch: ${batchId}`) + + // 3. 下载并解压文件 + const { path: outputPath } = await this.downloadAndExtractFile(extractResult.full_zip_url!, file) + + // 4. check quota + const quota = await this.checkQuota() + + // 5. 创建处理后的文件信息 + return { + processedFile: this.createProcessedFileInfo(file, outputPath), + quota + } + } catch (error: any) { + Logger.error(`MinerU preprocess processing failed for ${file.path}: ${error.message}`) + throw new Error(error.message) + } + } + + public async checkQuota() { + try { + const quota = await fetch(`${this.provider.apiHost}/api/v4/quota`, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${this.provider.apiKey}`, + token: this.userId ?? 
'' + } + }) + if (!quota.ok) { + throw new Error(`HTTP ${quota.status}: ${quota.statusText}`) + } + const response: QuotaResponse = await quota.json() + return response.data.user_left_quota + } catch (error) { + console.error('Error checking quota:', error) + throw error + } + } + + private async validateFile(filePath: string): Promise { + const quota = await this.checkQuota() + const pdfBuffer = await fs.promises.readFile(filePath) + + const doc = await this.readPdf(new Uint8Array(pdfBuffer)) + + // 文件页数小于600页 + if (doc.numPages >= 600) { + throw new Error(`PDF page count (${doc.numPages}) exceeds the limit of 600 pages`) + } + // 文件大小小于200MB + if (pdfBuffer.length >= 200 * 1024 * 1024) { + const fileSizeMB = Math.round(pdfBuffer.length / (1024 * 1024)) + throw new Error(`PDF file size (${fileSizeMB}MB) exceeds the limit of 200MB`) + } + // 检查配额 + if (quota <= 0 || quota - doc.numPages <= 0) { + throw new Error('MinerU解析配额不足,请申请企业账户或自行部署,剩余额度:' + quota) + } + } + + private createProcessedFileInfo(file: FileMetadata, outputPath: string): FileMetadata { + // 查找解压后的主要文件 + let finalPath = '' + let finalName = file.origin_name.replace('.pdf', '.md') + + try { + const files = fs.readdirSync(outputPath) + + const mdFile = files.find((f) => f.endsWith('.md')) + if (mdFile) { + const originalMdPath = path.join(outputPath, mdFile) + const newMdPath = path.join(outputPath, finalName) + + // 重命名文件为原始文件名 + try { + fs.renameSync(originalMdPath, newMdPath) + finalPath = newMdPath + Logger.info(`Renamed markdown file from ${mdFile} to ${finalName}`) + } catch (renameError) { + Logger.warn(`Failed to rename file ${mdFile} to ${finalName}: ${renameError}`) + // 如果重命名失败,使用原文件 + finalPath = originalMdPath + finalName = mdFile + } + } + } catch (error) { + Logger.warn(`Failed to read output directory ${outputPath}: ${error}`) + finalPath = path.join(outputPath, `${file.id}.md`) + } + + return { + ...file, + name: finalName, + path: finalPath, + ext: '.md', + size: 
fs.existsSync(finalPath) ? fs.statSync(finalPath).size : 0 + } + } + + private async downloadAndExtractFile(zipUrl: string, file: FileMetadata): Promise<{ path: string }> { + const dirPath = this.storageDir + + const zipPath = path.join(dirPath, `${file.id}.zip`) + const extractPath = path.join(dirPath, `${file.id}`) + + Logger.info(`Downloading MinerU result to: ${zipPath}`) + + try { + // 下载ZIP文件 + const response = await axios.get(zipUrl, { responseType: 'arraybuffer' }) + fs.writeFileSync(zipPath, response.data) + Logger.info(`Downloaded ZIP file: ${zipPath}`) + + // 确保提取目录存在 + if (!fs.existsSync(extractPath)) { + fs.mkdirSync(extractPath, { recursive: true }) + } + + // 解压文件 + const zip = new AdmZip(zipPath) + zip.extractAllTo(extractPath, true) + Logger.info(`Extracted files to: ${extractPath}`) + + // 删除临时ZIP文件 + fs.unlinkSync(zipPath) + + return { path: extractPath } + } catch (error: any) { + Logger.error(`Failed to download and extract file: ${error.message}`) + throw new Error(error.message) + } + } + + private async uploadFile(file: FileMetadata): Promise { + try { + // 步骤1: 获取上传URL + const { batchId, fileUrls } = await this.getBatchUploadUrls(file) + Logger.info(`Got upload URLs for batch: ${batchId}`) + + console.log('batchId:', batchId, 'fileurls:', fileUrls) + // 步骤2: 上传文件到获取的URL + await this.putFileToUrl(file.path, fileUrls[0]) + Logger.info(`File uploaded successfully: ${file.path}`) + + return batchId + } catch (error: any) { + Logger.error(`Failed to upload file ${file.path}: ${error.message}`) + throw new Error(error.message) + } + } + + private async getBatchUploadUrls(file: FileMetadata): Promise<{ batchId: string; fileUrls: string[] }> { + const endpoint = `${this.provider.apiHost}/api/v4/file-urls/batch` + + const payload = { + language: 'auto', + enable_formula: true, + enable_table: true, + files: [ + { + name: file.origin_name, + is_ocr: true, + data_id: file.id + } + ] + } + + try { + const response = await fetch(endpoint, { + method: 
'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${this.provider.apiKey}`, + token: this.userId ?? '', + Accept: '*/*' + }, + body: JSON.stringify(payload) + }) + + if (response.ok) { + const data: ApiResponse = await response.json() + if (data.code === 0 && data.data) { + const { batch_id, file_urls } = data.data + return { + batchId: batch_id, + fileUrls: file_urls + } + } else { + throw new Error(`API returned error: ${data.msg || JSON.stringify(data)}`) + } + } else { + throw new Error(`HTTP ${response.status}: ${response.statusText}`) + } + } catch (error: any) { + Logger.error(`Failed to get batch upload URLs: ${error.message}`) + throw new Error(error.message) + } + } + + private async putFileToUrl(filePath: string, uploadUrl: string): Promise { + try { + const fileBuffer = await fs.promises.readFile(filePath) + + const response = await fetch(uploadUrl, { + method: 'PUT', + body: fileBuffer, + headers: { + 'Content-Type': 'application/pdf' + } + // headers: { + // 'Content-Length': fileBuffer.length.toString() + // } + }) + + if (!response.ok) { + // 克隆 response 以避免消费 body stream + const responseClone = response.clone() + + try { + const responseBody = await responseClone.text() + const errorInfo = { + status: response.status, + statusText: response.statusText, + url: response.url, + type: response.type, + redirected: response.redirected, + headers: Object.fromEntries(response.headers.entries()), + body: responseBody + } + + console.error('Response details:', errorInfo) + throw new Error(`Upload failed with status ${response.status}: ${responseBody}`) + } catch (parseError) { + throw new Error(`Upload failed with status ${response.status}. 
Could not parse response body.`) + } + } + + Logger.info(`File uploaded successfully to: ${uploadUrl}`) + } catch (error: any) { + Logger.error(`Failed to upload file to URL ${uploadUrl}: ${error}`) + throw new Error(error.message) + } + } + + private async getExtractResults(batchId: string): Promise { + const endpoint = `${this.provider.apiHost}/api/v4/extract-results/batch/${batchId}` + + try { + const response = await fetch(endpoint, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${this.provider.apiKey}`, + token: this.userId ?? '' + } + }) + + if (response.ok) { + const data: ApiResponse = await response.json() + if (data.code === 0 && data.data) { + return data.data + } else { + throw new Error(`API returned error: ${data.msg || JSON.stringify(data)}`) + } + } else { + throw new Error(`HTTP ${response.status}: ${response.statusText}`) + } + } catch (error: any) { + Logger.error(`Failed to get extract results for batch ${batchId}: ${error.message}`) + throw new Error(error.message) + } + } + + private async waitForCompletion( + sourceId: string, + batchId: string, + fileName: string, + maxRetries: number = 60, + intervalMs: number = 5000 + ): Promise { + let retries = 0 + + while (retries < maxRetries) { + try { + const result = await this.getExtractResults(batchId) + + // 查找对应文件的处理结果 + const fileResult = result.extract_result.find((item) => item.file_name === fileName) + if (!fileResult) { + throw new Error(`File ${fileName} not found in batch results`) + } + + // 检查处理状态 + if (fileResult.state === 'done' && fileResult.full_zip_url) { + Logger.info(`Processing completed for file: ${fileName}`) + return fileResult + } else if (fileResult.state === 'failed') { + throw new Error(`Processing failed for file: ${fileName}, error: ${fileResult.err_msg}`) + } else if (fileResult.state === 'running') { + // 发送进度更新 + if (fileResult.extract_progress) { + const progress = Math.round( + 
(fileResult.extract_progress.extracted_pages / fileResult.extract_progress.total_pages) * 100 + ) + await this.sendPreprocessProgress(sourceId, progress) + Logger.info(`File ${fileName} processing progress: ${progress}%`) + } else { + // 如果没有具体进度信息,发送一个通用进度 + await this.sendPreprocessProgress(sourceId, 50) + Logger.info(`File ${fileName} is still processing...`) + } + } + } catch (error) { + Logger.warn(`Failed to check status for batch ${batchId}, retry ${retries + 1}/${maxRetries}`) + if (retries === maxRetries - 1) { + throw error + } + } + + retries++ + await new Promise((resolve) => setTimeout(resolve, intervalMs)) + } + + throw new Error(`Processing timeout for batch: ${batchId}`) + } +} diff --git a/src/main/preprocess/MistralPreprocessProvider.ts b/src/main/preprocess/MistralPreprocessProvider.ts new file mode 100644 index 0000000000..3150162801 --- /dev/null +++ b/src/main/preprocess/MistralPreprocessProvider.ts @@ -0,0 +1,187 @@ +import fs from 'node:fs' + +import { MistralClientManager } from '@main/services/MistralClientManager' +import { MistralService } from '@main/services/remotefile/MistralService' +import { Mistral } from '@mistralai/mistralai' +import { DocumentURLChunk } from '@mistralai/mistralai/models/components/documenturlchunk' +import { ImageURLChunk } from '@mistralai/mistralai/models/components/imageurlchunk' +import { OCRResponse } from '@mistralai/mistralai/models/components/ocrresponse' +import { FileMetadata, FileTypes, PreprocessProvider, Provider } from '@types' +import Logger from 'electron-log' +import path from 'path' + +import BasePreprocessProvider from './BasePreprocessProvider' + +type PreuploadResponse = DocumentURLChunk | ImageURLChunk + +export default class MistralPreprocessProvider extends BasePreprocessProvider { + private sdk: Mistral + private fileService: MistralService + + constructor(provider: PreprocessProvider) { + super(provider) + const clientManager = MistralClientManager.getInstance() + const aiProvider: 
Provider = { + id: provider.id, + type: 'mistral', + name: provider.name, + apiKey: provider.apiKey!, + apiHost: provider.apiHost!, + models: [] + } + clientManager.initializeClient(aiProvider) + this.sdk = clientManager.getClient() + this.fileService = new MistralService(aiProvider) + } + + private async preupload(file: FileMetadata): Promise { + let document: PreuploadResponse + Logger.info(`preprocess preupload started for local file: ${file.path}`) + + if (file.ext.toLowerCase() === '.pdf') { + const uploadResponse = await this.fileService.uploadFile(file) + + if (uploadResponse.status === 'failed') { + Logger.error('File upload failed:', uploadResponse) + throw new Error('Failed to upload file: ' + uploadResponse.displayName) + } + await this.sendPreprocessProgress(file.id, 15) + const fileUrl = await this.sdk.files.getSignedUrl({ + fileId: uploadResponse.fileId + }) + Logger.info('Got signed URL:', fileUrl) + await this.sendPreprocessProgress(file.id, 20) + document = { + type: 'document_url', + documentUrl: fileUrl.url + } + } else { + const base64Image = Buffer.from(fs.readFileSync(file.path)).toString('base64') + document = { + type: 'image_url', + imageUrl: `data:image/png;base64,${base64Image}` + } + } + + if (!document) { + throw new Error('Unsupported file type') + } + return document + } + + public async parseFile(sourceId: string, file: FileMetadata): Promise<{ processedFile: FileMetadata }> { + try { + const document = await this.preupload(file) + const result = await this.sdk.ocr.process({ + model: this.provider.model!, + document: document, + includeImageBase64: true + }) + if (result) { + await this.sendPreprocessProgress(sourceId, 100) + const processedFile = this.convertFile(result, file) + return { + processedFile + } + } else { + throw new Error('preprocess processing failed: OCR response is empty') + } + } catch (error) { + throw new Error('preprocess processing failed: ' + error) + } + } + + private convertFile(result: OCRResponse, file: 
FileMetadata): FileMetadata { + // 使用统一的存储路径:Data/Files/{file.id}/ + const conversionId = file.id + const outputPath = path.join(this.storageDir, file.id) + // const outputPath = this.storageDir + const outputFileName = path.basename(file.path, path.extname(file.path)) + fs.mkdirSync(outputPath, { recursive: true }) + + const markdownParts: string[] = [] + let counter = 0 + + // Process each page + result.pages.forEach((page) => { + let pageMarkdown = page.markdown + + // Process images from this page + page.images.forEach((image) => { + if (image.imageBase64) { + let imageFormat = 'jpeg' // default format + let imageBase64Data = image.imageBase64 + + // Check for data URL prefix more efficiently + const prefixEnd = image.imageBase64.indexOf(';base64,') + if (prefixEnd > 0) { + const prefix = image.imageBase64.substring(0, prefixEnd) + const formatIndex = prefix.indexOf('image/') + if (formatIndex >= 0) { + imageFormat = prefix.substring(formatIndex + 6) + } + imageBase64Data = image.imageBase64.substring(prefixEnd + 8) + } + + const imageFileName = `img-${counter}.${imageFormat}` + const imagePath = path.join(outputPath, imageFileName) + + // Save image file + try { + fs.writeFileSync(imagePath, Buffer.from(imageBase64Data, 'base64')) + + // Update image reference in markdown + // Use relative path for better portability + const relativeImagePath = `./${imageFileName}` + + // Find the start and end of the image markdown + const imgStart = pageMarkdown.indexOf(image.imageBase64) + if (imgStart >= 0) { + // Find the markdown image syntax around this base64 + const mdStart = pageMarkdown.lastIndexOf('![', imgStart) + const mdEnd = pageMarkdown.indexOf(')', imgStart) + + if (mdStart >= 0 && mdEnd >= 0) { + // Replace just this specific image reference + pageMarkdown = + pageMarkdown.substring(0, mdStart) + + `![Image ${counter}](${relativeImagePath})` + + pageMarkdown.substring(mdEnd + 1) + } + } + + counter++ + } catch (error) { + Logger.error(`Failed to save image 
${imageFileName}:`, error) + } + } + }) + + markdownParts.push(pageMarkdown) + }) + + // Combine all markdown content with double newlines for readability + const combinedMarkdown = markdownParts.join('\n\n') + + // Write the markdown content to a file + const mdFileName = `${outputFileName}.md` + const mdFilePath = path.join(outputPath, mdFileName) + fs.writeFileSync(mdFilePath, combinedMarkdown) + + return { + id: conversionId, + name: file.name.replace(/\.[^/.]+$/, '.md'), + origin_name: file.origin_name, + path: mdFilePath, + created_at: new Date().toISOString(), + type: FileTypes.DOCUMENT, + ext: '.md', + size: fs.statSync(mdFilePath).size, + count: 1 + } as FileMetadata + } + + public checkQuota(): Promise { + throw new Error('Method not implemented.') + } +} diff --git a/src/main/preprocess/PreprocessProvider.ts b/src/main/preprocess/PreprocessProvider.ts new file mode 100644 index 0000000000..44a34f64ae --- /dev/null +++ b/src/main/preprocess/PreprocessProvider.ts @@ -0,0 +1,30 @@ +import { FileMetadata, PreprocessProvider as Provider } from '@types' + +import BasePreprocessProvider from './BasePreprocessProvider' +import PreprocessProviderFactory from './PreprocessProviderFactory' + +export default class PreprocessProvider { + private sdk: BasePreprocessProvider + constructor(provider: Provider, userId?: string) { + this.sdk = PreprocessProviderFactory.create(provider, userId) + } + public async parseFile( + sourceId: string, + file: FileMetadata + ): Promise<{ processedFile: FileMetadata; quota?: number }> { + return this.sdk.parseFile(sourceId, file) + } + + public async checkQuota(): Promise { + return this.sdk.checkQuota() + } + + /** + * 检查文件是否已经被预处理过 + * @param file 文件信息 + * @returns 如果已处理返回处理后的文件信息,否则返回null + */ + public async checkIfAlreadyProcessed(file: FileMetadata): Promise { + return this.sdk.checkIfAlreadyProcessed(file) + } +} diff --git a/src/main/preprocess/PreprocessProviderFactory.ts b/src/main/preprocess/PreprocessProviderFactory.ts new 
file mode 100644 index 0000000000..bebecd388f --- /dev/null +++ b/src/main/preprocess/PreprocessProviderFactory.ts @@ -0,0 +1,21 @@ +import { PreprocessProvider } from '@types' + +import BasePreprocessProvider from './BasePreprocessProvider' +import DefaultPreprocessProvider from './DefaultPreprocessProvider' +import Doc2xPreprocessProvider from './Doc2xPreprocessProvider' +import MineruPreprocessProvider from './MineruPreprocessProvider' +import MistralPreprocessProvider from './MistralPreprocessProvider' +export default class PreprocessProviderFactory { + static create(provider: PreprocessProvider, userId?: string): BasePreprocessProvider { + switch (provider.id) { + case 'doc2x': + return new Doc2xPreprocessProvider(provider) + case 'mistral': + return new MistralPreprocessProvider(provider) + case 'mineru': + return new MineruPreprocessProvider(provider, userId) + default: + return new DefaultPreprocessProvider(provider) + } + } +} diff --git a/src/main/services/FileStorage.ts b/src/main/services/FileStorage.ts index 0c81a454a7..0bdcdf56f5 100644 --- a/src/main/services/FileStorage.ts +++ b/src/main/services/FileStorage.ts @@ -1,6 +1,6 @@ import { getFilesDir, getFileType, getTempDir } from '@main/utils/file' import { documentExts, imageExts, MB } from '@shared/config/constant' -import { FileType } from '@types' +import { FileMetadata } from '@types' import * as crypto from 'crypto' import { dialog, @@ -53,8 +53,9 @@ class FileStorage { }) } - findDuplicateFile = async (filePath: string): Promise => { + findDuplicateFile = async (filePath: string): Promise => { const stats = fs.statSync(filePath) + console.log('stats', stats, filePath) const fileSize = stats.size const files = await fs.promises.readdir(this.storageDir) @@ -92,7 +93,7 @@ class FileStorage { public selectFile = async ( _: Electron.IpcMainInvokeEvent, options?: OpenDialogOptions - ): Promise => { + ): Promise => { const defaultOptions: OpenDialogOptions = { properties: ['openFile'] } @@ -151,7 
+152,7 @@ class FileStorage { } } - public uploadFile = async (_: Electron.IpcMainInvokeEvent, file: FileType): Promise => { + public uploadFile = async (_: Electron.IpcMainInvokeEvent, file: FileMetadata): Promise => { const duplicateFile = await this.findDuplicateFile(file.path) if (duplicateFile) { @@ -175,7 +176,7 @@ class FileStorage { const stats = await fs.promises.stat(destPath) const fileType = getFileType(ext) - const fileMetadata: FileType = { + const fileMetadata: FileMetadata = { id: uuid, origin_name, name: uuid + ext, @@ -190,7 +191,7 @@ class FileStorage { return fileMetadata } - public getFile = async (_: Electron.IpcMainInvokeEvent, filePath: string): Promise => { + public getFile = async (_: Electron.IpcMainInvokeEvent, filePath: string): Promise => { if (!fs.existsSync(filePath)) { return null } @@ -199,7 +200,7 @@ class FileStorage { const ext = path.extname(filePath) const fileType = getFileType(ext) - const fileInfo: FileType = { + const fileInfo: FileMetadata = { id: uuidv4(), origin_name: path.basename(filePath), name: path.basename(filePath), @@ -215,9 +216,19 @@ class FileStorage { } public deleteFile = async (_: Electron.IpcMainInvokeEvent, id: string): Promise => { + if (!fs.existsSync(path.join(this.storageDir, id))) { + return + } await fs.promises.unlink(path.join(this.storageDir, id)) } + public deleteDir = async (_: Electron.IpcMainInvokeEvent, id: string): Promise => { + if (!fs.existsSync(path.join(this.storageDir, id))) { + return + } + await fs.promises.rm(path.join(this.storageDir, id), { recursive: true }) + } + public readFile = async (_: Electron.IpcMainInvokeEvent, id: string): Promise => { const filePath = path.join(this.storageDir, id) @@ -252,8 +263,8 @@ class FileStorage { if (!fs.existsSync(this.tempDir)) { fs.mkdirSync(this.tempDir, { recursive: true }) } - const tempFilePath = path.join(this.tempDir, `temp_file_${uuidv4()}_${fileName}`) - return tempFilePath + + return path.join(this.tempDir, 
`temp_file_${uuidv4()}_${fileName}`) } public writeFile = async ( @@ -280,7 +291,7 @@ class FileStorage { } } - public saveBase64Image = async (_: Electron.IpcMainInvokeEvent, base64Data: string): Promise => { + public saveBase64Image = async (_: Electron.IpcMainInvokeEvent, base64Data: string): Promise => { try { if (!base64Data) { throw new Error('Base64 data is required') @@ -306,7 +317,7 @@ class FileStorage { await fs.promises.writeFile(destPath, buffer) - const fileMetadata: FileType = { + const fileMetadata: FileMetadata = { id: uuid, origin_name: uuid + ext, name: uuid + ext, @@ -465,7 +476,7 @@ class FileStorage { _: Electron.IpcMainInvokeEvent, url: string, isUseContentType?: boolean - ): Promise => { + ): Promise => { try { const response = await fetch(url) if (!response.ok) { @@ -507,7 +518,7 @@ class FileStorage { const stats = await fs.promises.stat(destPath) const fileType = getFileType(ext) - const fileMetadata: FileType = { + const fileMetadata: FileMetadata = { id: uuid, origin_name: filename, name: uuid + ext, diff --git a/src/main/services/FileService.ts b/src/main/services/FileSystemService.ts similarity index 100% rename from src/main/services/FileService.ts rename to src/main/services/FileSystemService.ts diff --git a/src/main/services/KnowledgeService.ts b/src/main/services/KnowledgeService.ts index 686e643711..c57c0eb104 100644 --- a/src/main/services/KnowledgeService.ts +++ b/src/main/services/KnowledgeService.ts @@ -25,13 +25,15 @@ import Embeddings from '@main/knowledage/embeddings/Embeddings' import { addFileLoader } from '@main/knowledage/loader' import { NoteLoader } from '@main/knowledage/loader/noteLoader' import Reranker from '@main/knowledage/reranker/Reranker' +import OcrProvider from '@main/ocr/OcrProvider' +import PreprocessProvider from '@main/preprocess/PreprocessProvider' import { windowService } from '@main/services/WindowService' import { getDataPath } from '@main/utils' import { getAllFiles } from '@main/utils/file' 
import { MB } from '@shared/config/constant' import type { LoaderReturn } from '@shared/config/types' import { IpcChannel } from '@shared/IpcChannel' -import { FileType, KnowledgeBaseParams, KnowledgeItem } from '@types' +import { FileMetadata, KnowledgeBaseParams, KnowledgeItem } from '@types' import Logger from 'electron-log' import { v4 as uuidv4 } from 'uuid' @@ -39,12 +41,14 @@ export interface KnowledgeBaseAddItemOptions { base: KnowledgeBaseParams item: KnowledgeItem forceReload?: boolean + userId?: string } interface KnowledgeBaseAddItemOptionsNonNullableAttribute { base: KnowledgeBaseParams item: KnowledgeItem forceReload: boolean + userId: string } interface EvaluateTaskWorkload { @@ -96,7 +100,13 @@ class KnowledgeService { private knowledgeItemProcessingQueueMappingPromise: Map void> = new Map() private static MAXIMUM_WORKLOAD = 80 * MB private static MAXIMUM_PROCESSING_ITEM_COUNT = 30 - private static ERROR_LOADER_RETURN: LoaderReturn = { entriesAdded: 0, uniqueId: '', uniqueIds: [''], loaderType: '' } + private static ERROR_LOADER_RETURN: LoaderReturn = { + entriesAdded: 0, + uniqueId: '', + uniqueIds: [''], + loaderType: '', + status: 'failed' + } constructor() { this.initStorageDir() @@ -150,6 +160,7 @@ class KnowledgeService { } public delete = async (_: Electron.IpcMainInvokeEvent, id: string): Promise => { + console.log('id', id) const dbPath = path.join(this.storageDir, id) if (fs.existsSync(dbPath)) { fs.rmSync(dbPath, { recursive: true }) @@ -162,28 +173,49 @@ class KnowledgeService { this.workload >= KnowledgeService.MAXIMUM_WORKLOAD ) } - private fileTask( ragApplication: RAGApplication, options: KnowledgeBaseAddItemOptionsNonNullableAttribute ): LoaderTask { - const { base, item, forceReload } = options - const file = item.content as FileType + const { base, item, forceReload, userId } = options + const file = item.content as FileMetadata const loaderTask: LoaderTask = { loaderTasks: [ { state: LoaderTaskItemState.PENDING, - task: () => - 
addFileLoader(ragApplication, file, base, forceReload) - .then((result) => { - loaderTask.loaderDoneReturn = result - return result - }) - .catch((err) => { - Logger.error(err) - return KnowledgeService.ERROR_LOADER_RETURN - }), + task: async () => { + try { + // 添加预处理逻辑 + const fileToProcess: FileMetadata = await this.preprocessing(file, base, item, userId) + + // 使用处理后的文件进行加载 + return addFileLoader(ragApplication, fileToProcess, base, forceReload) + .then((result) => { + loaderTask.loaderDoneReturn = result + return result + }) + .catch((e) => { + Logger.error(`Error in addFileLoader for ${file.name}: ${e}`) + const errorResult: LoaderReturn = { + ...KnowledgeService.ERROR_LOADER_RETURN, + message: e.message, + messageSource: 'embedding' + } + loaderTask.loaderDoneReturn = errorResult + return errorResult + }) + } catch (e: any) { + Logger.error(`Preprocessing failed for ${file.name}: ${e}`) + const errorResult: LoaderReturn = { + ...KnowledgeService.ERROR_LOADER_RETURN, + message: e.message, + messageSource: 'preprocess' + } + loaderTask.loaderDoneReturn = errorResult + return errorResult + } + }, evaluateTaskWorkload: { workload: file.size } } ], @@ -192,7 +224,6 @@ class KnowledgeService { return loaderTask } - private directoryTask( ragApplication: RAGApplication, options: KnowledgeBaseAddItemOptionsNonNullableAttribute @@ -232,7 +263,11 @@ class KnowledgeService { }) .catch((err) => { Logger.error(err) - return KnowledgeService.ERROR_LOADER_RETURN + return { + ...KnowledgeService.ERROR_LOADER_RETURN, + message: `Failed to add dir loader: ${err.message}`, + messageSource: 'embedding' + } }), evaluateTaskWorkload: { workload: file.size } }) @@ -278,7 +313,11 @@ class KnowledgeService { }) .catch((err) => { Logger.error(err) - return KnowledgeService.ERROR_LOADER_RETURN + return { + ...KnowledgeService.ERROR_LOADER_RETURN, + message: `Failed to add url loader: ${err.message}`, + messageSource: 'embedding' + } }) }, evaluateTaskWorkload: { workload: 2 * MB } @@ 
-318,7 +357,11 @@ class KnowledgeService { }) .catch((err) => { Logger.error(err) - return KnowledgeService.ERROR_LOADER_RETURN + return { + ...KnowledgeService.ERROR_LOADER_RETURN, + message: `Failed to add sitemap loader: ${err.message}`, + messageSource: 'embedding' + } }), evaluateTaskWorkload: { workload: 20 * MB } } @@ -364,7 +407,11 @@ class KnowledgeService { }) .catch((err) => { Logger.error(err) - return KnowledgeService.ERROR_LOADER_RETURN + return { + ...KnowledgeService.ERROR_LOADER_RETURN, + message: `Failed to add note loader: ${err.message}`, + messageSource: 'embedding' + } }) }, evaluateTaskWorkload: { workload: contentBytes.length } @@ -430,10 +477,10 @@ class KnowledgeService { }) } - public add = (_: Electron.IpcMainInvokeEvent, options: KnowledgeBaseAddItemOptions): Promise => { + public add = async (_: Electron.IpcMainInvokeEvent, options: KnowledgeBaseAddItemOptions): Promise => { return new Promise((resolve) => { - const { base, item, forceReload = false } = options - const optionsNonNullableAttribute = { base, item, forceReload } + const { base, item, forceReload = false, userId = '' } = options + const optionsNonNullableAttribute = { base, item, forceReload, userId } this.getRagApplication(base) .then((ragApplication) => { const task = (() => { @@ -459,12 +506,20 @@ class KnowledgeService { }) this.processingQueueHandle() } else { - resolve(KnowledgeService.ERROR_LOADER_RETURN) + resolve({ + ...KnowledgeService.ERROR_LOADER_RETURN, + message: 'Unsupported item type', + messageSource: 'embedding' + }) } }) .catch((err) => { Logger.error(err) - resolve(KnowledgeService.ERROR_LOADER_RETURN) + resolve({ + ...KnowledgeService.ERROR_LOADER_RETURN, + message: `Failed to add item: ${err.message}`, + messageSource: 'embedding' + }) }) }) } @@ -497,6 +552,69 @@ class KnowledgeService { } return await new Reranker(base).rerank(search, results) } + + public getStorageDir = (): string => { + return this.storageDir + } + + private preprocessing = async 
( + file: FileMetadata, + base: KnowledgeBaseParams, + item: KnowledgeItem, + userId: string + ): Promise => { + let fileToProcess: FileMetadata = file + if (base.preprocessOrOcrProvider && file.ext.toLowerCase() === '.pdf') { + try { + let provider: PreprocessProvider | OcrProvider + if (base.preprocessOrOcrProvider.type === 'preprocess') { + provider = new PreprocessProvider(base.preprocessOrOcrProvider.provider, userId) + } else { + provider = new OcrProvider(base.preprocessOrOcrProvider.provider) + } + // 首先检查文件是否已经被预处理过 + const alreadyProcessed = await provider.checkIfAlreadyProcessed(file) + if (alreadyProcessed) { + Logger.info(`File already preprocess processed, using cached result: ${file.path}`) + return alreadyProcessed + } + + // 执行预处理 + Logger.info(`Starting preprocess processing for scanned PDF: ${file.path}`) + const { processedFile, quota } = await provider.parseFile(item.id, file) + fileToProcess = processedFile + const mainWindow = windowService.getMainWindow() + mainWindow?.webContents.send('file-preprocess-finished', { + itemId: item.id, + quota: quota + }) + } catch (err) { + Logger.error(`Preprocess processing failed: ${err}`) + // 如果预处理失败,使用原始文件 + // fileToProcess = file + throw new Error(`Preprocess processing failed: ${err}`) + } + } + + return fileToProcess + } + + public checkQuota = async ( + _: Electron.IpcMainInvokeEvent, + base: KnowledgeBaseParams, + userId: string + ): Promise => { + try { + if (base.preprocessOrOcrProvider && base.preprocessOrOcrProvider.type === 'preprocess') { + const provider = new PreprocessProvider(base.preprocessOrOcrProvider.provider, userId) + return await provider.checkQuota() + } + throw new Error('No preprocess provider configured') + } catch (err) { + Logger.error(`Failed to check quota: ${err}`) + throw new Error(`Failed to check quota: ${err}`) + } + } } export default new KnowledgeService() diff --git a/src/main/services/MistralClientManager.ts b/src/main/services/MistralClientManager.ts new file 
mode 100644 index 0000000000..fa4aa53df8 --- /dev/null +++ b/src/main/services/MistralClientManager.ts @@ -0,0 +1,33 @@ +import { Mistral } from '@mistralai/mistralai' +import { Provider } from '@types' + +export class MistralClientManager { + private static instance: MistralClientManager + private client: Mistral | null = null + + // eslint-disable-next-line @typescript-eslint/no-empty-function + private constructor() {} + + public static getInstance(): MistralClientManager { + if (!MistralClientManager.instance) { + MistralClientManager.instance = new MistralClientManager() + } + return MistralClientManager.instance + } + + public initializeClient(provider: Provider): void { + if (!this.client) { + this.client = new Mistral({ + apiKey: provider.apiKey, + serverURL: provider.apiHost + }) + } + } + + public getClient(): Mistral { + if (!this.client) { + throw new Error('Mistral client not initialized. Call initializeClient first.') + } + return this.client + } +} diff --git a/src/main/services/remotefile/BaseFileService.ts b/src/main/services/remotefile/BaseFileService.ts new file mode 100644 index 0000000000..ff06eb0b44 --- /dev/null +++ b/src/main/services/remotefile/BaseFileService.ts @@ -0,0 +1,13 @@ +import { FileListResponse, FileMetadata, FileUploadResponse, Provider } from '@types' + +export abstract class BaseFileService { + protected readonly provider: Provider + protected constructor(provider: Provider) { + this.provider = provider + } + + abstract uploadFile(file: FileMetadata): Promise + abstract deleteFile(fileId: string): Promise + abstract listFiles(): Promise + abstract retrieveFile(fileId: string): Promise +} diff --git a/src/main/services/remotefile/FileServiceManager.ts b/src/main/services/remotefile/FileServiceManager.ts new file mode 100644 index 0000000000..9cdf6f834c --- /dev/null +++ b/src/main/services/remotefile/FileServiceManager.ts @@ -0,0 +1,41 @@ +import { Provider } from '@types' + +import { BaseFileService } from './BaseFileService' 
+import { GeminiService } from './GeminiService' +import { MistralService } from './MistralService' + +export class FileServiceManager { + private static instance: FileServiceManager + private services: Map = new Map() + + // eslint-disable-next-line @typescript-eslint/no-empty-function + private constructor() {} + + static getInstance(): FileServiceManager { + if (!this.instance) { + this.instance = new FileServiceManager() + } + return this.instance + } + + getService(provider: Provider): BaseFileService { + const type = provider.type + let service = this.services.get(type) + + if (!service) { + switch (type) { + case 'gemini': + service = new GeminiService(provider) + break + case 'mistral': + service = new MistralService(provider) + break + default: + throw new Error(`Unsupported service type: ${type}`) + } + this.services.set(type, service) + } + + return service + } +} diff --git a/src/main/services/remotefile/GeminiService.ts b/src/main/services/remotefile/GeminiService.ts new file mode 100644 index 0000000000..82178f5c14 --- /dev/null +++ b/src/main/services/remotefile/GeminiService.ts @@ -0,0 +1,190 @@ +import { File, Files, FileState, GoogleGenAI } from '@google/genai' +import { FileListResponse, FileMetadata, FileUploadResponse, Provider } from '@types' +import Logger from 'electron-log' +import { v4 as uuidv4 } from 'uuid' + +import { CacheService } from '../CacheService' +import { BaseFileService } from './BaseFileService' + +export class GeminiService extends BaseFileService { + private static readonly FILE_LIST_CACHE_KEY = 'gemini_file_list' + private static readonly FILE_CACHE_DURATION = 48 * 60 * 60 * 1000 + private static readonly LIST_CACHE_DURATION = 3000 + + protected readonly fileManager: Files + + constructor(provider: Provider) { + super(provider) + this.fileManager = new GoogleGenAI({ + vertexai: false, + apiKey: provider.apiKey, + httpOptions: { + baseUrl: provider.apiHost + } + }).files + } + + async uploadFile(file: FileMetadata): 
Promise { + try { + const uploadResult = await this.fileManager.upload({ + file: file.path, + config: { + mimeType: 'application/pdf', + name: file.id, + displayName: file.origin_name + } + }) + + // 根据文件状态设置响应状态 + let status: 'success' | 'processing' | 'failed' | 'unknown' + switch (uploadResult.state) { + case FileState.ACTIVE: + status = 'success' + break + case FileState.PROCESSING: + status = 'processing' + break + case FileState.FAILED: + status = 'failed' + break + default: + status = 'unknown' + } + + const response: FileUploadResponse = { + fileId: uploadResult.name || '', + displayName: file.origin_name, + status, + originalFile: { + type: 'gemini', + file: uploadResult + } + } + + // 只缓存成功的文件 + if (status === 'success') { + const cacheKey = `${GeminiService.FILE_LIST_CACHE_KEY}_${response.fileId}` + CacheService.set(cacheKey, response, GeminiService.FILE_CACHE_DURATION) + } + + return response + } catch (error) { + Logger.error('Error uploading file to Gemini:', error) + return { + fileId: '', + displayName: file.origin_name, + status: 'failed', + originalFile: undefined + } + } + } + + async retrieveFile(fileId: string): Promise { + try { + const cachedResponse = CacheService.get(`${GeminiService.FILE_LIST_CACHE_KEY}_${fileId}`) + Logger.info('[GeminiService] cachedResponse', cachedResponse) + if (cachedResponse) { + return cachedResponse + } + const files: File[] = [] + + for await (const f of await this.fileManager.list()) { + files.push(f) + } + Logger.info('[GeminiService] files', files) + const file = files + .filter((file) => file.state === FileState.ACTIVE) + .find((file) => file.name?.substring(6) === fileId) // 去掉 files/ 前缀 + Logger.info('[GeminiService] file', file) + if (file) { + return { + fileId: fileId, + displayName: file.displayName || '', + status: 'success', + originalFile: { + type: 'gemini', + file + } + } + } + + return { + fileId: fileId, + displayName: '', + status: 'failed', + originalFile: undefined + } + } catch (error) { + 
Logger.error('Error retrieving file from Gemini:', error) + return { + fileId: fileId, + displayName: '', + status: 'failed', + originalFile: undefined + } + } + } + + async listFiles(): Promise { + try { + const cachedList = CacheService.get(GeminiService.FILE_LIST_CACHE_KEY) + if (cachedList) { + return cachedList + } + const geminiFiles: File[] = [] + + for await (const f of await this.fileManager.list()) { + geminiFiles.push(f) + } + const fileList: FileListResponse = { + files: geminiFiles + .filter((file) => file.state === FileState.ACTIVE) + .map((file) => { + // 更新单个文件的缓存 + const fileResponse: FileUploadResponse = { + fileId: file.name || uuidv4(), + displayName: file.displayName || '', + status: 'success', + originalFile: { + type: 'gemini', + file + } + } + CacheService.set( + `${GeminiService.FILE_LIST_CACHE_KEY}_${file.name}`, + fileResponse, + GeminiService.FILE_CACHE_DURATION + ) + + return { + id: file.name || uuidv4(), + displayName: file.displayName || '', + size: Number(file.sizeBytes), + status: 'success', + originalFile: { + type: 'gemini', + file + } + } + }) + } + + // 更新文件列表缓存 + CacheService.set(GeminiService.FILE_LIST_CACHE_KEY, fileList, GeminiService.LIST_CACHE_DURATION) + return fileList + } catch (error) { + Logger.error('Error listing files from Gemini:', error) + return { files: [] } + } + } + + async deleteFile(fileId: string): Promise { + try { + await this.fileManager.delete({ name: fileId }) + Logger.info(`File ${fileId} deleted from Gemini`) + } catch (error) { + Logger.error('Error deleting file from Gemini:', error) + throw error + } + } +} diff --git a/src/main/services/remotefile/MistralService.ts b/src/main/services/remotefile/MistralService.ts new file mode 100644 index 0000000000..3964871ce4 --- /dev/null +++ b/src/main/services/remotefile/MistralService.ts @@ -0,0 +1,104 @@ +import fs from 'node:fs/promises' + +import { Mistral } from '@mistralai/mistralai' +import { FileListResponse, FileMetadata, FileUploadResponse, 
Provider } from '@types' +import Logger from 'electron-log' + +import { MistralClientManager } from '../MistralClientManager' +import { BaseFileService } from './BaseFileService' + +export class MistralService extends BaseFileService { + private readonly client: Mistral + + constructor(provider: Provider) { + super(provider) + const clientManager = MistralClientManager.getInstance() + clientManager.initializeClient(provider) + this.client = clientManager.getClient() + } + + async uploadFile(file: FileMetadata): Promise { + try { + const fileBuffer = await fs.readFile(file.path) + const response = await this.client.files.upload({ + file: { + fileName: file.origin_name, + content: new Uint8Array(fileBuffer) + }, + purpose: 'ocr' + }) + + return { + fileId: response.id, + displayName: file.origin_name, + status: 'success', + originalFile: { + type: 'mistral', + file: response + } + } + } catch (error) { + Logger.error('Error uploading file:', error) + return { + fileId: '', + displayName: file.origin_name, + status: 'failed' + } + } + } + + async listFiles(): Promise { + try { + const response = await this.client.files.list({}) + return { + files: response.data.map((file) => ({ + id: file.id, + displayName: file.filename || '', + size: file.sizeBytes, + status: 'success', // All listed files are processed, + originalFile: { + type: 'mistral', + file + } + })) + } + } catch (error) { + Logger.error('Error listing files:', error) + return { files: [] } + } + } + + async deleteFile(fileId: string): Promise { + try { + await this.client.files.delete({ + fileId + }) + Logger.info(`File ${fileId} deleted`) + } catch (error) { + Logger.error('Error deleting file:', error) + throw error + } + } + + async retrieveFile(fileId: string): Promise { + try { + const response = await this.client.files.retrieve({ + fileId + }) + + return { + fileId: response.id, + displayName: response.filename || '', + status: 'success' // Retrieved files are always processed + } + } catch (error) { 
+ Logger.error('Error retrieving file:', error) + return { + fileId: fileId, + displayName: '', + status: 'failed', + originalFile: undefined + } + } + } +} diff --git a/src/main/utils/file.ts b/src/main/utils/file.ts index a85c5cf8ed..2c52e82a71 100644 --- a/src/main/utils/file.ts +++ b/src/main/utils/file.ts @@ -4,7 +4,7 @@ import path from 'node:path' import { isLinux, isPortable } from '@main/constant' import { audioExts, documentExts, imageExts, textExts, videoExts } from '@shared/config/constant' -import { FileType, FileTypes } from '@types' +import { FileMetadata, FileTypes } from '@types' import { app } from 'electron' import { v4 as uuidv4 } from 'uuid' @@ -130,7 +130,19 @@ export function getFileType(ext: string): FileTypes { return fileTypeMap.get(ext) || FileTypes.OTHER } -export function getAllFiles(dirPath: string, arrayOfFiles: FileType[] = []): FileType[] { +export function getFileDir(filePath: string) { + return path.dirname(filePath) +} + +export function getFileName(filePath: string) { + return path.basename(filePath) +} + +export function getFileExt(filePath: string) { + return path.extname(filePath) +} + +export function getAllFiles(dirPath: string, arrayOfFiles: FileMetadata[] = []): FileMetadata[] { const files = fs.readdirSync(dirPath) files.forEach((file) => { @@ -152,7 +164,7 @@ export function getAllFiles(dirPath: string, arrayOfFiles: FileType[] = []): Fil const name = path.basename(file) const size = fs.statSync(fullPath).size - const fileItem: FileType = { + const fileItem: FileMetadata = { id: uuidv4(), name, path: fullPath, diff --git a/src/main/utils/process.ts b/src/main/utils/process.ts index 36a0d731bb..b83f8a8b26 100644 --- a/src/main/utils/process.ts +++ b/src/main/utils/process.ts @@ -49,7 +49,7 @@ export async function getBinaryPath(name?: string): Promise { const binaryName = await getBinaryName(name) const binariesDir = path.join(os.homedir(), '.cherrystudio', 'bin') - const binariesDirExists = await 
fs.existsSync(binariesDir) + const binariesDirExists = fs.existsSync(binariesDir) return binariesDirExists ? path.join(binariesDir, binaryName) : binaryName } diff --git a/src/preload/index.ts b/src/preload/index.ts index beabfa1a27..3120492dde 100644 --- a/src/preload/index.ts +++ b/src/preload/index.ts @@ -2,7 +2,18 @@ import type { ExtractChunkData } from '@cherrystudio/embedjs-interfaces' import { electronAPI } from '@electron-toolkit/preload' import { UpgradeChannel } from '@shared/config/constant' import { IpcChannel } from '@shared/IpcChannel' -import { FileType, KnowledgeBaseParams, KnowledgeItem, MCPServer, Shortcut, ThemeMode, WebDavConfig } from '@types' +import { + FileListResponse, + FileMetadata, + FileUploadResponse, + KnowledgeBaseParams, + KnowledgeItem, + MCPServer, + Provider, + Shortcut, + ThemeMode, + WebDavConfig +} from '@types' import { contextBridge, ipcRenderer, OpenDialogOptions, shell, webUtils } from 'electron' import { Notification } from 'src/renderer/src/types/notification' import { CreateDirectoryOptions } from 'webdav' @@ -79,13 +90,25 @@ const api = { }, file: { select: (options?: OpenDialogOptions) => ipcRenderer.invoke(IpcChannel.File_Select, options), - upload: (file: FileType) => ipcRenderer.invoke(IpcChannel.File_Upload, file), + upload: (file: FileMetadata) => ipcRenderer.invoke(IpcChannel.File_Upload, file), delete: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_Delete, fileId), + deleteDir: (dirPath: string) => ipcRenderer.invoke(IpcChannel.File_DeleteDir, dirPath), read: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_Read, fileId), clear: () => ipcRenderer.invoke(IpcChannel.File_Clear), get: (filePath: string) => ipcRenderer.invoke(IpcChannel.File_Get, filePath), - create: (fileName: string) => ipcRenderer.invoke(IpcChannel.File_Create, fileName), + /** + * 创建一个空的临时文件 + * @param fileName 文件名 + * @returns 临时文件路径 + */ + createTempFile: (fileName: string): Promise => 
ipcRenderer.invoke(IpcChannel.File_CreateTempFile, fileName), + /** + * 写入文件 + * @param filePath 文件路径 + * @param data 数据 + */ write: (filePath: string, data: Uint8Array | string) => ipcRenderer.invoke(IpcChannel.File_Write, filePath, data), + writeWithId: (id: string, content: string) => ipcRenderer.invoke(IpcChannel.File_WriteWithId, id, content), open: (options?: OpenDialogOptions) => ipcRenderer.invoke(IpcChannel.File_Open, options), openPath: (path: string) => ipcRenderer.invoke(IpcChannel.File_OpenPath, path), @@ -93,12 +116,12 @@ const api = { ipcRenderer.invoke(IpcChannel.File_Save, path, content, options), selectFolder: () => ipcRenderer.invoke(IpcChannel.File_SelectFolder), saveImage: (name: string, data: string) => ipcRenderer.invoke(IpcChannel.File_SaveImage, name, data), + binaryImage: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_BinaryImage, fileId), base64Image: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_Base64Image, fileId), saveBase64Image: (data: string) => ipcRenderer.invoke(IpcChannel.File_SaveBase64Image, data), download: (url: string, isUseContentType?: boolean) => ipcRenderer.invoke(IpcChannel.File_Download, url, isUseContentType), copy: (fileId: string, destPath: string) => ipcRenderer.invoke(IpcChannel.File_Copy, fileId, destPath), - binaryImage: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_BinaryImage, fileId), base64File: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_Base64File, fileId), pdfInfo: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_GetPdfInfo, fileId), getPathForFile: (file: File) => webUtils.getPathForFile(file) @@ -120,31 +143,38 @@ const api = { add: ({ base, item, + userId, forceReload = false }: { base: KnowledgeBaseParams item: KnowledgeItem + userId?: string forceReload?: boolean - }) => ipcRenderer.invoke(IpcChannel.KnowledgeBase_Add, { base, item, forceReload }), + }) => ipcRenderer.invoke(IpcChannel.KnowledgeBase_Add, { base, item, forceReload, userId }), remove: ({ 
uniqueId, uniqueIds, base }: { uniqueId: string; uniqueIds: string[]; base: KnowledgeBaseParams }) => ipcRenderer.invoke(IpcChannel.KnowledgeBase_Remove, { uniqueId, uniqueIds, base }), search: ({ search, base }: { search: string; base: KnowledgeBaseParams }) => ipcRenderer.invoke(IpcChannel.KnowledgeBase_Search, { search, base }), rerank: ({ search, base, results }: { search: string; base: KnowledgeBaseParams; results: ExtractChunkData[] }) => - ipcRenderer.invoke(IpcChannel.KnowledgeBase_Rerank, { search, base, results }) + ipcRenderer.invoke(IpcChannel.KnowledgeBase_Rerank, { search, base, results }), + checkQuota: ({ base, userId }: { base: KnowledgeBaseParams; userId: string }) => + ipcRenderer.invoke(IpcChannel.KnowledgeBase_Check_Quota, base, userId) }, window: { setMinimumSize: (width: number, height: number) => ipcRenderer.invoke(IpcChannel.Windows_SetMinimumSize, width, height), resetMinimumSize: () => ipcRenderer.invoke(IpcChannel.Windows_ResetMinimumSize) }, - gemini: { - uploadFile: (file: FileType, { apiKey, baseURL }: { apiKey: string; baseURL: string }) => - ipcRenderer.invoke(IpcChannel.Gemini_UploadFile, file, { apiKey, baseURL }), - base64File: (file: FileType) => ipcRenderer.invoke(IpcChannel.Gemini_Base64File, file), - retrieveFile: (file: FileType, apiKey: string) => ipcRenderer.invoke(IpcChannel.Gemini_RetrieveFile, file, apiKey), - listFiles: (apiKey: string) => ipcRenderer.invoke(IpcChannel.Gemini_ListFiles, apiKey), - deleteFile: (fileId: string, apiKey: string) => ipcRenderer.invoke(IpcChannel.Gemini_DeleteFile, fileId, apiKey) + fileService: { + upload: (provider: Provider, file: FileMetadata): Promise => + ipcRenderer.invoke(IpcChannel.FileService_Upload, provider, file), + list: (provider: Provider): Promise => ipcRenderer.invoke(IpcChannel.FileService_List, provider), + delete: (provider: Provider, fileId: string) => ipcRenderer.invoke(IpcChannel.FileService_Delete, provider, fileId), + retrieve: (provider: Provider, fileId: string): 
Promise => + ipcRenderer.invoke(IpcChannel.FileService_Retrieve, provider, fileId) + }, + selectionMenu: { + action: (action: string) => ipcRenderer.invoke('selection-menu:action', action) }, vertexAI: { diff --git a/src/renderer/src/aiCore/clients/gemini/GeminiAPIClient.ts b/src/renderer/src/aiCore/clients/gemini/GeminiAPIClient.ts index bfd2aff3f2..0f1bad0bf8 100644 --- a/src/renderer/src/aiCore/clients/gemini/GeminiAPIClient.ts +++ b/src/renderer/src/aiCore/clients/gemini/GeminiAPIClient.ts @@ -1,7 +1,7 @@ import { Content, + createPartFromUri, File, - FileState, FunctionCall, GenerateContentConfig, GenerateImagesConfig, @@ -10,7 +10,6 @@ import { HarmCategory, Modality, Model as GeminiModel, - Pager, Part, SafetySetting, SendMessageParameters, @@ -26,13 +25,13 @@ import { isSupportedThinkingTokenGeminiModel, isVisionModel } from '@renderer/config/models' -import { CacheService } from '@renderer/services/CacheService' import { estimateTextTokens } from '@renderer/services/TokenService' import { Assistant, EFFORT_RATIO, - FileType, + FileMetadata, FileTypes, + FileUploadResponse, GenerateImageParams, MCPCallToolResponse, MCPTool, @@ -198,7 +197,7 @@ export class GeminiAPIClient extends BaseApiClient< * @param file - The file * @returns The part */ - private async handlePdfFile(file: FileType): Promise { + private async handlePdfFile(file: FileMetadata): Promise { const smallFileSize = 20 * MB const isSmallFile = file.size < smallFileSize @@ -213,26 +212,17 @@ export class GeminiAPIClient extends BaseApiClient< } // Retrieve file from Gemini uploaded files - const fileMetadata: File | undefined = await this.retrieveFile(file) + const fileMetadata: FileUploadResponse = await window.api.fileService.retrieve(this.provider, file.id) - if (fileMetadata) { - return { - fileData: { - fileUri: fileMetadata.uri, - mimeType: fileMetadata.mimeType - } as Part['fileData'] - } + if (fileMetadata.status === 'success') { + const remoteFile = fileMetadata.originalFile?.file as 
File + return createPartFromUri(remoteFile.uri!, remoteFile.mimeType!) } // If file is not found, upload it to Gemini - const result = await this.uploadFile(file) - - return { - fileData: { - fileUri: result.uri, - mimeType: result.mimeType - } as Part['fileData'] - } + const result = await window.api.fileService.upload(this.provider, file) + const remoteFile = result.originalFile?.file as File + return createPartFromUri(remoteFile.uri!, remoteFile.mimeType!) } /** @@ -767,61 +757,11 @@ export class GeminiAPIClient extends BaseApiClient< return [...(sdkPayload.history || []), messageParam] } - private async uploadFile(file: FileType): Promise { - return await this.sdkInstance!.files.upload({ - file: file.path, - config: { - mimeType: 'application/pdf', - name: file.id, - displayName: file.origin_name - } - }) - } - - private async base64File(file: FileType) { + private async base64File(file: FileMetadata) { const { data } = await window.api.file.base64File(file.id + file.ext) return { data, mimeType: 'application/pdf' } } - - private async retrieveFile(file: FileType): Promise { - const cachedResponse = CacheService.get('gemini_file_list') - - if (cachedResponse) { - return this.processResponse(cachedResponse, file) - } - - const response = await this.sdkInstance!.files.list() - CacheService.set('gemini_file_list', response, 3000) - - return this.processResponse(response, file) - } - - private async processResponse(response: Pager, file: FileType) { - for await (const f of response) { - if (f.state === FileState.ACTIVE) { - if (f.displayName === file.origin_name && Number(f.sizeBytes) === file.size) { - return f - } - } - } - - return undefined - } - - // @ts-ignore unused - private async listFiles(): Promise { - const files: File[] = [] - for await (const f of await this.sdkInstance!.files.list()) { - files.push(f) - } - return files - } - - // @ts-ignore unused - private async deleteFile(fileId: string) { - await this.sdkInstance!.files.delete({ name: fileId }) - 
} } diff --git a/src/renderer/src/aiCore/clients/openai/OpenAIResponseAPIClient.ts b/src/renderer/src/aiCore/clients/openai/OpenAIResponseAPIClient.ts index 0cf64fb22a..99e40ed818 100644 --- a/src/renderer/src/aiCore/clients/openai/OpenAIResponseAPIClient.ts +++ b/src/renderer/src/aiCore/clients/openai/OpenAIResponseAPIClient.ts @@ -7,7 +7,7 @@ import { } from '@renderer/config/models' import { estimateTextTokens } from '@renderer/services/TokenService' import { - FileType, + FileMetadata, FileTypes, MCPCallToolResponse, MCPTool, @@ -95,7 +95,7 @@ export class OpenAIResponseAPIClient extends OpenAIBaseClient< return await sdk.responses.create(payload, options) } - private async handlePdfFile(file: FileType): Promise { + private async handlePdfFile(file: FileMetadata): Promise { if (file.size > 32 * MB) return undefined try { const pageCount = await window.api.file.pdfInfo(file.id + file.ext) diff --git a/src/renderer/src/assets/fonts/icon-fonts/iconfont.css b/src/renderer/src/assets/fonts/icon-fonts/iconfont.css index 71573edbf2..ae76c0026c 100644 --- a/src/renderer/src/assets/fonts/icon-fonts/iconfont.css +++ b/src/renderer/src/assets/fonts/icon-fonts/iconfont.css @@ -1,6 +1,6 @@ @font-face { font-family: 'iconfont'; /* Project id 4753420 */ - src: url('iconfont.woff2?t=1742184675192') format('woff2'); + src: url('iconfont.woff2?t=1742793497518') format('woff2'); } .iconfont { @@ -11,6 +11,18 @@ -moz-osx-font-smoothing: grayscale; } +.icon-plugin:before { + content: '\e612'; +} + +.icon-tools:before { + content: '\e762'; +} + +.icon-OCRshibie:before { + content: '\e658'; +} + .icon-obsidian:before { content: '\e677'; } diff --git a/src/renderer/src/assets/fonts/icon-fonts/iconfont.woff2 b/src/renderer/src/assets/fonts/icon-fonts/iconfont.woff2 index 9c2ec4a51d..9581311b4c 100644 Binary files a/src/renderer/src/assets/fonts/icon-fonts/iconfont.woff2 and b/src/renderer/src/assets/fonts/icon-fonts/iconfont.woff2 differ diff --git 
a/src/renderer/src/assets/images/ocr/doc2x.png b/src/renderer/src/assets/images/ocr/doc2x.png new file mode 100644 index 0000000000..4b0d0efa36 Binary files /dev/null and b/src/renderer/src/assets/images/ocr/doc2x.png differ diff --git a/src/renderer/src/assets/images/ocr/mineru.jpg b/src/renderer/src/assets/images/ocr/mineru.jpg new file mode 100644 index 0000000000..c4295d1f65 Binary files /dev/null and b/src/renderer/src/assets/images/ocr/mineru.jpg differ diff --git a/src/renderer/src/assets/images/providers/macos.svg b/src/renderer/src/assets/images/providers/macos.svg new file mode 100644 index 0000000000..3385e73504 --- /dev/null +++ b/src/renderer/src/assets/images/providers/macos.svg @@ -0,0 +1,7 @@ + + \ No newline at end of file diff --git a/src/renderer/src/components/CodeBlockView/HtmlArtifacts.tsx b/src/renderer/src/components/CodeBlockView/HtmlArtifacts.tsx index 0dbb0aabb2..87dc172bd6 100644 --- a/src/renderer/src/components/CodeBlockView/HtmlArtifacts.tsx +++ b/src/renderer/src/components/CodeBlockView/HtmlArtifacts.tsx @@ -19,7 +19,7 @@ const Artifacts: FC = ({ html }) => { * 在应用内打开 */ const handleOpenInApp = async () => { - const path = await window.api.file.create('artifacts-preview.html') + const path = await window.api.file.createTempFile('artifacts-preview.html') await window.api.file.write(path, html) const filePath = `file://${path}` const title = extractTitle(html) || 'Artifacts ' + t('chat.artifacts.button.preview') @@ -35,7 +35,7 @@ const Artifacts: FC = ({ html }) => { * 外部链接打开 */ const handleOpenExternal = async () => { - const path = await window.api.file.create('artifacts-preview.html') + const path = await window.api.file.createTempFile('artifacts-preview.html') await window.api.file.write(path, html) const filePath = `file://${path}` diff --git a/src/renderer/src/components/Icons/OcrIcon.tsx b/src/renderer/src/components/Icons/OcrIcon.tsx new file mode 100644 index 0000000000..41367445a7 --- /dev/null +++ 
b/src/renderer/src/components/Icons/OcrIcon.tsx @@ -0,0 +1,7 @@ +import { FC } from 'react' + +const OcrIcon: FC, HTMLElement>> = (props) => { + return +} + +export default OcrIcon diff --git a/src/renderer/src/components/Icons/ToolIcon.tsx b/src/renderer/src/components/Icons/ToolIcon.tsx new file mode 100644 index 0000000000..69f8da260c --- /dev/null +++ b/src/renderer/src/components/Icons/ToolIcon.tsx @@ -0,0 +1,7 @@ +import { FC } from 'react' + +const ToolIcon: FC, HTMLElement>> = (props) => { + return +} + +export default ToolIcon diff --git a/src/renderer/src/config/ocrProviders.ts b/src/renderer/src/config/ocrProviders.ts new file mode 100644 index 0000000000..5e482e10ef --- /dev/null +++ b/src/renderer/src/config/ocrProviders.ts @@ -0,0 +1,12 @@ +import MacOSLogo from '@renderer/assets/images/providers/macos.svg' + +export function getOcrProviderLogo(providerId: string) { + switch (providerId) { + case 'system': + return MacOSLogo + default: + return undefined + } +} + +export const OCR_PROVIDER_CONFIG = {} diff --git a/src/renderer/src/config/preprocessProviders.ts b/src/renderer/src/config/preprocessProviders.ts new file mode 100644 index 0000000000..587e6ea7f9 --- /dev/null +++ b/src/renderer/src/config/preprocessProviders.ts @@ -0,0 +1,37 @@ +import Doc2xLogo from '@renderer/assets/images/ocr/doc2x.png' +import MinerULogo from '@renderer/assets/images/ocr/mineru.jpg' +import MistralLogo from '@renderer/assets/images/providers/mistral.png' + +export function getPreprocessProviderLogo(providerId: string) { + switch (providerId) { + case 'doc2x': + return Doc2xLogo + case 'mistral': + return MistralLogo + case 'mineru': + return MinerULogo + default: + return undefined + } +} + +export const PREPROCESS_PROVIDER_CONFIG = { + doc2x: { + websites: { + official: 'https://doc2x.noedgeai.com', + apiKey: 'https://open.noedgeai.com/apiKeys' + } + }, + mistral: { + websites: { + official: 'https://mistral.ai', + apiKey: 'https://mistral.ai/api-keys' + } + }, + 
mineru: { + websites: { + official: 'https://mineru.net/', + apiKey: 'https://mineru.net/apiManage' + } + } +} diff --git a/src/renderer/src/databases/index.ts b/src/renderer/src/databases/index.ts index b75c3497a9..aa765db05b 100644 --- a/src/renderer/src/databases/index.ts +++ b/src/renderer/src/databases/index.ts @@ -1,4 +1,4 @@ -import { FileType, KnowledgeItem, QuickPhrase, TranslateHistory } from '@renderer/types' +import { FileMetadata, KnowledgeItem, QuickPhrase, TranslateHistory } from '@renderer/types' // Import necessary types for blocks and new message structure import type { Message as NewMessage, MessageBlock } from '@renderer/types/newMessage' import { Dexie, type EntityTable } from 'dexie' @@ -7,7 +7,7 @@ import { upgradeToV5, upgradeToV7 } from './upgrades' // Database declaration (move this to its own module also) export const db = new Dexie('CherryStudio') as Dexie & { - files: EntityTable + files: EntityTable topics: EntityTable<{ id: string; messages: NewMessage[] }, 'id'> // Correct type for topics settings: EntityTable<{ id: string; value: any }, 'id'> knowledge_notes: EntityTable diff --git a/src/renderer/src/hooks/useKnowledge.ts b/src/renderer/src/hooks/useKnowledge.ts index efdc9bd120..9d893da5b4 100644 --- a/src/renderer/src/hooks/useKnowledge.ts +++ b/src/renderer/src/hooks/useKnowledge.ts @@ -1,7 +1,5 @@ -/* eslint-disable react-hooks/rules-of-hooks */ import { db } from '@renderer/databases' import KnowledgeQueue from '@renderer/queue/KnowledgeQueue' -import FileManager from '@renderer/services/FileManager' import { getKnowledgeBaseParams } from '@renderer/services/KnowledgeService' import { RootState } from '@renderer/store' import { @@ -19,10 +17,9 @@ import { updateItemProcessingStatus, updateNotes } from '@renderer/store/knowledge' -import { FileType, KnowledgeBase, KnowledgeItem, ProcessingStatus } from '@renderer/types' +import { FileMetadata, KnowledgeBase, KnowledgeItem, ProcessingStatus } from '@renderer/types' import { 
runAsyncFunction } from '@renderer/utils' -import { IpcChannel } from '@shared/IpcChannel' -import { useEffect, useState } from 'react' +import { useCallback, useEffect, useState } from 'react' import { useDispatch, useSelector } from 'react-redux' import { v4 as uuidv4 } from 'uuid' @@ -44,7 +41,7 @@ export const useKnowledge = (baseId: string) => { } // 批量添加文件 - const addFiles = (files: FileType[]) => { + const addFiles = (files: FileMetadata[]) => { const filesItems: KnowledgeItem[] = files.map((file) => ({ id: uuidv4(), type: 'file' as const, @@ -56,6 +53,7 @@ export const useKnowledge = (baseId: string) => { processingError: '', retryCount: 0 })) + console.log('Adding files:', filesItems) dispatch(addFilesAction({ baseId, items: filesItems })) setTimeout(() => KnowledgeQueue.checkAllBases(), 0) } @@ -147,7 +145,7 @@ export const useKnowledge = (baseId: string) => { } } if (item.type === 'file' && typeof item.content === 'object') { - await FileManager.deleteFile(item.content.id) + await window.api.file.deleteDir(item.content.id) } } // 刷新项目 @@ -190,41 +188,18 @@ export const useKnowledge = (baseId: string) => { } // 获取特定项目的处理状态 - const getProcessingStatus = (itemId: string) => { - return base?.items.find((item) => item.id === itemId)?.processingStatus - } + const getProcessingStatus = useCallback( + (itemId: string) => { + return base?.items.find((item) => item.id === itemId)?.processingStatus + }, + [base?.items] + ) // 获取特定类型的所有处理项 const getProcessingItemsByType = (type: 'file' | 'url' | 'note') => { return base?.items.filter((item) => item.type === type && item.processingStatus !== undefined) || [] } - // 获取目录处理进度 - const getDirectoryProcessingPercent = (itemId?: string) => { - const [percent, setPercent] = useState(0) - - useEffect(() => { - if (!itemId) { - return - } - - const cleanup = window.electron.ipcRenderer.on( - IpcChannel.DirectoryProcessingPercent, - (_, { itemId: id, percent }: { itemId: string; percent: number }) => { - if (itemId === id) { - 
setPercent(percent) - } - } - ) - - return () => { - cleanup() - } - }, [itemId]) - - return percent - } - // 清除已完成的项目 const clearCompleted = () => { dispatch(clearCompletedProcessing({ baseId })) @@ -307,7 +282,6 @@ export const useKnowledge = (baseId: string) => { refreshItem, getProcessingStatus, getProcessingItemsByType, - getDirectoryProcessingPercent, clearCompleted, clearAll, removeItem, diff --git a/src/renderer/src/hooks/useKnowledgeFiles.tsx b/src/renderer/src/hooks/useKnowledgeFiles.tsx index 8d3714d3a5..57b454e6b7 100644 --- a/src/renderer/src/hooks/useKnowledgeFiles.tsx +++ b/src/renderer/src/hooks/useKnowledgeFiles.tsx @@ -1,12 +1,12 @@ import FileManager from '@renderer/services/FileManager' -import { FileType } from '@renderer/types' +import { FileMetadata } from '@renderer/types' import { isEmpty } from 'lodash' import { useEffect, useState } from 'react' import { useKnowledgeBases } from './useKnowledge' export const useKnowledgeFiles = () => { - const [knowledgeFiles, setKnowledgeFiles] = useState([]) + const [knowledgeFiles, setKnowledgeFiles] = useState([]) const { bases, updateKnowledgeBases } = useKnowledgeBases() useEffect(() => { @@ -16,7 +16,7 @@ export const useKnowledgeFiles = () => { .filter((item) => item.type === 'file') .filter((item) => item.processingStatus === 'completed') - const files = fileItems.map((item) => item.content as FileType) + const files = fileItems.map((item) => item.content as FileMetadata) !isEmpty(files) && setKnowledgeFiles(files) }, [bases]) @@ -31,7 +31,7 @@ export const useKnowledgeFiles = () => { ? 
{ ...item, content: { - ...(item.content as FileType), + ...(item.content as FileMetadata), size: 0 } } diff --git a/src/renderer/src/hooks/useOcr.ts b/src/renderer/src/hooks/useOcr.ts new file mode 100644 index 0000000000..7f83fd9c28 --- /dev/null +++ b/src/renderer/src/hooks/useOcr.ts @@ -0,0 +1,45 @@ +import { RootState } from '@renderer/store' +import { + setDefaultOcrProvider as _setDefaultOcrProvider, + updateOcrProvider as _updateOcrProvider, + updateOcrProviders as _updateOcrProviders +} from '@renderer/store/ocr' +import { OcrProvider } from '@renderer/types' +import { useDispatch, useSelector } from 'react-redux' + +export const useOcrProvider = (id: string) => { + const dispatch = useDispatch() + const ocrProviders = useSelector((state: RootState) => state.ocr.providers) + const provider = ocrProviders.find((provider) => provider.id === id) + if (!provider) { + throw new Error(`OCR provider with id ${id} not found`) + } + const updateOcrProvider = (ocrProvider: OcrProvider) => { + dispatch(_updateOcrProvider(ocrProvider)) + } + return { provider, updateOcrProvider } +} + +export const useOcrProviders = () => { + const dispatch = useDispatch() + const ocrProviders = useSelector((state: RootState) => state.ocr.providers) + return { + ocrProviders: ocrProviders, + updateOcrProviders: (ocrProviders: OcrProvider[]) => dispatch(_updateOcrProviders(ocrProviders)) + } +} + +export const useDefaultOcrProvider = () => { + const defaultProviderId = useSelector((state: RootState) => state.ocr.defaultProvider) + const { ocrProviders } = useOcrProviders() + const dispatch = useDispatch() + const provider = defaultProviderId ? 
ocrProviders.find((provider) => provider.id === defaultProviderId) : undefined + + const setDefaultOcrProvider = (ocrProvider: OcrProvider) => { + dispatch(_setDefaultOcrProvider(ocrProvider.id)) + } + const updateDefaultOcrProvider = (ocrProvider: OcrProvider) => { + dispatch(_updateOcrProvider(ocrProvider)) + } + return { provider, setDefaultOcrProvider, updateDefaultOcrProvider } +} diff --git a/src/renderer/src/hooks/usePreprocess.ts b/src/renderer/src/hooks/usePreprocess.ts new file mode 100644 index 0000000000..5a4c6649b5 --- /dev/null +++ b/src/renderer/src/hooks/usePreprocess.ts @@ -0,0 +1,48 @@ +import { RootState } from '@renderer/store' +import { + setDefaultPreprocessProvider as _setDefaultPreprocessProvider, + updatePreprocessProvider as _updatePreprocessProvider, + updatePreprocessProviders as _updatePreprocessProviders +} from '@renderer/store/preprocess' +import { PreprocessProvider } from '@renderer/types' +import { useDispatch, useSelector } from 'react-redux' + +export const usePreprocessProvider = (id: string) => { + const dispatch = useDispatch() + const preprocessProviders = useSelector((state: RootState) => state.preprocess.providers) + const provider = preprocessProviders.find((provider) => provider.id === id) + if (!provider) { + throw new Error(`preprocess provider with id ${id} not found`) + } + const updatePreprocessProvider = (preprocessProvider: PreprocessProvider) => { + dispatch(_updatePreprocessProvider(preprocessProvider)) + } + return { provider, updatePreprocessProvider } +} + +export const usePreprocessProviders = () => { + const dispatch = useDispatch() + const preprocessProviders = useSelector((state: RootState) => state.preprocess.providers) + return { + preprocessProviders: preprocessProviders, + updatePreprocessProviders: (preprocessProviders: PreprocessProvider[]) => + dispatch(_updatePreprocessProviders(preprocessProviders)) + } +} + +export const useDefaultPreprocessProvider = () => { + const defaultProviderId = 
useSelector((state: RootState) => state.preprocess.defaultProvider) + const { preprocessProviders } = usePreprocessProviders() + const dispatch = useDispatch() + const provider = defaultProviderId + ? preprocessProviders.find((provider) => provider.id === defaultProviderId) + : undefined + + const setDefaultPreprocessProvider = (preprocessProvider: PreprocessProvider) => { + dispatch(_setDefaultPreprocessProvider(preprocessProvider.id)) + } + const updateDefaultPreprocessProvider = (preprocessProvider: PreprocessProvider) => { + dispatch(_updatePreprocessProvider(preprocessProvider)) + } + return { provider, setDefaultPreprocessProvider, updateDefaultPreprocessProvider } +} diff --git a/src/renderer/src/i18n/locales/en-us.json b/src/renderer/src/i18n/locales/en-us.json index 4511878b84..02645faa69 100644 --- a/src/renderer/src/i18n/locales/en-us.json +++ b/src/renderer/src/i18n/locales/en-us.json @@ -190,7 +190,7 @@ "input.translate": "Translate to {{target_language}}", "input.upload": "Upload image or document file", "input.upload.document": "Upload document file (model does not support images)", - "input.web_search": "Web search", + "input.web_search": "Web Search", "input.web_search.settings": "Web Search Settings", "input.web_search.button.ok": "Go to Settings", "input.web_search.enable": "Enable web search", @@ -406,9 +406,11 @@ "prompt": "Prompt", "provider": "Provider", "regenerate": "Regenerate", + "refresh": "Refresh", "rename": "Rename", "reset": "Reset", "save": "Save", + "settings": "Settings", "search": "Search", "select": "Select", "selectedMessages": "Selected {{count}} messages", @@ -423,7 +425,9 @@ "pinyin.asc": "Sort by Pinyin (A-Z)", "pinyin.desc": "Sort by Pinyin (Z-A)" }, - "no_results": "No results" + "no_results": "No results", + "enabled": "Enabled", + "disabled": "Disabled" }, "docs": { "title": "Docs" @@ -498,6 +502,8 @@ "title": "Topics Search" }, "knowledge": { + "name_required": "Knowledge Base Name is required", + 
"embedding_model_required": "Knowledge Base Embedding Model is required", "add": { "title": "Add Knowledge Base" }, @@ -543,13 +549,21 @@ "rename": "Rename", "search": "Search knowledge base", "search_placeholder": "Enter text to search", - "settings": "Knowledge Base Settings", + "settings": { + "title": "Knowledge Base Settings", + "preprocessing": "Preprocessing", + "preprocessing_tooltip": "Preprocess uploaded files with OCR" + }, "sitemap_placeholder": "Enter Website Map URL", "sitemaps": "Websites", "source": "Source", "status": "Status", "status_completed": "Completed", + "status_embedding_completed": "Embedding Completed", + "status_preprocess_completed": "Preprocessing Completed", "status_failed": "Failed", + "status_embedding_failed": "Embedding Failed", + "status_preprocess_failed": "Preprocessing Failed", "status_new": "Added", "status_pending": "Pending", "status_processing": "Processing", @@ -572,7 +586,9 @@ "dimensions_error_invalid": "Please enter embedding dimension size", "dimensions_size_too_large": "The embedding dimension cannot exceed the model's context limit ({{max_context}}).", "dimensions_set_right": "⚠️ Please ensure the model supports the set embedding dimension size", - "dimensions_default": "The model will use default embedding dimensions" + "dimensions_default": "The model will use default embedding dimensions", + "quota": "{{name}} Left Quota: {{quota}}", + "quota_infinity": "{{name}} Quota: Unlimited" }, "languages": { "arabic": "Arabic", @@ -835,7 +851,7 @@ "notification": { "assistant": "Assistant Response", "knowledge.success": "Successfully added {{type}} to the knowledge base", - "knowledge.error": "Failed to add {{type}} to knowledge base: {{error}}" + "knowledge.error": "{{error}}" }, "ollama": { "keep_alive_time.description": "The time in minutes to keep the connection alive, default is 5 minutes.", @@ -1828,68 +1844,88 @@ "tray.onclose": "Minimize to Tray on Close", "tray.show": "Show Tray Icon", "tray.title": "Tray", - 
"websearch": { - "blacklist": "Blacklist", - "blacklist_description": "Results from the following websites will not appear in search results", - "blacklist_tooltip": "Please use the following format (separated by newlines)\nPattern matching: *://*.example.com/*\nRegular expression: /example\\.(net|org)/", - "check": "Check", - "check_failed": "Verification failed", - "check_success": "Verification successful", - "get_api_key": "Get API Key", - "no_provider_selected": "Please select a search service provider before checking.", - "search_max_result": "Number of search results", - "search_provider": "Search service provider", - "search_provider_placeholder": "Choose a search service provider.", - "search_result_default": "Default", - "search_with_time": "Search with dates included", - "tavily": { - "api_key": "Tavily API Key", - "api_key.placeholder": "Enter Tavily API Key", - "description": "Tavily is a search engine tailored for AI agents, delivering real-time, accurate results, intelligent query suggestions, and in-depth research capabilities.", - "title": "Tavily" + "tool": { + "title": "Tools Settings", + "preprocessOrOcr.tooltip": "In Settings -> Tools, set a document preprocessing service provider or OCR. Document preprocessing can effectively improve the retrieval performance of complex format documents and scanned documents. 
OCR can only recognize text within images in documents or scanned PDF text.", + "preprocess": { + "title": "Pre Process", + "provider": "Pre Process Provider", + "provider_placeholder": "Choose a Pre Process provider" }, - "title": "Web Search", - "subscribe": "Blacklist Subscription", - "subscribe_update": "Update", - "subscribe_add": "Add Subscription", - "subscribe_url": "Subscription Url", - "subscribe_name": "Alternative name", - "subscribe_name.placeholder": "Alternative name used when the downloaded subscription feed has no name.", - "subscribe_add_success": "Subscription feed added successfully!", - "subscribe_delete": "Delete", - "subscribe_add_failed": "Failed to add blacklist subscription", - "subscribe_update_success": "Blacklist subscription updated successfully", - "subscribe_update_failed": "Failed to update blacklist subscription", - "subscribe_source_update_failed": "Failed to update blacklist subscription source", - "overwrite": "Override search service", - "overwrite_tooltip": "Force use search service instead of LLM", - "apikey": "API key", - "free": "Free", - "compression": { - "title": "Search Result Compression", - "method": "Compression Method", - "method.none": "None", - "method.cutoff": "Cutoff", - "cutoff.limit": "Cutoff Limit", - "cutoff.limit.placeholder": "Enter length", - "cutoff.limit.tooltip": "Limit the content length of search results, content exceeding the limit will be truncated (e.g., 2000 characters)", - "cutoff.unit.char": "Char", - "cutoff.unit.token": "Token", - "method.rag": "RAG", - "rag.document_count": "Document Count", - "rag.document_count.default": "Default", - "rag.document_count.tooltip": "Expected number of documents to extract from each search result, the actual total number of extracted documents is this value multiplied by the number of search results.", - "rag.embedding_dimensions.auto_get": "Auto Get Dimensions", - "rag.embedding_dimensions.placeholder": "Leave empty", - "rag.embedding_dimensions.tooltip": 
"If left blank, the dimensions parameter will not be passed", - "info": { - "dimensions_auto_success": "Dimensions auto-obtained successfully, dimensions: {{dimensions}}" + "ocr": { + "title": "OCR", + "provider": "OCR Provider", + "provider_placeholder": "Choose an OCR provider", + "mac_system_ocr_options": { + "mode": { + "title": "Recognition Mode", + "accurate": "Accurate", + "fast": "Fast" + }, + "min_confidence": "Minimum Confidence" + } + }, + "websearch": { + "blacklist": "Blacklist", + "blacklist_description": "Results from the following websites will not appear in search results", + "blacklist_tooltip": "Please use the following format (separated by newlines)\nPattern matching: *://*.example.com/*\nRegular expression: /example\\.(net|org)/", + "check": "Check", + "check_failed": "Verification failed", + "check_success": "Verification successful", + "get_api_key": "Get API Key", + "no_provider_selected": "Please select a search service provider before checking.", + "search_max_result": "Number of search results", + "search_provider": "Search service provider", + "search_provider_placeholder": "Choose a search service provider.", + "search_result_default": "Default", + "search_with_time": "Search with dates included", + "tavily": { + "api_key": "Tavily API Key", + "api_key.placeholder": "Enter Tavily API Key", + "description": "Tavily is a search engine tailored for AI agents, delivering real-time, accurate results, intelligent query suggestions, and in-depth research capabilities.", + "title": "Tavily" }, - "error": { - "embedding_model_required": "Please select an embedding model first", - "dimensions_auto_failed": "Failed to auto-obtain dimensions", - "provider_not_found": "Provider not found", - "rag_failed": "RAG failed" + "content_limit": "Content length limit", + "content_limit_tooltip": "Limit the content length of the search results; content that exceeds the limit will be truncated.", + "title": "Web Search", + "subscribe": "Blacklist 
Subscription", + "subscribe_update": "Update", + "subscribe_add": "Add Subscription", + "subscribe_url": "Subscription Url", + "subscribe_name": "Alternative name", + "subscribe_name.placeholder": "Alternative name used when the downloaded subscription feed has no name.", + "subscribe_add_success": "Subscription feed added successfully!", + "subscribe_delete": "Delete", + "overwrite": "Override search service", + "overwrite_tooltip": "Force use search service instead of LLM", + "apikey": "API key", + "free": "Free", + "compression": { + "title": "Search Result Compression", + "method": "Compression Method", + "method.none": "None", + "method.cutoff": "Cutoff", + "cutoff.limit": "Cutoff Limit", + "cutoff.limit.placeholder": "Enter length", + "cutoff.limit.tooltip": "Limit the content length of search results, content exceeding the limit will be truncated (e.g., 2000 characters)", + "cutoff.unit.char": "Char", + "cutoff.unit.token": "Token", + "method.rag": "RAG", + "rag.document_count": "Document Count", + "rag.document_count.default": "Default", + "rag.document_count.tooltip": "Expected number of documents to extract from each search result, the actual total number of extracted documents is this value multiplied by the number of search results.", + "rag.embedding_dimensions.auto_get": "Auto Get Dimensions", + "rag.embedding_dimensions.placeholder": "Leave empty", + "rag.embedding_dimensions.tooltip": "If left blank, the dimensions parameter will not be passed", + "info": { + "dimensions_auto_success": "Dimensions auto-obtained successfully, dimensions: {{dimensions}}" + }, + "error": { + "embedding_model_required": "Please select an embedding model first", + "dimensions_auto_failed": "Failed to auto-obtain dimensions", + "provider_not_found": "Provider not found", + "rag_failed": "RAG failed" + } } } }, @@ -1938,7 +1974,8 @@ "service_tier.auto": "auto", "service_tier.default": "default", "service_tier.flex": "flex" - } + }, + "mineru.api_key": "Mineru now offers a 
daily free quota of 500 pages, and you do not need to enter a key." }, "translate": { "any.language": "Any language", diff --git a/src/renderer/src/i18n/locales/ja-jp.json b/src/renderer/src/i18n/locales/ja-jp.json index 2082cf0c27..f2f802cd26 100644 --- a/src/renderer/src/i18n/locales/ja-jp.json +++ b/src/renderer/src/i18n/locales/ja-jp.json @@ -406,9 +406,11 @@ "prompt": "プロンプト", "provider": "プロバイダー", "regenerate": "再生成", + "refresh": "更新", "rename": "名前を変更", "reset": "リセット", "save": "保存", + "settings": "設定", "search": "検索", "select": "選択", "selectedMessages": "{{count}}件のメッセージを選択しました", @@ -423,7 +425,9 @@ "pinyin.asc": "ピンインで昇順ソート", "pinyin.desc": "ピンインで降順ソート" }, - "no_results": "検索結果なし" + "no_results": "検索結果なし", + "enabled": "有効", + "disabled": "無効" }, "docs": { "title": "ドキュメント" @@ -543,7 +547,11 @@ "rename": "名前を変更", "search": "ナレッジベースを検索", "search_placeholder": "検索するテキストを入力", - "settings": "ナレッジベース設定", + "settings": { + "title": "ナレッジベース設定", + "preprocessing": "預処理", + "preprocessing_tooltip": "アップロードされたファイルのOCR預処理" + }, "sitemap_placeholder": "サイトマップURLを入力", "sitemaps": "サイトマップ", "source": "ソース", @@ -567,12 +575,20 @@ "urls": "URL", "dimensions": "埋め込み次元", "dimensions_size_tooltip": "埋め込み次元のサイズは、数値が大きいほど埋め込み次元も大きくなりますが、消費するトークンも増えます。", + "status_embedding_completed": "埋め込み完了", + "status_preprocess_completed": "前処理完了", + "status_embedding_failed": "埋め込み失敗", + "status_preprocess_failed": "前処理に失敗しました", "dimensions_size_placeholder": " 埋め込み次元のサイズ(例:1024)", "dimensions_auto_set": "埋め込み次元を自動設定", "dimensions_error_invalid": "埋め込み次元のサイズを入力してください", "dimensions_size_too_large": "埋め込み次元はモデルのコンテキスト制限({{max_context}})を超えてはなりません。", "dimensions_set_right": "⚠️ モデルが設定した埋め込み次元のサイズをサポートしていることを確認してください", - "dimensions_default": "モデルはデフォルトの埋め込み次元を使用します" + "dimensions_default": "モデルはデフォルトの埋め込み次元を使用します", + "quota": "{{name}} 残りクォータ: {{quota}}", + "quota_infinity": "{{name}} クォータ: 無制限", + "name_required": "ナレッジベース名は必須です", + "embedding_model_required": "ナレッジベース埋め込みモデルが必要です" }, 
"languages": { "arabic": "アラビア語", @@ -835,7 +851,7 @@ "notification": { "assistant": "助手回應", "knowledge.success": "ナレッジベースに{{type}}を正常に追加しました", - "knowledge.error": "ナレッジベースへの{{type}}の追加に失敗しました: {{error}}" + "knowledge.error": "{{error}}" }, "ollama": { "keep_alive_time.description": "モデルがメモリに保持される時間(デフォルト:5分)", @@ -1805,6 +1821,91 @@ "theme.window.style.title": "ウィンドウスタイル", "theme.window.style.transparent": "透明ウィンドウ", "title": "設定", + "tool": { + "title": "ツール設定", + "websearch": { + "blacklist": "ブラックリスト", + "blacklist_description": "以下のウェブサイトの結果は検索結果に表示されません", + "blacklist_tooltip": "以下の形式を使用してください(改行区切り)\nexample.com\nhttps://www.example.com\nhttps://example.com\n*://*.example.com", + "check": "チェック", + "check_failed": "検証に失敗しました", + "check_success": "検証に成功しました", + "get_api_key": "APIキーを取得", + "no_provider_selected": "検索サービスプロバイダーを選択してから再確認してください。", + "search_max_result": "検索結果の数", + "search_provider": "検索サービスプロバイダー", + "search_provider_placeholder": "検索サービスプロバイダーを選択する", + "search_result_default": "デフォルト", + "search_with_time": "日付を含む検索", + "tavily": { + "api_key": "Tavily API キー", + "api_key.placeholder": "Tavily API キーを入力してください", + "description": "Tavily は、AI エージェントのために特別に開発された検索エンジンで、最新の結果、インテリジェントな検索提案、そして深い研究能力を提供します", + "title": "Tavily" + }, + "title": "ウェブ検索", + "subscribe": "ブラックリスト購読", + "subscribe_update": "更新", + "subscribe_add": "購読を追加", + "subscribe_url": "購読URL", + "subscribe_name": "代替名", + "subscribe_name.placeholder": "ダウンロードした購読フィードに名前がない場合に使用される代替名。", + "subscribe_add_success": "購読フィードが正常に追加されました!", + "subscribe_delete": "削除", + "overwrite": "検索サービスを上書き", + "overwrite_tooltip": "LLMの代わりに検索サービスを強制的に使用する", + "apikey": "APIキー", + "free": "無料", + "content_limit": "コンテンツ制限", + "content_limit_tooltip": "検索結果のコンテンツの長さを制限します。制限を超えるコンテンツは切り捨てられます。", + "compression": { + "title": "検索結果の圧縮", + "method": "圧縮方法", + "method.none": "圧縮しない", + "method.cutoff": "切り捨て", + "cutoff.limit": "切り捨て長", + "cutoff.limit.placeholder": "長さを入力", + "cutoff.limit.tooltip": 
"検索結果の内容長を制限し、制限を超える内容は切り捨てられます(例:2000文字)", + "cutoff.unit.char": "文字", + "cutoff.unit.token": "トークン", + "method.rag": "RAG", + "rag.document_count": "文書数", + "rag.document_count.default": "デフォルト", + "rag.document_count.tooltip": "単一の検索結果から抽出する文書数。実際に抽出される文書数は、この値に検索結果数を乗じたものです。", + "rag.embedding_dimensions.auto_get": "次元を自動取得", + "rag.embedding_dimensions.placeholder": "次元を設定しない", + "rag.embedding_dimensions.tooltip": "空の場合、dimensions パラメーターは渡されません", + "info": { + "dimensions_auto_success": "次元が自動取得されました。次元: {{dimensions}}" + }, + "error": { + "embedding_model_required": "まず埋め込みモデルを選択してください", + "dimensions_auto_failed": "次元の自動取得に失敗しました", + "provider_not_found": "プロバイダーが見つかりません", + "rag_failed": "RAG に失敗しました" + } + } + }, + "preprocess": { + "title": "前処理", + "provider": "プレプロセスプロバイダー", + "provider_placeholder": "前処理プロバイダーを選択してください" + }, + "preprocessOrOcr.tooltip": "設定 → ツールで、ドキュメント前処理サービスプロバイダーまたはOCRを設定します。ドキュメント前処理は、複雑な形式のドキュメントやスキャンされたドキュメントの検索性能を効果的に向上させます。OCRは、ドキュメント内の画像内のテキストまたはスキャンされたPDFテキストのみを認識できます。", + "ocr": { + "title": "OCR(オーシーアール)", + "provider": "OCRプロバイダー", + "provider_placeholder": "OCRプロバイダーを選択", + "mac_system_ocr_options": { + "mode": { + "title": "認識モード", + "accurate": "正確", + "fast": "速い" + }, + "min_confidence": "最小信頼度" + } + } + }, "topic.position": "トピックの位置", "topic.position.left": "左", "topic.position.right": "右", @@ -1813,71 +1914,6 @@ "tray.onclose": "閉じるときにトレイに最小化", "tray.show": "トレイアイコンを表示", "tray.title": "トレイ", - "websearch": { - "blacklist": "ブラックリスト", - "blacklist_description": "以下のウェブサイトの結果は検索結果に表示されません", - "check": "チェック", - "check_failed": "検証に失敗しました", - "check_success": "検証に成功しました", - "get_api_key": "APIキーを取得", - "no_provider_selected": "検索サービスプロバイダーを選択してから再確認してください。", - "search_max_result": "検索結果の数", - "search_provider": "検索サービスプロバイダー", - "search_provider_placeholder": "検索サービスプロバイダーを選択する", - "search_result_default": "デフォルト", - "search_with_time": "日付を含む検索", - "tavily": { - "api_key": "Tavily API キー", - "api_key.placeholder": 
"Tavily API キーを入力してください", - "description": "Tavily は、AI エージェントのために特別に開発された検索エンジンで、最新の結果、インテリジェントな検索提案、そして深い研究能力を提供します", - "title": "Tavily" - }, - "title": "ウェブ検索", - "blacklist_tooltip": "マッチパターン: *://*.example.com/*\n正規表現: /example\\.(net|org)/", - "subscribe": "ブラックリスト購読", - "subscribe_update": "更新", - "subscribe_add": "サブスクリプションを追加", - "subscribe_url": "フィードのURL", - "subscribe_name": "代替名", - "subscribe_name.placeholder": "ダウンロードしたフィードに名前がない場合に使用される代替名", - "subscribe_add_success": "フィードの追加が成功しました!", - "subscribe_delete": "削除", - "overwrite": "サービス検索を上書き", - "overwrite_tooltip": "大規模言語モデルではなく、サービス検索を使用する", - "apikey": "API キー", - "free": "無料", - "compression": { - "title": "検索結果の圧縮", - "method": "圧縮方法", - "method.none": "圧縮しない", - "method.cutoff": "切り捨て", - "cutoff.limit": "切り捨て長", - "cutoff.limit.placeholder": "長さを入力", - "cutoff.limit.tooltip": "検索結果の内容長を制限し、制限を超える内容は切り捨てられます(例:2000文字)", - "cutoff.unit.char": "文字", - "cutoff.unit.token": "トークン", - "method.rag": "RAG", - "rag.document_count": "文書数", - "rag.document_count.default": "デフォルト", - "rag.document_count.tooltip": "単一の検索結果から抽出する文書数。実際に抽出される文書数は、この値に検索結果数を乗じたものです。", - "rag.embedding_dimensions.auto_get": "次元を自動取得", - "rag.embedding_dimensions.placeholder": "次元を設定しない", - "rag.embedding_dimensions.tooltip": "空の場合、dimensions パラメーターは渡されません", - "info": { - "dimensions_auto_success": "次元が自動取得されました。次元: {{dimensions}}" - }, - "error": { - "embedding_model_required": "まず埋め込みモデルを選択してください", - "dimensions_auto_failed": "次元の自動取得に失敗しました", - "provider_not_found": "プロバイダーが見つかりません", - "rag_failed": "RAG に失敗しました" - } - }, - "subscribe_add_failed": "ブラックリスト購読の追加に失敗しました", - "subscribe_update_success": "ブラックリスト購読が正常に更新されました", - "subscribe_update_failed": "ブラックリスト購読の更新に失敗しました", - "subscribe_source_update_failed": "ブラックリスト購読ソースの更新に失敗しました" - }, "general.auto_check_update.title": "自動更新", "general.test_plan.title": "テストプラン", "general.test_plan.tooltip": "テストプランに参加すると、最新の機能をより早く体験できますが、同時により多くのリスクが伴います。データを事前にバックアップしてください。", @@ 
-1941,7 +1977,8 @@ "assistant": "アシスタントメッセージ", "backup": "バックアップメッセージ", "knowledge_embed": "ナレッジベースメッセージ" - } + }, + "mineru.api_key": "Mineruでは現在、1日500ページの無料クォータを提供しており、キーを入力する必要はありません。" }, "translate": { "any.language": "任意の言語", diff --git a/src/renderer/src/i18n/locales/ru-ru.json b/src/renderer/src/i18n/locales/ru-ru.json index 43b53f4a4d..b5b5bfbd11 100644 --- a/src/renderer/src/i18n/locales/ru-ru.json +++ b/src/renderer/src/i18n/locales/ru-ru.json @@ -406,9 +406,11 @@ "prompt": "Промпт", "provider": "Провайдер", "regenerate": "Пересоздать", + "refresh": "Обновить", "rename": "Переименовать", "reset": "Сбросить", "save": "Сохранить", + "settings": "Настройки", "search": "Поиск", "select": "Выбрать", "selectedMessages": "Выбрано {{count}} сообщений", @@ -423,7 +425,9 @@ "pinyin.asc": "Сортировать по пиньинь (А-Я)", "pinyin.desc": "Сортировать по пиньинь (Я-А)" }, - "no_results": "Результатов не найдено" + "no_results": "Результатов не найдено", + "enabled": "Включено", + "disabled": "Отключено" }, "docs": { "title": "Документация" @@ -543,7 +547,11 @@ "rename": "Переименовать", "search": "Поиск в базе знаний", "search_placeholder": "Введите текст для поиска", - "settings": "Настройки базы знаний", + "settings": { + "title": "Настройки базы знаний", + "preprocessing": "Предварительная обработка", + "preprocessing_tooltip": "Предварительная обработка изображений с помощью OCR" + }, "sitemap_placeholder": "Введите URL карты сайта", "sitemaps": "Сайты", "source": "Источник", @@ -567,12 +575,20 @@ "urls": "URL-адреса", "dimensions": "векторное пространство", "dimensions_size_tooltip": "Размерность вложения, чем больше значение, тем больше размерность вложения, но и потребляемых токенов также становится больше.", + "status_embedding_completed": "Вложение завершено", + "status_preprocess_completed": "Предварительная обработка завершена", + "status_embedding_failed": "Не удалось встроить", + "status_preprocess_failed": "Предварительная обработка не удалась", 
"dimensions_size_placeholder": " Размерность эмбеддинга, например 1024", "dimensions_auto_set": "Автоматическая установка размерности эмбеддинга", "dimensions_error_invalid": "Пожалуйста, введите размерность эмбеддинга", "dimensions_size_too_large": "Размерность вложения не может превышать ограничение контекста модели ({{max_context}})", "dimensions_set_right": "⚠️ Убедитесь, что модель поддерживает заданный размер эмбеддинга", - "dimensions_default": "Модель будет использовать размер эмбеддинга по умолчанию" + "dimensions_default": "Модель будет использовать размер эмбеддинга по умолчанию", + "quota": "{{name}} Остаток квоты: {{quota}}", + "quota_infinity": "{{name}} Квота: не ограничена", + "name_required": "Название базы знаний обязательно", + "embedding_model_required": "Модель встраивания базы знаний обязательна" }, "languages": { "arabic": "Арабский", @@ -835,7 +851,7 @@ "notification": { "assistant": "Ответ ассистента", "knowledge.success": "Успешно добавлено {{type}} в базу знаний", - "knowledge.error": "Не удалось добавить {{type}} в базу знаний: {{error}}" + "knowledge.error": "{{error}}" }, "ollama": { "keep_alive_time.description": "Время в минутах, в течение которого модель остается активной, по умолчанию 5 минут.", @@ -1805,6 +1821,91 @@ "theme.window.style.title": "Стиль окна", "theme.window.style.transparent": "Прозрачное окно", "title": "Настройки", + "tool": { + "title": "Настройки инструментов", + "websearch": { + "blacklist": "Черный список", + "blacklist_description": "Результаты из следующих веб-сайтов не будут отображаться в результатах поиска", + "blacklist_tooltip": "Пожалуйста, используйте следующий формат (разделенный переносами строк)\nexample.com\nhttps://www.example.com\nhttps://example.com\n*://*.example.com", + "check": "проверка", + "check_failed": "Проверка не прошла", + "check_success": "Проверка успешна", + "get_api_key": "Получить ключ API", + "no_provider_selected": "Пожалуйста, выберите поставщика поисковых услуг, затем 
проверьте.", + "search_max_result": "Количество результатов поиска", + "search_provider": "поиск сервисного провайдера", + "search_provider_placeholder": "Выберите поставщика поисковых услуг", + "search_result_default": "По умолчанию", + "search_with_time": "Поиск, содержащий дату", + "tavily": { + "api_key": "Ключ API Tavily", + "api_key.placeholder": "Введите ключ API Tavily", + "description": "Tavily — это поисковая система, специально разработанная для ИИ-агентов, предоставляющая актуальные результаты, умные предложения по запросам и глубокие исследовательские возможности", + "title": "Tavily" + }, + "title": "Поиск в Интернете", + "subscribe": "Подписка на черный список", + "subscribe_update": "Обновить", + "subscribe_add": "Добавить подписку", + "subscribe_url": "URL подписки", + "subscribe_name": "Альтернативное имя", + "subscribe_name.placeholder": "Альтернативное имя, используемое, когда в загруженной ленте подписки нет имени.", + "subscribe_add_success": "Лента подписки успешно добавлена!", + "subscribe_delete": "Удалить", + "overwrite": "Переопределить поисковый сервис", + "overwrite_tooltip": "Принудительно использовать поисковый сервис вместо LLM", + "apikey": "API ключ", + "free": "Бесплатно", + "content_limit": "Ограничение длины контента", + "content_limit_tooltip": "Ограничить длину контента в результатах поиска; контент, превышающий лимит, будет усечен.", + "compression": { + "title": "Сжатие результатов поиска", + "method": "Метод сжатия", + "method.none": "Не сжимать", + "method.cutoff": "Обрезка", + "cutoff.limit": "Лимит обрезки", + "cutoff.limit.placeholder": "Введите длину", + "cutoff.limit.tooltip": "Ограничьте длину содержимого результатов поиска, контент, превышающий ограничение, будет обрезан (например, 2000 символов)", + "cutoff.unit.char": "Символы", + "cutoff.unit.token": "Токены", + "method.rag": "RAG", + "rag.document_count": "Количество документов", + "rag.document_count.default": "По умолчанию", + "rag.document_count.tooltip": 
"Ожидаемое количество документов, которые будут извлечены из каждого результата поиска. Фактическое количество извлеченных документов равно этому значению, умноженному на количество результатов поиска.", + "rag.embedding_dimensions.auto_get": "Автоматически получить размерности", + "rag.embedding_dimensions.placeholder": "Не устанавливать размерности", + "rag.embedding_dimensions.tooltip": "Если оставить пустым, параметр dimensions не будет передан", + "info": { + "dimensions_auto_success": "Размерности успешно получены, размерности: {{dimensions}}" + }, + "error": { + "embedding_model_required": "Пожалуйста, сначала выберите модель встраивания", + "dimensions_auto_failed": "Не удалось получить размерности", + "provider_not_found": "Поставщик не найден", + "rag_failed": "RAG не удалось" + } + } + }, + "preprocess": { + "title": "Предварительная обработка", + "provider": "Поставщик предварительной обработки", + "provider_placeholder": "Выберите поставщика услуг предварительной обработки" + }, + "preprocessOrOcr.tooltip": "В настройках (Настройки -> Инструменты) укажите поставщика услуги предварительной обработки документов или OCR. Предварительная обработка документов может значительно повысить эффективность поиска для документов сложных форматов и отсканированных документов. 
OCR способен распознавать только текст внутри изображений в документах или текст в отсканированных PDF.", + "ocr": { + "title": "OCR (оптическое распознавание символов)", + "provider": "Поставщик OCR", + "provider_placeholder": "Выберите провайдера OCR", + "mac_system_ocr_options": { + "mode": { + "title": "Режим распознавания", + "accurate": "Точный", + "fast": "Быстро" + }, + "min_confidence": "Минимальная достоверность" + } + } + }, "topic.position": "Позиция топиков", "topic.position.left": "Слева", "topic.position.right": "Справа", @@ -1813,71 +1914,6 @@ "tray.onclose": "Свернуть в трей при закрытии", "tray.show": "Показать значок в трее", "tray.title": "Трей", - "websearch": { - "blacklist": "Черный список", - "blacklist_description": "Результаты из следующих веб-сайтов не будут отображаться в результатах поиска", - "check": "проверка", - "check_failed": "Проверка не прошла", - "check_success": "Проверка успешна", - "get_api_key": "Получить ключ API", - "no_provider_selected": "Пожалуйста, выберите поставщика поисковых услуг, затем проверьте.", - "search_max_result": "Количество результатов поиска", - "search_provider": "поиск сервисного провайдера", - "search_provider_placeholder": "Выберите поставщика поисковых услуг", - "search_result_default": "По умолчанию", - "search_with_time": "Поиск, содержащий дату", - "tavily": { - "api_key": "Ключ API Tavily", - "api_key.placeholder": "Введите ключ API Tavily", - "description": "Tavily — это поисковая система, специально разработанная для ИИ-агентов, предоставляющая актуальные результаты, умные предложения по запросам и глубокие исследовательские возможности", - "title": "Tavily" - }, - "title": "Поиск в Интернете", - "blacklist_tooltip": "Шаблон: *://*.example.com/*\nРегулярное выражение: /example\\.(net|org)/", - "subscribe": "Подписка на черный список", - "subscribe_update": "Обновить", - "subscribe_add": "Добавить", - "subscribe_url": "URL подписки", - "subscribe_name": "Альтернативное имя", - 
"subscribe_name.placeholder": "Альтернативное имя, если в подписке нет названия.", - "subscribe_add_success": "Подписка успешно добавлена!", - "subscribe_delete": "Удалить", - "overwrite": "Переопределить провайдера поиска", - "overwrite_tooltip": "Использовать провайдера поиска вместо LLM", - "apikey": "API ключ", - "free": "Бесплатно", - "compression": { - "title": "Сжатие результатов поиска", - "method": "Метод сжатия", - "method.none": "Не сжимать", - "method.cutoff": "Обрезка", - "cutoff.limit": "Лимит обрезки", - "cutoff.limit.placeholder": "Введите длину", - "cutoff.limit.tooltip": "Ограничьте длину содержимого результатов поиска, контент, превышающий ограничение, будет обрезан (например, 2000 символов)", - "cutoff.unit.char": "Символы", - "cutoff.unit.token": "Токены", - "method.rag": "RAG", - "rag.document_count": "Количество документов", - "rag.document_count.default": "По умолчанию", - "rag.document_count.tooltip": "Ожидаемое количество документов, которые будут извлечены из каждого результата поиска. 
Фактическое количество извлеченных документов равно этому значению, умноженному на количество результатов поиска.", - "rag.embedding_dimensions.auto_get": "Автоматически получить размерности", - "rag.embedding_dimensions.placeholder": "Не устанавливать размерности", - "rag.embedding_dimensions.tooltip": "Если оставить пустым, параметр dimensions не будет передан", - "info": { - "dimensions_auto_success": "Размерности успешно получены, размерности: {{dimensions}}" - }, - "error": { - "embedding_model_required": "Пожалуйста, сначала выберите модель встраивания", - "dimensions_auto_failed": "Не удалось получить размерности", - "provider_not_found": "Поставщик не найден", - "rag_failed": "RAG не удалось" - } - }, - "subscribe_add_failed": "Не удалось добавить подписку на черный список", - "subscribe_update_success": "Подписка на черный список успешно обновлена", - "subscribe_update_failed": "Не удалось обновить подписку на черный список", - "subscribe_source_update_failed": "Не удалось обновить источник подписки на черный список" - }, "general.auto_check_update.title": "Автоматическое обновление", "general.test_plan.title": "Тестовый план", "general.test_plan.tooltip": "Участвовать в тестовом плане, чтобы быстрее получать новые функции, но при этом возникает больше рисков, пожалуйста, сделайте резервную копию данных заранее", @@ -1941,7 +1977,8 @@ "assistant": "Сообщение ассистента", "backup": "Резервное сообщение", "knowledge_embed": "Сообщение базы знаний" - } + }, + "mineru.api_key": "Mineru теперь предлагает ежедневную бесплатную квоту в 500 страниц, и вам не нужно вводить ключ." 
}, "translate": { "any.language": "Любой язык", diff --git a/src/renderer/src/i18n/locales/zh-cn.json b/src/renderer/src/i18n/locales/zh-cn.json index 2dd0422004..3fec012014 100644 --- a/src/renderer/src/i18n/locales/zh-cn.json +++ b/src/renderer/src/i18n/locales/zh-cn.json @@ -406,9 +406,11 @@ "prompt": "提示词", "provider": "提供商", "regenerate": "重新生成", + "refresh": "刷新", "rename": "重命名", "reset": "重置", "save": "保存", + "settings": "设置", "search": "搜索", "select": "选择", "selectedMessages": "选中 {{count}} 条消息", @@ -423,7 +425,9 @@ "pinyin.asc": "按拼音升序", "pinyin.desc": "按拼音降序" }, - "no_results": "无结果" + "no_results": "无结果", + "enabled": "已启用", + "disabled": "已禁用" }, "docs": { "title": "帮助文档" @@ -551,7 +555,11 @@ "rename": "重命名", "search": "搜索知识库", "search_placeholder": "输入查询内容", - "settings": "知识库设置", + "settings": { + "title": "知识库设置", + "preprocessing": "预处理", + "preprocessing_tooltip": "使用 OCR 预处理上传的文件" + }, "sitemap_placeholder": "请输入站点地图 URL", "sitemaps": "网站", "source": "来源", @@ -571,8 +579,16 @@ "topN_placeholder": "未设置", "topN_tooltip": "返回的匹配结果数量,数值越大,匹配结果越多,但消耗的 Token 也越多", "url_added": "网址已添加", - "url_placeholder": "请输入网址,多个网址用回车分隔", - "urls": "网址" + "url_placeholder": "请输入网址, 多个网址用回车分隔", + "urls": "网址", + "status_embedding_completed": "嵌入完成", + "status_preprocess_completed": "预处理完成", + "status_embedding_failed": "嵌入失败", + "status_preprocess_failed": "预处理失败", + "quota": "{{name}} 剩余额度:{{quota}}", + "quota_infinity": "{{name}} 剩余额度:无限制", + "name_required": "知识库名称为必填项", + "embedding_model_required": "知识库嵌入模型是必需的" }, "languages": { "arabic": "阿拉伯文", @@ -835,7 +851,7 @@ "notification": { "assistant": "助手响应", "knowledge.success": "成功添加 {{type}} 到知识库", - "knowledge.error": "添加 {{type}} 到知识库失败: {{error}}" + "knowledge.error": "{{error}}" }, "ollama": { "keep_alive_time.description": "对话后模型在内存中保持的时间(默认:5 分钟)", @@ -1670,7 +1686,7 @@ "title": "通知设置", "assistant": "助手消息", "backup": "备份", - "knowledge_embed": "知识嵌入" + "knowledge_embed": "知识库" }, "provider": { "add.name": 
"提供商名称", @@ -1831,71 +1847,6 @@ "tray.onclose": "关闭时最小化到托盘", "tray.show": "显示托盘图标", "tray.title": "托盘", - "websearch": { - "blacklist": "黑名单", - "blacklist_description": "在搜索结果中不会出现以下网站的结果", - "blacklist_tooltip": "请使用以下格式 (换行分隔)\n匹配模式: *://*.example.com/*\n正则表达式: /example\\.(net|org)/", - "check": "检测", - "check_failed": "验证失败", - "check_success": "验证成功", - "overwrite": "覆盖服务商搜索", - "overwrite_tooltip": "强制使用搜索服务商而不是大语言模型进行搜索", - "get_api_key": "点击这里获取密钥", - "no_provider_selected": "请选择搜索服务商后再检测", - "search_max_result": "搜索结果个数", - "search_provider": "搜索服务商", - "search_provider_placeholder": "选择一个搜索服务商", - "subscribe": "黑名单订阅", - "subscribe_update": "立即更新", - "subscribe_add": "添加订阅", - "subscribe_url": "订阅源地址", - "subscribe_name": "替代名字", - "subscribe_name.placeholder": "当下载的订阅源没有名称时所使用的替代名称", - "subscribe_add_success": "订阅源添加成功!", - "subscribe_delete": "删除订阅源", - "search_result_default": "默认", - "search_with_time": "搜索包含日期", - "tavily": { - "api_key": "Tavily API 密钥", - "api_key.placeholder": "请输入 Tavily API 密钥", - "description": "Tavily 是一个为 AI 代理量身定制的搜索引擎,提供实时、准确的结果、智能查询建议和深入的研究能力", - "title": "Tavily" - }, - "title": "网络搜索", - "apikey": "API 密钥", - "free": "免费", - "compression": { - "title": "搜索结果压缩", - "method": "压缩方法", - "method.none": "不压缩", - "method.cutoff": "截断", - "cutoff.limit": "截断长度", - "cutoff.limit.placeholder": "输入长度", - "cutoff.limit.tooltip": "限制搜索结果的内容长度,超过限制的内容将被截断(例如 2000 字符)", - "cutoff.unit.char": "字符", - "cutoff.unit.token": "Token", - "method.rag": "RAG", - "rag.document_count": "文档数量", - "rag.document_count.default": "默认", - "rag.document_count.tooltip": "预期从单个搜索结果中提取的文档数量,实际提取的总数量是这个值乘以搜索结果数量。", - "rag.embedding_dimensions.auto_get": "自动获取维度", - "rag.embedding_dimensions.placeholder": "不设置维度", - "rag.embedding_dimensions.tooltip": "留空则不传递 dimensions 参数", - "info": { - "dimensions_auto_success": "维度自动获取成功,维度为 {{dimensions}}" - }, - "error": { - "embedding_model_required": "请先选择嵌入模型", - "dimensions_auto_failed": "维度自动获取失败", - 
"provider_not_found": "未找到服务商", - "rag_failed": "RAG 失败" - } - }, - "subscribe_add_failed": "添加黑名单订阅失败", - "subscribe_update_success": "黑名单订阅更新成功", - "subscribe_update_failed": "更新黑名单订阅失败", - "subscribe_source_update_failed": "更新黑名单订阅源失败" - }, "quickPhrase": { "title": "快捷短语", "add": "添加短语", @@ -1941,7 +1892,93 @@ "service_tier.auto": "自动", "service_tier.default": "默认", "service_tier.flex": "灵活" - } + }, + "tool": { + "title": "工具设置", + "preprocess": { + "title": "文档预处理", + "provider": "文档预处理服务商", + "provider_placeholder": "选择一个文档预处理服务商" + }, + "ocr": { + "title": "OCR", + "provider": "OCR 服务商", + "provider_placeholder": "选择一个 OCR 服务商", + "mac_system_ocr_options": { + "mode": { + "title": "识别模式", + "accurate": "准确", + "fast": "快速" + }, + "min_confidence": "最低置信度" + } + }, + "websearch": { + "blacklist": "黑名单", + "blacklist_description": "在搜索结果中不会出现以下网站的结果", + "blacklist_tooltip": "请使用以下格式(换行分隔)\n匹配模式: *://*.example.com/*\n正则表达式: /example\\.(net|org)/", + "check": "检测", + "check_failed": "验证失败", + "check_success": "验证成功", + "overwrite": "覆盖服务商搜索", + "overwrite_tooltip": "强制使用搜索服务商而不是大语言模型进行搜索", + "get_api_key": "点击这里获取密钥", + "no_provider_selected": "请选择搜索服务商后再检测", + "search_max_result": "搜索结果个数", + "search_provider": "搜索服务商", + "search_provider_placeholder": "选择一个搜索服务商", + "subscribe": "黑名单订阅", + "subscribe_update": "立即更新", + "subscribe_add": "添加订阅", + "subscribe_url": "订阅源地址", + "subscribe_name": "替代名字", + "subscribe_name.placeholder": "当下载的订阅源没有名称时所使用的替代名称", + "subscribe_add_success": "订阅源添加成功!", + "subscribe_delete": "删除订阅源", + "search_result_default": "默认", + "search_with_time": "搜索包含日期", + "tavily": { + "api_key": "Tavily API 密钥", + "api_key.placeholder": "请输入 Tavily API 密钥", + "description": "Tavily 是一个为 AI 代理量身定制的搜索引擎,提供实时、准确的结果、智能查询建议和深入的研究能力", + "title": "Tavily" + }, + "title": "网络搜索", + "apikey": "API 密钥", + "free": "免费", + "content_limit": "内容长度限制", + "content_limit_tooltip": "限制搜索结果的内容长度, 超过限制的内容将被截断", + "compression": { + "title": "搜索结果压缩", + "method": 
"压缩方法", + "method.none": "不压缩", + "method.cutoff": "截断", + "cutoff.limit": "截断长度", + "cutoff.limit.placeholder": "输入长度", + "cutoff.limit.tooltip": "限制搜索结果的内容长度, 超过限制的内容将被截断(例如 2000 字符)", + "cutoff.unit.char": "字符", + "cutoff.unit.token": "Token", + "method.rag": "RAG", + "rag.document_count": "文档数量", + "rag.document_count.default": "默认", + "rag.document_count.tooltip": "预期从单个搜索结果中提取的文档数量,实际提取的总数量是这个值乘以搜索结果数量。", + "rag.embedding_dimensions.auto_get": "自动获取维度", + "rag.embedding_dimensions.placeholder": "不设置维度", + "rag.embedding_dimensions.tooltip": "留空则不传递 dimensions 参数", + "info": { + "dimensions_auto_success": "维度自动获取成功,维度为 {{dimensions}}" + }, + "error": { + "embedding_model_required": "请先选择嵌入模型", + "dimensions_auto_failed": "维度自动获取失败", + "provider_not_found": "未找到服务商", + "rag_failed": "RAG 失败" + } + } + }, + "preprocessOrOcr.tooltip": "在设置 -> 工具中设置文档预处理服务商或OCR,文档预处理可以有效提升复杂格式文档与扫描版文档的检索效果,OCR仅可识别文档内图片或扫描版PDF的文本" + }, + "mineru.api_key": "MinerU现在提供每日500页的免费额度,您不需要填写密钥。" }, "translate": { "any.language": "任意语言", diff --git a/src/renderer/src/i18n/locales/zh-tw.json b/src/renderer/src/i18n/locales/zh-tw.json index 12addbba02..9e1a74d5e8 100644 --- a/src/renderer/src/i18n/locales/zh-tw.json +++ b/src/renderer/src/i18n/locales/zh-tw.json @@ -406,9 +406,11 @@ "prompt": "提示詞", "provider": "供應商", "regenerate": "重新生成", + "refresh": "重新整理", "rename": "重新命名", "reset": "重設", "save": "儲存", + "settings": "設定", "search": "搜尋", "select": "選擇", "selectedMessages": "選中 {{count}} 條訊息", @@ -423,7 +425,9 @@ "pinyin.asc": "按拼音升序", "pinyin.desc": "按拼音降序" }, - "no_results": "沒有結果" + "no_results": "沒有結果", + "enabled": "已啟用", + "disabled": "已停用" }, "docs": { "title": "說明文件" @@ -543,7 +547,11 @@ "rename": "重新命名", "search": "搜尋知識庫", "search_placeholder": "輸入查詢內容", - "settings": "知識庫設定", + "settings": { + "title": "知識庫設定", + "preprocessing": "預處理", + "preprocessing_tooltip": "預處理上傳的文件" + }, "sitemap_placeholder": "請輸入網站地圖 URL", "sitemaps": "網站", "source": "來源", @@ -567,12 +575,20 @@ "urls": 
"網址", "dimensions": "嵌入維度", "dimensions_size_tooltip": "嵌入維度大小,數值越大,嵌入維度越大,但消耗的 Token 也越多", - "dimensions_size_placeholder": "嵌入維度大小,例如 1024", + "status_embedding_completed": "嵌入完成", + "status_preprocess_completed": "預處理完成", + "status_embedding_failed": "嵌入失敗", + "status_preprocess_failed": "預處理失敗", + "dimensions_size_placeholder": " 嵌入維度大小,例如 1024", "dimensions_auto_set": "自動設定嵌入維度", "dimensions_error_invalid": "請輸入嵌入維度大小", "dimensions_size_too_large": "嵌入維度不能超過模型上下文限制({{max_context}})", "dimensions_set_right": "⚠️ 請確保模型支援所設置的嵌入維度大小", - "dimensions_default": "模型將使用預設嵌入維度" + "dimensions_default": "模型將使用預設嵌入維度", + "quota": "{{name}} 剩餘配額:{{quota}}", + "quota_infinity": "{{name}} 配額:無限制", + "name_required": "知識庫名稱為必填項目", + "embedding_model_required": "知識庫嵌入模型是必需的" }, "languages": { "arabic": "阿拉伯文", @@ -1808,79 +1824,99 @@ "theme.window.style.title": "視窗樣式", "theme.window.style.transparent": "透明視窗", "title": "設定", + "tool": { + "title": "工具設定", + "websearch": { + "blacklist": "黑名單", + "blacklist_description": "以下網站不會出現在搜尋結果中", + "blacklist_tooltip": "請使用以下格式 (換行符號分隔)\nexample.com\nhttps://www.example.com\nhttps://example.com\n*://*.example.com", + "check": "檢查", + "check_failed": "驗證失敗", + "check_success": "驗證成功", + "get_api_key": "點選這裡取得金鑰", + "no_provider_selected": "請選擇搜尋服務商後再檢查", + "search_max_result": "搜尋結果個數", + "search_provider": "搜尋服務商", + "search_provider_placeholder": "選擇一個搜尋服務商", + "search_result_default": "預設", + "search_with_time": "搜尋包含日期", + "tavily": { + "api_key": "Tavily API 金鑰", + "api_key.placeholder": "請輸入 Tavily API 金鑰", + "description": "Tavily 是一個為 AI 代理量身訂製的搜尋引擎,提供即時、準確的結果、智慧查詢建議和深入的研究能力", + "title": "Tavily" + }, + "title": "網路搜尋", + "subscribe": "黑名單訂閱", + "subscribe_update": "更新", + "subscribe_add": "新增訂閱", + "subscribe_url": "訂閱網址", + "subscribe_name": "替代名稱", + "subscribe_name.placeholder": "下載的訂閱源沒有名稱時使用的替代名稱。", + "subscribe_add_success": "訂閱源新增成功!", + "subscribe_delete": "刪除", + "overwrite": "覆蓋搜尋服務", + "overwrite_tooltip": 
"強制使用搜尋服務而不是 LLM", + "apikey": "API 金鑰", + "free": "免費", + "content_limit": "內容長度限制", + "content_limit_tooltip": "限制搜尋結果的內容長度;超過限制的內容將被截斷。", + "compression": { + "title": "搜尋結果壓縮", + "method": "壓縮方法", + "method.none": "不壓縮", + "method.cutoff": "截斷", + "cutoff.limit": "截斷長度", + "cutoff.limit.placeholder": "輸入長度", + "cutoff.limit.tooltip": "限制搜尋結果的內容長度,超過限制的內容將被截斷(例如 2000 字符)", + "cutoff.unit.char": "字符", + "cutoff.unit.token": "Token", + "method.rag": "RAG", + "rag.document_count": "文檔數量", + "rag.document_count.default": "預設", + "rag.document_count.tooltip": "預期從單個搜尋結果中提取的文檔數量,實際提取的總數量是這個值乘以搜尋結果數量。", + "rag.embedding_dimensions.auto_get": "自動獲取維度", + "rag.embedding_dimensions.placeholder": "不設置維度", + "rag.embedding_dimensions.tooltip": "留空則不傳遞 dimensions 參數", + "info": { + "dimensions_auto_success": "維度自動獲取成功,維度為 {{dimensions}}" + }, + "error": { + "embedding_model_required": "請先選擇嵌入模型", + "dimensions_auto_failed": "維度自動獲取失敗", + "provider_not_found": "未找到服務商", + "rag_failed": "RAG 失敗" + } + } + }, + "preprocess": { + "title": "前置處理", + "provider": "前置處理供應商", + "provider_placeholder": "選擇一個預處理供應商" + }, + "preprocessOrOcr.tooltip": "在「設定」->「工具」中設定文件預處理服務供應商或OCR。文件預處理可有效提升複雜格式文件及掃描文件的檢索效能,而OCR僅能辨識文件內圖片文字或掃描PDF文字。", + "ocr": { + "title": "光學字符識別", + "provider": "OCR 供應商", + "provider_placeholder": "選擇一個OCR服務提供商", + "mac_system_ocr_options": { + "mode": { + "title": "識別模式", + "accurate": "準確", + "fast": "快速" + }, + "min_confidence": "最小置信度" + } + } + }, + "topic.pin_to_top": "固定話題置頂", "topic.position": "話題位置", "topic.position.left": "左側", "topic.position.right": "右側", "topic.show.time": "顯示話題時間", - "topic.pin_to_top": "固定話題置頂", "tray.onclose": "關閉時最小化到系统匣", "tray.show": "顯示系统匣圖示", "tray.title": "系统匣", - "websearch": { - "check_success": "驗證成功", - "get_api_key": "點選這裡取得金鑰", - "search_with_time": "搜尋包含日期", - "tavily": { - "api_key": "Tavily API 金鑰", - "api_key.placeholder": "請輸入 Tavily API 金鑰", - "description": "Tavily 是一個為 AI 代理量身訂製的搜尋引擎,提供即時、準確的結果、智慧查詢建議和深入的研究能力", - 
"title": "Tavily" - }, - "blacklist": "黑名單", - "blacklist_description": "以下網站不會出現在搜索結果中", - "search_max_result": "搜尋結果個數", - "search_result_default": "預設", - "check": "檢查", - "search_provider": "搜尋服務商", - "search_provider_placeholder": "選擇一個搜尋服務商", - "no_provider_selected": "請選擇搜索服務商後再檢查", - "check_failed": "驗證失敗", - "blacklist_tooltip": "匹配模式: *://*.example.com/*\n正则表达式: /example\\.(net|org)/", - "subscribe": "黑名單訂閱", - "subscribe_update": "更新", - "subscribe_add": "添加訂閱", - "subscribe_url": "訂閱源地址", - "subscribe_name": "替代名稱", - "subscribe_name.placeholder": "當下載的訂閱源沒有名稱時所使用的替代名稱", - "subscribe_add_success": "訂閱源添加成功!", - "subscribe_delete": "刪除", - "title": "網路搜尋", - "overwrite": "覆蓋搜尋服務商", - "overwrite_tooltip": "強制使用搜尋服務商而不是大語言模型進行搜尋", - "apikey": "API 金鑰", - "free": "免費", - "compression": { - "title": "搜尋結果壓縮", - "method": "壓縮方法", - "method.none": "不壓縮", - "method.cutoff": "截斷", - "cutoff.limit": "截斷長度", - "cutoff.limit.placeholder": "輸入長度", - "cutoff.limit.tooltip": "限制搜尋結果的內容長度,超過限制的內容將被截斷(例如 2000 字符)", - "cutoff.unit.char": "字符", - "cutoff.unit.token": "Token", - "method.rag": "RAG", - "rag.document_count": "文檔數量", - "rag.document_count.default": "預設", - "rag.document_count.tooltip": "預期從單個搜尋結果中提取的文檔數量,實際提取的總數量是這個值乘以搜尋結果數量。", - "rag.embedding_dimensions.auto_get": "自動獲取維度", - "rag.embedding_dimensions.placeholder": "不設置維度", - "rag.embedding_dimensions.tooltip": "留空則不傳遞 dimensions 參數", - "info": { - "dimensions_auto_success": "維度自動獲取成功,維度為 {{dimensions}}" - }, - "error": { - "embedding_model_required": "請先選擇嵌入模型", - "dimensions_auto_failed": "維度自動獲取失敗", - "provider_not_found": "未找到服務商", - "rag_failed": "RAG 失敗" - } - }, - "subscribe_add_failed": "加入黑名單訂閱失敗", - "subscribe_update_success": "黑名單訂閱更新成功", - "subscribe_update_failed": "更新黑名單訂閱失敗", - "subscribe_source_update_failed": "更新黑名單訂閱來源失敗" - }, "general.auto_check_update.title": "自動更新", "general.test_plan.title": "測試計畫", "general.test_plan.tooltip": "參與測試計畫,體驗最新功能,但同時也帶來更多風險,請務必提前備份數據", @@ -1941,7 +1977,8 
@@ "assistant": "助手訊息", "backup": "備份訊息", "knowledge_embed": "知識庫訊息" - } + }, + "mineru.api_key": "Mineru 現在每天提供 500 頁的免費配額,且無需輸入金鑰。" }, "translate": { "any.language": "任意語言", diff --git a/src/renderer/src/pages/files/ContentView.tsx b/src/renderer/src/pages/files/ContentView.tsx index 6630962921..a1c1f00aaa 100644 --- a/src/renderer/src/pages/files/ContentView.tsx +++ b/src/renderer/src/pages/files/ContentView.tsx @@ -1,5 +1,5 @@ import FileManager from '@renderer/services/FileManager' -import { FileType, FileTypes } from '@renderer/types' +import { FileMetadata, FileTypes } from '@renderer/types' import { formatFileSize } from '@renderer/utils' import { Col, Image, Row, Spin, Table } from 'antd' import React, { memo } from 'react' @@ -7,7 +7,7 @@ import styled from 'styled-components' interface ContentViewProps { id: FileTypes | 'all' | string - files?: FileType[] + files?: FileMetadata[] dataSource?: any[] columns: any[] } diff --git a/src/renderer/src/pages/files/FileList.tsx b/src/renderer/src/pages/files/FileList.tsx index a08de9912f..f34c2b5bdf 100644 --- a/src/renderer/src/pages/files/FileList.tsx +++ b/src/renderer/src/pages/files/FileList.tsx @@ -1,7 +1,7 @@ import { DeleteOutlined, ExclamationCircleOutlined } from '@ant-design/icons' import { handleDelete } from '@renderer/services/FileAction' import FileManager from '@renderer/services/FileManager' -import { FileType, FileTypes } from '@renderer/types' +import { FileMetadata, FileTypes } from '@renderer/types' import { formatFileSize } from '@renderer/utils' import { Col, Image, Row, Spin } from 'antd' import { t } from 'i18next' @@ -16,14 +16,14 @@ interface FileItemProps { list: { key: FileTypes | 'all' | string file: React.ReactNode - files?: FileType[] + files?: FileMetadata[] count?: number size: string ext: string created_at: string actions: React.ReactNode }[] - files?: FileType[] + files?: FileMetadata[] } const FileList: React.FC = ({ id, list, files }) => { diff --git 
a/src/renderer/src/pages/files/FilesPage.tsx b/src/renderer/src/pages/files/FilesPage.tsx index 2890a0cb85..550f8d551c 100644 --- a/src/renderer/src/pages/files/FilesPage.tsx +++ b/src/renderer/src/pages/files/FilesPage.tsx @@ -10,7 +10,7 @@ import ListItem from '@renderer/components/ListItem' import db from '@renderer/databases' import { handleDelete, handleRename, sortFiles, tempFilesSort } from '@renderer/services/FileAction' import FileManager from '@renderer/services/FileManager' -import { FileType, FileTypes } from '@renderer/types' +import { FileMetadata, FileTypes } from '@renderer/types' import { formatFileSize } from '@renderer/utils' import { Button, Empty, Flex, Popconfirm } from 'antd' import dayjs from 'dayjs' @@ -31,7 +31,7 @@ const FilesPage: FC = () => { const [sortField, setSortField] = useState('created_at') const [sortOrder, setSortOrder] = useState('desc') - const files = useLiveQuery(() => { + const files = useLiveQuery(() => { if (fileType === 'all') { return db.files.orderBy('count').toArray().then(tempFilesSort) } diff --git a/src/renderer/src/pages/home/Inputbar/AttachmentPreview.tsx b/src/renderer/src/pages/home/Inputbar/AttachmentPreview.tsx index 4f69ca7815..b2004994df 100644 --- a/src/renderer/src/pages/home/Inputbar/AttachmentPreview.tsx +++ b/src/renderer/src/pages/home/Inputbar/AttachmentPreview.tsx @@ -14,7 +14,7 @@ import { } from '@ant-design/icons' import CustomTag from '@renderer/components/CustomTag' import FileManager from '@renderer/services/FileManager' -import { FileType } from '@renderer/types' +import { FileMetadata } from '@renderer/types' import { formatFileSize } from '@renderer/utils' import { Flex, Image, Tooltip } from 'antd' import { isEmpty } from 'lodash' @@ -22,8 +22,8 @@ import { FC, useState } from 'react' import styled from 'styled-components' interface Props { - files: FileType[] - setFiles: (files: FileType[]) => void + files: FileMetadata[] + setFiles: (files: FileMetadata[]) => void } const 
MAX_FILENAME_DISPLAY_LENGTH = 20 @@ -80,7 +80,7 @@ export const getFileIcon = (type?: string) => { return } -export const FileNameRender: FC<{ file: FileType }> = ({ file }) => { +export const FileNameRender: FC<{ file: FileMetadata }> = ({ file }) => { const [visible, setVisible] = useState(false) const isImage = (ext: string) => { return ['.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp'].includes(ext) diff --git a/src/renderer/src/pages/home/Inputbar/WebSearchButton.tsx b/src/renderer/src/pages/home/Inputbar/WebSearchButton.tsx index 906c7aa5aa..ec18054047 100644 --- a/src/renderer/src/pages/home/Inputbar/WebSearchButton.tsx +++ b/src/renderer/src/pages/home/Inputbar/WebSearchButton.tsx @@ -55,8 +55,8 @@ const WebSearchButton: FC = ({ ref, assistant, ToolbarButton }) => { label: p.name, description: WebSearchService.isWebSearchEnabled(p.id) ? hasObjectKey(p, 'apiKey') - ? t('settings.websearch.apikey') - : t('settings.websearch.free') + ? t('settings.tool.websearch.apikey') + : t('settings.tool.websearch.free') : t('chat.input.web_search.enable_content'), icon: , isSelected: p.id === assistant?.webSearchProviderId, @@ -81,7 +81,7 @@ const WebSearchButton: FC = ({ ref, assistant, ToolbarButton }) => { items.push({ label: t('chat.input.web_search.settings'), icon: , - action: () => navigate('/settings/web-search') + action: () => navigate('/settings/tool/websearch') }) items.unshift({ diff --git a/src/renderer/src/pages/home/Messages/CitationsList.tsx b/src/renderer/src/pages/home/Messages/CitationsList.tsx index 60b2959b4b..5a61fecb72 100644 --- a/src/renderer/src/pages/home/Messages/CitationsList.tsx +++ b/src/renderer/src/pages/home/Messages/CitationsList.tsx @@ -182,7 +182,8 @@ const KnowledgeCitation: React.FC<{ citation: Citation }> = ({ citation }) => { {citation.showFavicon && } handleLinkClick(citation.url, e)}> - {citation.title} + {/* example title: User/path/example.pdf */} + {citation.title?.split('/').pop()} {citation.number} {citation.content && } 
diff --git a/src/renderer/src/pages/home/Messages/MessageAttachments.tsx b/src/renderer/src/pages/home/Messages/MessageAttachments.tsx index 0e17221423..e13e56e6ca 100644 --- a/src/renderer/src/pages/home/Messages/MessageAttachments.tsx +++ b/src/renderer/src/pages/home/Messages/MessageAttachments.tsx @@ -20,7 +20,7 @@ const StyledUpload = styled(Upload)` ` const MessageAttachments: FC = ({ block }) => { - // const handleCopyImage = async (image: FileType) => { + // const handleCopyImage = async (image: FileMetadata) => { // const data = await FileManager.readFile(image) // const blob = new Blob([data], { type: 'image/png' }) // const item = new ClipboardItem({ [blob.type]: blob }) diff --git a/src/renderer/src/pages/home/Messages/MessageEditor.tsx b/src/renderer/src/pages/home/Messages/MessageEditor.tsx index dc90ebe517..8639855bce 100644 --- a/src/renderer/src/pages/home/Messages/MessageEditor.tsx +++ b/src/renderer/src/pages/home/Messages/MessageEditor.tsx @@ -7,7 +7,7 @@ import FileManager from '@renderer/services/FileManager' import PasteService from '@renderer/services/PasteService' import { useAppSelector } from '@renderer/store' import { selectMessagesForTopic } from '@renderer/store/newMessage' -import { FileType, FileTypes } from '@renderer/types' +import { FileMetadata, FileTypes } from '@renderer/types' import { Message, MessageBlock, MessageBlockStatus, MessageBlockType } from '@renderer/types/newMessage' import { classNames, getFileExtension } from '@renderer/utils' import { getFilesFromDropEvent, isSendMessageKeyPressed } from '@renderer/utils/input' @@ -36,7 +36,7 @@ interface Props { const MessageBlockEditor: FC = ({ message, topicId, onSave, onResend, onCancel }) => { const allBlocks = findAllBlocks(message) const [editedBlocks, setEditedBlocks] = useState(allBlocks) - const [files, setFiles] = useState([]) + const [files, setFiles] = useState([]) const [isProcessing, setIsProcessing] = useState(false) const [isFileDragging, setIsFileDragging] = 
useState(false) const { assistant } = useAssistant(message.assistantId) diff --git a/src/renderer/src/pages/knowledge/KnowledgeContent.tsx b/src/renderer/src/pages/knowledge/KnowledgeContent.tsx index 71e0125c8d..4d4e702d4e 100644 --- a/src/renderer/src/pages/knowledge/KnowledgeContent.tsx +++ b/src/renderer/src/pages/knowledge/KnowledgeContent.tsx @@ -7,12 +7,13 @@ import { getProviderName } from '@renderer/services/ProviderService' import { KnowledgeBase } from '@renderer/types' import { Button, Empty, Tabs, Tag, Tooltip } from 'antd' import { Book, Folder, Globe, Link, Notebook, Search, Settings } from 'lucide-react' -import { FC, useState } from 'react' +import { FC, useEffect, useState } from 'react' import { useTranslation } from 'react-i18next' import styled from 'styled-components' import KnowledgeSearchPopup from './components/KnowledgeSearchPopup' -import KnowledgeSettingsPopup from './components/KnowledgeSettingsPopup' +import KnowledgeSettings from './components/KnowledgeSettings' +import QuotaTag from './components/QuotaTag' import KnowledgeDirectories from './items/KnowledgeDirectories' import KnowledgeFiles from './items/KnowledgeFiles' import KnowledgeNotes from './items/KnowledgeNotes' @@ -27,16 +28,46 @@ const KnowledgeContent: FC = ({ selectedBase }) => { const { t } = useTranslation() const { base, urlItems, fileItems, directoryItems, noteItems, sitemapItems } = useKnowledge(selectedBase.id || '') const [activeKey, setActiveKey] = useState('files') + const [quota, setQuota] = useState(undefined) + const [progressMap, setProgressMap] = useState>(new Map()) + const [preprocessMap, setPreprocessMap] = useState>(new Map()) const providerName = getProviderName(base?.model.provider || '') + useEffect(() => { + const handlers = [ + window.electron.ipcRenderer.on('file-preprocess-finished', (_, { itemId, quota }) => { + setPreprocessMap((prev) => new Map(prev).set(itemId, true)) + if (quota) { + setQuota(quota) + } + }), + + 
window.electron.ipcRenderer.on('file-preprocess-progress', (_, { itemId, progress }) => { + setProgressMap((prev) => new Map(prev).set(itemId, progress)) + }), + + window.electron.ipcRenderer.on('file-ocr-progress', (_, { itemId, progress }) => { + setProgressMap((prev) => new Map(prev).set(itemId, progress)) + }), + + window.electron.ipcRenderer.on('directory-processing-percent', (_, { itemId, percent }) => { + console.log('[Progress] Directory:', itemId, percent) + setProgressMap((prev) => new Map(prev).set(itemId, percent)) + }) + ] + + return () => { + handlers.forEach((cleanup) => cleanup()) + } + }, []) const knowledgeItems = [ { key: 'files', title: t('files.title'), icon: activeKey === 'files' ? : , items: fileItems, - content: + content: }, { key: 'notes', @@ -50,7 +81,7 @@ const KnowledgeContent: FC = ({ selectedBase }) => { title: t('knowledge.directories'), icon: activeKey === 'directories' ? : , items: directoryItems, - content: + content: }, { key: 'urls', @@ -93,7 +124,7 @@ const KnowledgeContent: FC = ({ selectedBase }) => { - -
- maxContext) { - return Promise.reject(new Error(t('knowledge.chunk_size_too_large', { max_context: maxContext }))) - } - return Promise.resolve() - } - } - ]}> - - - ({ - validator(_, value) { - if (!value || getFieldValue('chunkSize') > value) { - return Promise.resolve() - } - return Promise.reject(new Error(t('message.error.chunk_overlap_too_large'))) - } - }) - ]} - dependencies={['chunkSize']}> - - - - 1 || value < 0)) { - return Promise.reject(new Error(t('knowledge.threshold_too_large_or_small'))) - } - return Promise.resolve() - } - } - ]}> - - - - } - /> -
- - - ) -} - -const TopViewKey = 'KnowledgeSettingsPopup' - -export default class KnowledgeSettingsPopup { - static hide() { - TopView.hide(TopViewKey) - } - - static show(props: ShowParams) { - return new Promise((resolve) => { - TopView.show( - { - resolve(v) - TopView.hide(TopViewKey) - }} - />, - TopViewKey - ) - }) - } -} diff --git a/src/renderer/src/pages/knowledge/components/QuotaTag.tsx b/src/renderer/src/pages/knowledge/components/QuotaTag.tsx new file mode 100644 index 0000000000..6db268cc6e --- /dev/null +++ b/src/renderer/src/pages/knowledge/components/QuotaTag.tsx @@ -0,0 +1,66 @@ +import { usePreprocessProvider } from '@renderer/hooks/usePreprocess' +import { getStoreSetting } from '@renderer/hooks/useSettings' +import { getKnowledgeBaseParams } from '@renderer/services/KnowledgeService' +import { KnowledgeBase } from '@renderer/types' +import { Tag } from 'antd' +import { FC, useEffect, useState } from 'react' +import { useTranslation } from 'react-i18next' + +const QuotaTag: FC<{ base: KnowledgeBase; providerId: string; quota?: number }> = ({ + base, + providerId, + quota: _quota +}) => { + const { t } = useTranslation() + const { provider, updatePreprocessProvider } = usePreprocessProvider(providerId) + const [quota, setQuota] = useState(_quota) + + useEffect(() => { + const checkQuota = async () => { + if (provider.id !== 'mineru') return + // 使用用户的key时quota为无限 + if (provider.apiKey) { + setQuota(-9999) + updatePreprocessProvider({ ...provider, quota: -9999 }) + return + } + if (quota === undefined) { + const userId = getStoreSetting('userId') + const baseParams = getKnowledgeBaseParams(base) + try { + const response = await window.api.knowledgeBase.checkQuota({ + base: baseParams, + userId: userId as string + }) + setQuota(response) + } catch (error) { + console.error('[KnowledgeContent] Error checking quota:', error) + } + } + } + if (_quota) { + updatePreprocessProvider({ ...provider, quota: _quota }) + return + } + checkQuota() + }, [_quota, 
base, provider, quota, updatePreprocessProvider]) + + return ( + <> + {quota && ( + + {quota === -9999 + ? t('knowledge.quota_infinity', { + name: provider.name + }) + : t('knowledge.quota', { + name: provider.name, + quota: quota + })} + + )} + + ) +} + +export default QuotaTag diff --git a/src/renderer/src/pages/knowledge/components/StatusIcon.tsx b/src/renderer/src/pages/knowledge/components/StatusIcon.tsx index 5bf98f5a35..69435d4e14 100644 --- a/src/renderer/src/pages/knowledge/components/StatusIcon.tsx +++ b/src/renderer/src/pages/knowledge/components/StatusIcon.tsx @@ -1,7 +1,7 @@ import { CheckCircleOutlined, CloseCircleOutlined } from '@ant-design/icons' import { KnowledgeBase, ProcessingStatus } from '@renderer/types' import { Progress, Tooltip } from 'antd' -import { FC } from 'react' +import React, { FC, useMemo } from 'react' import { useTranslation } from 'react-i18next' import styled from 'styled-components' @@ -9,64 +9,83 @@ interface StatusIconProps { sourceId: string base: KnowledgeBase getProcessingStatus: (sourceId: string) => ProcessingStatus | undefined - getProcessingPercent?: (sourceId: string) => number | undefined type: string + progress?: number + isPreprocessed?: boolean } -const StatusIcon: FC = ({ sourceId, base, getProcessingStatus, getProcessingPercent, type }) => { +const StatusIcon: FC = ({ + sourceId, + base, + getProcessingStatus, + type, + progress = 0, + isPreprocessed +}) => { const { t } = useTranslation() const status = getProcessingStatus(sourceId) - const percent = getProcessingPercent?.(sourceId) const item = base.items.find((item) => item.id === sourceId) const errorText = item?.processingError + console.log('[StatusIcon] Rendering for item:', item?.id, 'Status:', status, 'Progress:', progress) - if (!status) { - if (item?.uniqueId) { + const statusDisplay = useMemo(() => { + if (!status) { + if (item?.uniqueId) { + if (isPreprocessed && item.type === 'file') { + return ( + + + + ) + } + return ( + + + + ) + } return ( - 
- + + ) } - return ( - - - - ) - } - switch (status) { - case 'pending': - return ( - - - - ) + switch (status) { + case 'pending': + return ( + + + + ) - case 'processing': { - return type === 'directory' ? ( - - ) : ( - - - - ) + case 'processing': { + return type === 'directory' || type === 'file' ? ( + + ) : ( + + + + ) + } + case 'completed': + return ( + + + + ) + case 'failed': + return ( + + + + ) + default: + return null } - case 'completed': - return ( - - - - ) - case 'failed': - return ( - - - - ) - default: - return null - } + }, [status, item?.uniqueId, type, progress, errorText, t]) + + return statusDisplay } const StatusDot = styled.div<{ $status: 'pending' | 'processing' | 'new' }>` @@ -91,4 +110,14 @@ const StatusDot = styled.div<{ $status: 'pending' | 'processing' | 'new' }>` } ` -export default StatusIcon +export default React.memo(StatusIcon, (prevProps, nextProps) => { + return ( + prevProps.sourceId === nextProps.sourceId && + prevProps.type === nextProps.type && + prevProps.base.id === nextProps.base.id && + prevProps.progress === nextProps.progress && + prevProps.getProcessingStatus(prevProps.sourceId) === nextProps.getProcessingStatus(nextProps.sourceId) && + prevProps.base.items.find((item) => item.id === prevProps.sourceId)?.processingError === + nextProps.base.items.find((item) => item.id === nextProps.sourceId)?.processingError + ) +}) diff --git a/src/renderer/src/pages/knowledge/items/KnowledgeDirectories.tsx b/src/renderer/src/pages/knowledge/items/KnowledgeDirectories.tsx index 27829edb61..3cdd480c04 100644 --- a/src/renderer/src/pages/knowledge/items/KnowledgeDirectories.tsx +++ b/src/renderer/src/pages/knowledge/items/KnowledgeDirectories.tsx @@ -26,6 +26,7 @@ import { interface KnowledgeContentProps { selectedBase: KnowledgeBase + progressMap: Map } const getDisplayTime = (item: KnowledgeItem) => { @@ -33,18 +34,12 @@ const getDisplayTime = (item: KnowledgeItem) => { return dayjs(timestamp).format('MM-DD HH:mm') } -const 
KnowledgeDirectories: FC = ({ selectedBase }) => { +const KnowledgeDirectories: FC = ({ selectedBase, progressMap }) => { const { t } = useTranslation() - const { - base, - directoryItems, - refreshItem, - removeItem, - getProcessingStatus, - getDirectoryProcessingPercent, - addDirectory - } = useKnowledge(selectedBase.id || '') + const { base, directoryItems, refreshItem, removeItem, getProcessingStatus, addDirectory } = useKnowledge( + selectedBase.id || '' + ) const providerName = getProviderName(base?.model.provider || '') const disabled = !base?.version || !providerName @@ -53,8 +48,6 @@ const KnowledgeDirectories: FC = ({ selectedBase }) => { return null } - const getProgressingPercentForItem = (itemId: string) => getDirectoryProcessingPercent(itemId) - const handleAddDirectory = async () => { if (disabled) { return @@ -102,7 +95,7 @@ const KnowledgeDirectories: FC = ({ selectedBase }) => { sourceId={item.id} base={base} getProcessingStatus={getProcessingStatus} - getProcessingPercent={getProgressingPercentForItem} + progress={progressMap.get(item.id)} type="directory" /> diff --git a/src/renderer/src/pages/knowledge/items/KnowledgeFiles.tsx b/src/renderer/src/pages/knowledge/items/KnowledgeFiles.tsx index 074d5972a2..0bcbe5184c 100644 --- a/src/renderer/src/pages/knowledge/items/KnowledgeFiles.tsx +++ b/src/renderer/src/pages/knowledge/items/KnowledgeFiles.tsx @@ -5,8 +5,8 @@ import FileItem from '@renderer/pages/files/FileItem' import StatusIcon from '@renderer/pages/knowledge/components/StatusIcon' import FileManager from '@renderer/services/FileManager' import { getProviderName } from '@renderer/services/ProviderService' -import { FileType, FileTypes, KnowledgeBase, KnowledgeItem } from '@renderer/types' -import { formatFileSize } from '@renderer/utils' +import { FileMetadata, FileType, FileTypes, KnowledgeBase, KnowledgeItem } from '@renderer/types' +import { formatFileSize, uuid } from '@renderer/utils' import { bookExts, documentExts, textExts, 
thirdPartyApplicationExts } from '@shared/config/constant' import { Button, Tooltip, Upload } from 'antd' import dayjs from 'dayjs' @@ -30,6 +30,8 @@ const { Dragger } = Upload interface KnowledgeContentProps { selectedBase: KnowledgeBase + progressMap: Map + preprocessMap: Map } const fileTypes = [...bookExts, ...thirdPartyApplicationExts, ...documentExts, ...textExts] @@ -39,7 +41,7 @@ const getDisplayTime = (item: KnowledgeItem) => { return dayjs(timestamp).format('MM-DD HH:mm') } -const KnowledgeFiles: FC = ({ selectedBase }) => { +const KnowledgeFiles: FC = ({ selectedBase, progressMap, preprocessMap }) => { const { t } = useTranslation() const [windowHeight, setWindowHeight] = useState(window.innerHeight) @@ -82,26 +84,49 @@ const KnowledgeFiles: FC = ({ selectedBase }) => { if (disabled) { return } - if (files) { - const _files: FileType[] = files - .map((file) => ({ - id: file.name, - name: file.name, - path: window.api.file.getPathForFile(file), - size: file.size, - ext: `.${file.name.split('.').pop()}`.toLowerCase(), - count: 1, - origin_name: file.name, - type: file.type as FileTypes, - created_at: new Date().toISOString() - })) + const _files: FileMetadata[] = files + .map((file) => { + // 这个路径 filePath 很可能是在文件选择时的原始路径。 + const filePath = window.api.file.getPathForFile(file) + let nameFromPath = filePath + const lastSlash = filePath.lastIndexOf('/') + const lastBackslash = filePath.lastIndexOf('\\') + if (lastSlash !== -1 || lastBackslash !== -1) { + nameFromPath = filePath.substring(Math.max(lastSlash, lastBackslash) + 1) + } + + // 从派生的文件名中获取扩展名 + const extFromPath = nameFromPath.includes('.') ? 
`.${nameFromPath.split('.').pop()}` : '' + + return { + id: uuid(), + name: nameFromPath, // 使用从路径派生的文件名 + path: filePath, + size: file.size, + ext: extFromPath.toLowerCase(), + count: 1, + origin_name: file.name, // 保存 File 对象中原始的文件名 + type: file.type as FileTypes, + created_at: new Date().toISOString() + } + }) .filter(({ ext }) => fileTypes.includes(ext)) - const uploadedFiles = await FileManager.uploadFiles(_files) - addFiles(uploadedFiles) + // const uploadedFiles = await FileManager.uploadFiles(_files) + addFiles(_files) } } + const showPreprocessIcon = (item: KnowledgeItem) => { + if (base.preprocessOrOcrProvider && item.isPreprocessed !== false) { + return true + } + if (!base.preprocessOrOcrProvider && item.isPreprocessed === true) { + return true + } + return false + } + return ( @@ -161,6 +186,18 @@ const KnowledgeFiles: FC = ({ selectedBase }) => { {item.uniqueId && ( diff --git a/src/renderer/src/pages/settings/WebSearchSettings/BasicSettings.tsx b/src/renderer/src/pages/settings/ToolSettings/WebSearchSettings/BasicSettings.tsx similarity index 78% rename from src/renderer/src/pages/settings/WebSearchSettings/BasicSettings.tsx rename to src/renderer/src/pages/settings/ToolSettings/WebSearchSettings/BasicSettings.tsx index f891e2aee1..967a4e1ae0 100644 --- a/src/renderer/src/pages/settings/WebSearchSettings/BasicSettings.tsx +++ b/src/renderer/src/pages/settings/ToolSettings/WebSearchSettings/BasicSettings.tsx @@ -6,7 +6,7 @@ import { Slider, Switch } from 'antd' import { t } from 'i18next' import { FC } from 'react' -import { SettingDivider, SettingGroup, SettingRow, SettingRowTitle, SettingTitle } from '..' +import { SettingDivider, SettingGroup, SettingRow, SettingRowTitle, SettingTitle } from '../..' 
const BasicSettings: FC = () => { const { theme } = useTheme() @@ -20,19 +20,19 @@ const BasicSettings: FC = () => { {t('settings.general.title')} - {t('settings.websearch.search_with_time')} + {t('settings.tool.websearch.search_with_time')} dispatch(setSearchWithTime(checked))} /> - {t('settings.websearch.search_max_result')} + {t('settings.tool.websearch.search_max_result')} dispatch(setMaxResult(value))} /> diff --git a/src/renderer/src/pages/settings/WebSearchSettings/BlacklistSettings.tsx b/src/renderer/src/pages/settings/ToolSettings/WebSearchSettings/BlacklistSettings.tsx similarity index 88% rename from src/renderer/src/pages/settings/WebSearchSettings/BlacklistSettings.tsx rename to src/renderer/src/pages/settings/ToolSettings/WebSearchSettings/BlacklistSettings.tsx index a4a062e463..00ffe3e34a 100644 --- a/src/renderer/src/pages/settings/WebSearchSettings/BlacklistSettings.tsx +++ b/src/renderer/src/pages/settings/ToolSettings/WebSearchSettings/BlacklistSettings.tsx @@ -10,7 +10,7 @@ import TextArea from 'antd/es/input/TextArea' import { t } from 'i18next' import { FC, useEffect, useState } from 'react' -import { SettingDivider, SettingGroup, SettingRow, SettingRowTitle, SettingTitle } from '..' +import { SettingDivider, SettingGroup, SettingRow, SettingRowTitle, SettingTitle } from '../..' 
import AddSubscribePopup from './AddSubscribePopup' type TableRowSelection = TableProps['rowSelection'] @@ -131,7 +131,7 @@ const BlacklistSettings: FC = () => { console.error(`Error updating subscribe source ${source.url}:`, error) // 显示具体源更新失败的消息 window.message.warning({ - content: t('settings.websearch.subscribe_source_update_failed', { url: source.url }), + content: t('settings.tool.websearch.subscribe_source_update_failed', { url: source.url }), duration: 3 }) } @@ -143,7 +143,7 @@ const BlacklistSettings: FC = () => { setSubscribeValid(true) // 显示成功消息 window.message.success({ - content: t('settings.websearch.subscribe_update_success'), + content: t('settings.tool.websearch.subscribe_update_success'), duration: 2 }) setTimeout(() => setSubscribeValid(false), 3000) @@ -154,7 +154,7 @@ const BlacklistSettings: FC = () => { } catch (error) { console.error('Error updating subscribes:', error) window.message.error({ - content: t('settings.websearch.subscribe_update_failed'), + content: t('settings.tool.websearch.subscribe_update_failed'), duration: 2 }) } @@ -165,7 +165,7 @@ const BlacklistSettings: FC = () => { async function handleAddSubscribe() { setSubscribeChecking(true) const result = await AddSubscribePopup.show({ - title: t('settings.websearch.subscribe_add') + title: t('settings.tool.websearch.subscribe_add') }) if (result && result.url) { @@ -185,14 +185,14 @@ const BlacklistSettings: FC = () => { setSubscribeValid(true) // 显示成功消息 window.message.success({ - content: t('settings.websearch.subscribe_add_success'), + content: t('settings.tool.websearch.subscribe_add_success'), duration: 2 }) setTimeout(() => setSubscribeValid(false), 3000) } catch (error) { setSubscribeValid(false) window.message.error({ - content: t('settings.websearch.subscribe_add_failed'), + content: t('settings.tool.websearch.subscribe_add_failed'), duration: 2 }) } @@ -218,32 +218,32 @@ const BlacklistSettings: FC = () => { return ( <> - {t('settings.websearch.blacklist')} + 
{t('settings.tool.websearch.blacklist')} - {t('settings.websearch.blacklist_description')} + {t('settings.tool.websearch.blacklist_description')}