diff --git a/packages/shared/IpcChannel.ts b/packages/shared/IpcChannel.ts
index 8a0f266579..403c20aaf8 100644
--- a/packages/shared/IpcChannel.ts
+++ b/packages/shared/IpcChannel.ts
@@ -118,6 +118,7 @@ export enum IpcChannel {
   File_Copy = 'file:copy',
   File_BinaryImage = 'file:binaryImage',
   File_Base64File = 'file:base64File',
+  File_GetPdfInfo = 'file:getPdfInfo',
   Fs_Read = 'fs:read',
 
   Export_Word = 'export:word',
diff --git a/src/main/ipc.ts b/src/main/ipc.ts
index 466c5f35a8..6e7ba9b2b2 100644
--- a/src/main/ipc.ts
+++ b/src/main/ipc.ts
@@ -226,6 +226,7 @@ export function registerIpc(mainWindow: BrowserWindow, app: Electron.App) {
   ipcMain.handle(IpcChannel.File_Base64Image, fileManager.base64Image)
   ipcMain.handle(IpcChannel.File_SaveBase64Image, fileManager.saveBase64Image)
   ipcMain.handle(IpcChannel.File_Base64File, fileManager.base64File)
+  ipcMain.handle(IpcChannel.File_GetPdfInfo, fileManager.pdfPageCount)
   ipcMain.handle(IpcChannel.File_Download, fileManager.downloadFile)
   ipcMain.handle(IpcChannel.File_Copy, fileManager.copyFile)
   ipcMain.handle(IpcChannel.File_BinaryImage, fileManager.binaryImage)
diff --git a/src/main/services/FileStorage.ts b/src/main/services/FileStorage.ts
index 0ea36abc09..2ac689b8cc 100644
--- a/src/main/services/FileStorage.ts
+++ b/src/main/services/FileStorage.ts
@@ -15,6 +15,7 @@ import * as fs from 'fs'
 import { writeFileSync } from 'fs'
 import { readFile } from 'fs/promises'
 import officeParser from 'officeparser'
+import { getDocument } from 'officeparser/pdfjs-dist-build/pdf.js'
 import * as path from 'path'
 import { chdir } from 'process'
 import { v4 as uuidv4 } from 'uuid'
@@ -321,6 +322,28 @@ class FileStorage {
     return { data: base64, mime }
   }
 
+  /**
+   * Count the pages of a stored PDF file.
+   *
+   * @param _ IPC invoke event (unused).
+   * @param id File name of the PDF, resolved against the storage directory.
+   * @returns Number of pages reported by pdf.js.
+   */
+  public pdfPageCount = async (_: Electron.IpcMainInvokeEvent, id: string): Promise<number> => {
+    const filePath = path.join(this.storageDir, id)
+    const buffer = await fs.promises.readFile(filePath)
+
+    // pdf.js documents `data` as a typed array, so hand it a Uint8Array rather than a Node Buffer.
+    const loadingTask = getDocument({ data: new Uint8Array(buffer) })
+    try {
+      const doc = await loadingTask.promise
+      return doc.numPages
+    } finally {
+      // Destroy the loading task on every path so a failed parse does not leave pdf.js resources behind.
+      await loadingTask.destroy()
+    }
+  }
+
   public binaryImage = async (_: Electron.IpcMainInvokeEvent, id: string): Promise<{ data: Buffer; mime: string }> => {
     const filePath = path.join(this.storageDir, id)
     const data = await fs.promises.readFile(filePath)
diff --git a/src/preload/index.ts b/src/preload/index.ts
index 59496df4b8..54ae53cef7 100644
--- a/src/preload/index.ts
+++ b/src/preload/index.ts
@@ -83,6 +83,7 @@ const api = {
     copy: (fileId: string, destPath: string) => ipcRenderer.invoke(IpcChannel.File_Copy, fileId, destPath),
     binaryImage: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_BinaryImage, fileId),
     base64File: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_Base64File, fileId),
+    pdfInfo: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_GetPdfInfo, fileId),
     getPathForFile: (file: File) => webUtils.getPathForFile(file)
   },
   fs: {
diff --git a/src/renderer/src/aiCore/clients/openai/OpenAIResponseAPIClient.ts b/src/renderer/src/aiCore/clients/openai/OpenAIResponseAPIClient.ts
index a0f4d8077d..958fde9ec3 100644
--- a/src/renderer/src/aiCore/clients/openai/OpenAIResponseAPIClient.ts
+++ b/src/renderer/src/aiCore/clients/openai/OpenAIResponseAPIClient.ts
@@ -6,6 +6,7 @@ import {
 } from '@renderer/config/models'
 import { estimateTextTokens } from '@renderer/services/TokenService'
 import {
+  FileType,
   FileTypes,
   MCPCallToolResponse,
   MCPTool,
@@ -34,6 +35,7 @@ import {
 } from '@renderer/utils/mcp-tools'
 import { findFileBlocks, findImageBlocks } from '@renderer/utils/messageUtils/find'
 import { buildSystemPrompt } from '@renderer/utils/prompt'
+import { MB } from '@shared/config/constant'
 import { isEmpty } from 'lodash'
 import OpenAI from 'openai'
 
@@ -90,6 +92,38 @@ export class OpenAIResponseAPIClient extends OpenAIBaseClient<
     return await sdk.responses.create(payload, options)
   }
 
+  /**
+   * Build an `input_file` part that sends a PDF to the model as base64 data.
+   *
+   * @param file PDF attachment to convert.
+   * @returns The input part, or `undefined` when the file exceeds the size or page
+   *   limit or cannot be read; the caller then handles the file through its other branches.
+   */
+  private async handlePdfFile(file: FileType): Promise<OpenAI.Responses.ResponseInputFile | undefined> {
+    // Limits applied to PDF inputs (presumably mirroring the provider's documented caps - confirm).
+    const maxBytes = 32 * MB
+    const maxPages = 100
+
+    if (file.size > maxBytes) return undefined
+
+    const fileName = file.id + file.ext
+    try {
+      const pageCount = await window.api.file.pdfInfo(fileName)
+      if (pageCount > maxPages) return undefined
+
+      const { data } = await window.api.file.base64File(fileName)
+      const part: OpenAI.Responses.ResponseInputFile = {
+        type: 'input_file',
+        filename: file.origin_name,
+        file_data: `data:application/pdf;base64,${data}`
+      }
+      return part
+    } catch {
+      // Best effort: any failure while inspecting or reading the PDF means it is not attached as a file part.
+      return undefined
+    }
+  }
+
   public async convertMessageToSdkParam(message: Message, model: Model): Promise {
     const isVision = isVisionModel(model)
     const content = await this.getMessageContent(message)
@@ -141,6 +175,15 @@ export class OpenAIResponseAPIClient extends OpenAIBaseClient<
       const file = fileBlock.file
       if (!file) continue
 
+      // Try to attach the PDF itself first; when no part is produced, fall through to the branches below.
+      if (isVision && file.ext === '.pdf') {
+        const pdfPart = await this.handlePdfFile(file)
+        if (pdfPart) {
+          parts.push(pdfPart)
+          continue
+        }
+      }
+
       if ([FileTypes.TEXT, FileTypes.DOCUMENT].includes(file.type)) {
         const fileContent = (await window.api.file.read(file.id + file.ext)).trim()
         parts.push({