feat: Add PDF file support for OpenAI vision models (#7217)

* feat: add base64 PDF support for OpenAI vision models

  Signed-off-by: MurphyLo <1335758958@qq.com>

* sort imports in OpenAIResponseAPIClient.ts
* sort imports in OpenAIResponseAPIClient.ts
* remove pdf-parse
* modify pdfPageCount implementation to use officeparser built-in pdf.js
* chore: update yarn.lock to remove pdf-parse dependency

---------

Signed-off-by: MurphyLo <1335758958@qq.com>
Co-authored-by: suyao <sy20010504@gmail.com>
2025-12-25 19:30:17 +08:00 · 2025-06-16 11:09:51 +08:00 · 2025-06-16 11:09:51 +08:00 · 00e395f252
commit 00e395f252
parent b6b1b43094
5 changed files with 41 additions and 0 deletions
--- a/packages/shared/IpcChannel.ts
+++ b/packages/shared/IpcChannel.ts
@ -118,6 +118,7 @@ export enum IpcChannel {
  File_Copy = 'file:copy',
  File_BinaryImage = 'file:binaryImage',
  File_Base64File = 'file:base64File',
+  File_GetPdfInfo = 'file:getPdfInfo',
  Fs_Read = 'fs:read',

  Export_Word = 'export:word',
--- a/src/main/ipc.ts
+++ b/src/main/ipc.ts
@ -226,6 +226,7 @@ export function registerIpc(mainWindow: BrowserWindow, app: Electron.App) {
  ipcMain.handle(IpcChannel.File_Base64Image, fileManager.base64Image)
  ipcMain.handle(IpcChannel.File_SaveBase64Image, fileManager.saveBase64Image)
  ipcMain.handle(IpcChannel.File_Base64File, fileManager.base64File)
+  ipcMain.handle(IpcChannel.File_GetPdfInfo, fileManager.pdfPageCount)
  ipcMain.handle(IpcChannel.File_Download, fileManager.downloadFile)
  ipcMain.handle(IpcChannel.File_Copy, fileManager.copyFile)
  ipcMain.handle(IpcChannel.File_BinaryImage, fileManager.binaryImage)
--- a/src/main/services/FileStorage.ts
+++ b/src/main/services/FileStorage.ts
@ -15,6 +15,7 @@ import * as fs from 'fs'
 import { writeFileSync } from 'fs'
 import { readFile } from 'fs/promises'
 import officeParser from 'officeparser'
+import { getDocument } from 'officeparser/pdfjs-dist-build/pdf.js'
 import * as path from 'path'
 import { chdir } from 'process'
 import { v4 as uuidv4 } from 'uuid'
@ -321,6 +322,16 @@ class FileStorage {
    return { data: base64, mime }
  }

+  /**
+   * IPC handler that reports how many pages a stored PDF contains.
+   *
+   * @param _ - IPC invoke event (unused).
+   * @param id - File name of the PDF, resolved relative to the storage directory.
+   * @returns The `numPages` value reported by pdf.js for the document.
+   * @throws Rejects when the file cannot be read or when the pdf.js loading promise rejects.
+   */
+  public pdfPageCount = async (_: Electron.IpcMainInvokeEvent, id: string): Promise<number> => {
+    const filePath = path.join(this.storageDir, id)
+    const buffer = await fs.promises.readFile(filePath)
+
+    // Hold on to the loading task so it can be destroyed even when parsing fails;
+    // awaiting `.promise` directly would leave nothing to clean up on rejection.
+    const loadingTask = getDocument({ data: buffer })
+    try {
+      const doc = await loadingTask.promise
+      return doc.numPages
+    } finally {
+      // Runs on both the success and the failure path.
+      await loadingTask.destroy()
+    }
+  }
+
  public binaryImage = async (_: Electron.IpcMainInvokeEvent, id: string): Promise<{ data: Buffer; mime: string }> => {
    const filePath = path.join(this.storageDir, id)
    const data = await fs.promises.readFile(filePath)
--- a/src/preload/index.ts
+++ b/src/preload/index.ts
@ -83,6 +83,7 @@ const api = {
    copy: (fileId: string, destPath: string) => ipcRenderer.invoke(IpcChannel.File_Copy, fileId, destPath),
    binaryImage: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_BinaryImage, fileId),
    base64File: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_Base64File, fileId),
+    pdfInfo: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_GetPdfInfo, fileId),
    getPathForFile: (file: File) => webUtils.getPathForFile(file)
  },
  fs: {
--- a/src/renderer/src/aiCore/clients/openai/OpenAIResponseAPIClient.ts
+++ b/src/renderer/src/aiCore/clients/openai/OpenAIResponseAPIClient.ts
@ -6,6 +6,7 @@ import {
 } from '@renderer/config/models'
 import { estimateTextTokens } from '@renderer/services/TokenService'
 import {
+  FileType,
  FileTypes,
  MCPCallToolResponse,
  MCPTool,
@ -34,6 +35,7 @@ import {
 } from '@renderer/utils/mcp-tools'
 import { findFileBlocks, findImageBlocks } from '@renderer/utils/messageUtils/find'
 import { buildSystemPrompt } from '@renderer/utils/prompt'
+import { MB } from '@shared/config/constant'
 import { isEmpty } from 'lodash'
 import OpenAI from 'openai'

@ -90,6 +92,23 @@ export class OpenAIResponseAPIClient extends OpenAIBaseClient<
    return await sdk.responses.create(payload, options)
  }

+  /**
+   * Builds an inline `input_file` part for a PDF so the document itself is sent to the model.
+   *
+   * Returns `undefined` when the PDF is larger than 32 MB, has more than 100 pages, or cannot
+   * be inspected or read, so the caller can fall through to its other file handling.
+   *
+   * @param file - Stored file to convert; it is addressed as `file.id + file.ext`.
+   * @returns The `input_file` part carrying the PDF as a base64 data URL, or `undefined`.
+   */
+  private async handlePdfFile(file: FileType): Promise<OpenAI.Responses.ResponseInputFile | undefined> {
+    // NOTE(review): these limits presumably mirror OpenAI's PDF input caps — confirm.
+    const maxBytes = 32 * MB
+    const maxPages = 100
+    if (file.size > maxBytes) return undefined
+
+    const storedName = file.id + file.ext
+    try {
+      const pageCount = await window.api.file.pdfInfo(storedName)
+      if (pageCount > maxPages) return undefined
+
+      // Read inside the same try: a failed read should also yield `undefined` rather than
+      // rejecting and aborting the whole message conversion.
+      const { data } = await window.api.file.base64File(storedName)
+      const part: OpenAI.Responses.ResponseInputFile = {
+        type: 'input_file',
+        filename: file.origin_name,
+        file_data: `data:application/pdf;base64,${data}`
+      }
+      return part
+    } catch {
+      // Best effort: any IPC failure means the PDF is not sent inline.
+      return undefined
+    }
+  }
+
  public async convertMessageToSdkParam(message: Message, model: Model): Promise<OpenAIResponseSdkMessageParam> {
    const isVision = isVisionModel(model)
    const content = await this.getMessageContent(message)
@ -141,6 +160,14 @@ export class OpenAIResponseAPIClient extends OpenAIBaseClient<
      const file = fileBlock.file
      if (!file) continue

+      if (isVision && file.ext === '.pdf') {
+        const pdfPart = await this.handlePdfFile(file)
+        if (pdfPart) {
+          parts.push(pdfPart)
+          continue
+        }
+      }
+
      if ([FileTypes.TEXT, FileTypes.DOCUMENT].includes(file.type)) {
        const fileContent = (await window.api.file.read(file.id + file.ext)).trim()
        parts.push({