mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-25 19:30:17 +08:00
feat: Add PDF file support for OpenAI vision models (#7217)
* feat: add base64 PDF support for OpenAI vision models Signed-off-by: MurphyLo <1335758958@qq.com> * sort imports in OpenAIResponseAPIClient.ts * sort imports in OpenAIResponseAPIClient.ts * remove pdf-parse * modify pdfPageCount implementation to use officeparser built-in pdf.js * chore: update yarn.lock to remove pdf-parse dependency --------- Signed-off-by: MurphyLo <1335758958@qq.com> Co-authored-by: suyao <sy20010504@gmail.com>
This commit is contained in:
parent
b6b1b43094
commit
00e395f252
@ -118,6 +118,7 @@ export enum IpcChannel {
|
||||
File_Copy = 'file:copy',
|
||||
File_BinaryImage = 'file:binaryImage',
|
||||
File_Base64File = 'file:base64File',
|
||||
File_GetPdfInfo = 'file:getPdfInfo',
|
||||
Fs_Read = 'fs:read',
|
||||
|
||||
Export_Word = 'export:word',
|
||||
|
||||
@ -226,6 +226,7 @@ export function registerIpc(mainWindow: BrowserWindow, app: Electron.App) {
|
||||
ipcMain.handle(IpcChannel.File_Base64Image, fileManager.base64Image)
|
||||
ipcMain.handle(IpcChannel.File_SaveBase64Image, fileManager.saveBase64Image)
|
||||
ipcMain.handle(IpcChannel.File_Base64File, fileManager.base64File)
|
||||
ipcMain.handle(IpcChannel.File_GetPdfInfo, fileManager.pdfPageCount)
|
||||
ipcMain.handle(IpcChannel.File_Download, fileManager.downloadFile)
|
||||
ipcMain.handle(IpcChannel.File_Copy, fileManager.copyFile)
|
||||
ipcMain.handle(IpcChannel.File_BinaryImage, fileManager.binaryImage)
|
||||
|
||||
@ -15,6 +15,7 @@ import * as fs from 'fs'
|
||||
import { writeFileSync } from 'fs'
|
||||
import { readFile } from 'fs/promises'
|
||||
import officeParser from 'officeparser'
|
||||
import { getDocument } from 'officeparser/pdfjs-dist-build/pdf.js'
|
||||
import * as path from 'path'
|
||||
import { chdir } from 'process'
|
||||
import { v4 as uuidv4 } from 'uuid'
|
||||
@ -321,6 +322,16 @@ class FileStorage {
|
||||
return { data: base64, mime }
|
||||
}
|
||||
|
||||
public pdfPageCount = async (_: Electron.IpcMainInvokeEvent, id: string): Promise<number> => {
|
||||
const filePath = path.join(this.storageDir, id)
|
||||
const buffer = await fs.promises.readFile(filePath)
|
||||
|
||||
const doc = await getDocument({ data: buffer }).promise
|
||||
const pages = doc.numPages
|
||||
await doc.destroy()
|
||||
return pages
|
||||
}
|
||||
|
||||
public binaryImage = async (_: Electron.IpcMainInvokeEvent, id: string): Promise<{ data: Buffer; mime: string }> => {
|
||||
const filePath = path.join(this.storageDir, id)
|
||||
const data = await fs.promises.readFile(filePath)
|
||||
|
||||
@ -83,6 +83,7 @@ const api = {
|
||||
copy: (fileId: string, destPath: string) => ipcRenderer.invoke(IpcChannel.File_Copy, fileId, destPath),
|
||||
binaryImage: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_BinaryImage, fileId),
|
||||
base64File: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_Base64File, fileId),
|
||||
pdfInfo: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_GetPdfInfo, fileId),
|
||||
getPathForFile: (file: File) => webUtils.getPathForFile(file)
|
||||
},
|
||||
fs: {
|
||||
|
||||
@ -6,6 +6,7 @@ import {
|
||||
} from '@renderer/config/models'
|
||||
import { estimateTextTokens } from '@renderer/services/TokenService'
|
||||
import {
|
||||
FileType,
|
||||
FileTypes,
|
||||
MCPCallToolResponse,
|
||||
MCPTool,
|
||||
@ -34,6 +35,7 @@ import {
|
||||
} from '@renderer/utils/mcp-tools'
|
||||
import { findFileBlocks, findImageBlocks } from '@renderer/utils/messageUtils/find'
|
||||
import { buildSystemPrompt } from '@renderer/utils/prompt'
|
||||
import { MB } from '@shared/config/constant'
|
||||
import { isEmpty } from 'lodash'
|
||||
import OpenAI from 'openai'
|
||||
|
||||
@ -90,6 +92,23 @@ export class OpenAIResponseAPIClient extends OpenAIBaseClient<
|
||||
return await sdk.responses.create(payload, options)
|
||||
}
|
||||
|
||||
private async handlePdfFile(file: FileType): Promise<OpenAI.Responses.ResponseInputFile | undefined> {
|
||||
if (file.size > 32 * MB) return undefined
|
||||
try {
|
||||
const pageCount = await window.api.file.pdfInfo(file.id + file.ext)
|
||||
if (pageCount > 100) return undefined
|
||||
} catch {
|
||||
return undefined
|
||||
}
|
||||
|
||||
const { data } = await window.api.file.base64File(file.id + file.ext)
|
||||
return {
|
||||
type: 'input_file',
|
||||
filename: file.origin_name,
|
||||
file_data: `data:application/pdf;base64,${data}`
|
||||
} as OpenAI.Responses.ResponseInputFile
|
||||
}
|
||||
|
||||
public async convertMessageToSdkParam(message: Message, model: Model): Promise<OpenAIResponseSdkMessageParam> {
|
||||
const isVision = isVisionModel(model)
|
||||
const content = await this.getMessageContent(message)
|
||||
@ -141,6 +160,14 @@ export class OpenAIResponseAPIClient extends OpenAIBaseClient<
|
||||
const file = fileBlock.file
|
||||
if (!file) continue
|
||||
|
||||
if (isVision && file.ext === '.pdf') {
|
||||
const pdfPart = await this.handlePdfFile(file)
|
||||
if (pdfPart) {
|
||||
parts.push(pdfPart)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if ([FileTypes.TEXT, FileTypes.DOCUMENT].includes(file.type)) {
|
||||
const fileContent = (await window.api.file.read(file.id + file.ext)).trim()
|
||||
parts.push({
|
||||
|
||||
Loading…
Reference in New Issue
Block a user