mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2026-01-03 02:59:07 +08:00
feat: Add PDF file support for OpenAI vision models (#7217)
* feat: add base64 PDF support for OpenAI vision models
  Signed-off-by: MurphyLo <1335758958@qq.com>
* sort imports in OpenAIResponseAPIClient.ts
* sort imports in OpenAIResponseAPIClient.ts
* remove pdf-parse
* modify pdfPageCount implementation to use officeparser built-in pdf.js
* chore: update yarn.lock to remove pdf-parse dependency
---------
Signed-off-by: MurphyLo <1335758958@qq.com>
Co-authored-by: suyao <sy20010504@gmail.com>
This commit is contained in:
parent
b6b1b43094
commit
00e395f252
@ -118,6 +118,7 @@ export enum IpcChannel {
|
|||||||
File_Copy = 'file:copy',
|
File_Copy = 'file:copy',
|
||||||
File_BinaryImage = 'file:binaryImage',
|
File_BinaryImage = 'file:binaryImage',
|
||||||
File_Base64File = 'file:base64File',
|
File_Base64File = 'file:base64File',
|
||||||
|
File_GetPdfInfo = 'file:getPdfInfo',
|
||||||
Fs_Read = 'fs:read',
|
Fs_Read = 'fs:read',
|
||||||
|
|
||||||
Export_Word = 'export:word',
|
Export_Word = 'export:word',
|
||||||
|
|||||||
@ -226,6 +226,7 @@ export function registerIpc(mainWindow: BrowserWindow, app: Electron.App) {
|
|||||||
ipcMain.handle(IpcChannel.File_Base64Image, fileManager.base64Image)
|
ipcMain.handle(IpcChannel.File_Base64Image, fileManager.base64Image)
|
||||||
ipcMain.handle(IpcChannel.File_SaveBase64Image, fileManager.saveBase64Image)
|
ipcMain.handle(IpcChannel.File_SaveBase64Image, fileManager.saveBase64Image)
|
||||||
ipcMain.handle(IpcChannel.File_Base64File, fileManager.base64File)
|
ipcMain.handle(IpcChannel.File_Base64File, fileManager.base64File)
|
||||||
|
ipcMain.handle(IpcChannel.File_GetPdfInfo, fileManager.pdfPageCount)
|
||||||
ipcMain.handle(IpcChannel.File_Download, fileManager.downloadFile)
|
ipcMain.handle(IpcChannel.File_Download, fileManager.downloadFile)
|
||||||
ipcMain.handle(IpcChannel.File_Copy, fileManager.copyFile)
|
ipcMain.handle(IpcChannel.File_Copy, fileManager.copyFile)
|
||||||
ipcMain.handle(IpcChannel.File_BinaryImage, fileManager.binaryImage)
|
ipcMain.handle(IpcChannel.File_BinaryImage, fileManager.binaryImage)
|
||||||
|
|||||||
@ -15,6 +15,7 @@ import * as fs from 'fs'
|
|||||||
import { writeFileSync } from 'fs'
|
import { writeFileSync } from 'fs'
|
||||||
import { readFile } from 'fs/promises'
|
import { readFile } from 'fs/promises'
|
||||||
import officeParser from 'officeparser'
|
import officeParser from 'officeparser'
|
||||||
|
import { getDocument } from 'officeparser/pdfjs-dist-build/pdf.js'
|
||||||
import * as path from 'path'
|
import * as path from 'path'
|
||||||
import { chdir } from 'process'
|
import { chdir } from 'process'
|
||||||
import { v4 as uuidv4 } from 'uuid'
|
import { v4 as uuidv4 } from 'uuid'
|
||||||
@ -321,6 +322,16 @@ class FileStorage {
|
|||||||
return { data: base64, mime }
|
return { data: base64, mime }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * IPC handler that reports the number of pages in a stored PDF.
 *
 * The file is read fully into memory and parsed with the pdf.js build that
 * ships inside `officeparser`.
 *
 * @param _ - IPC invoke event (unused).
 * @param id - File name that is joined onto `this.storageDir` to locate the PDF.
 * @returns The `numPages` value reported by pdf.js for the document.
 * @throws Rejects with whatever error the file read or the pdf.js load raises.
 */
public pdfPageCount = async (_: Electron.IpcMainInvokeEvent, id: string): Promise<number> => {
  const filePath = path.join(this.storageDir, id)
  const buffer = await fs.promises.readFile(filePath)

  // Keep a handle on the loading task so it can be torn down even when the
  // document fails to load; previously cleanup only ran on the success path.
  const loadingTask = getDocument({ data: buffer })
  try {
    const doc = await loadingTask.promise
    return doc.numPages
  } finally {
    await loadingTask.destroy()
  }
}
|
||||||
|
|
||||||
public binaryImage = async (_: Electron.IpcMainInvokeEvent, id: string): Promise<{ data: Buffer; mime: string }> => {
|
public binaryImage = async (_: Electron.IpcMainInvokeEvent, id: string): Promise<{ data: Buffer; mime: string }> => {
|
||||||
const filePath = path.join(this.storageDir, id)
|
const filePath = path.join(this.storageDir, id)
|
||||||
const data = await fs.promises.readFile(filePath)
|
const data = await fs.promises.readFile(filePath)
|
||||||
|
|||||||
@ -83,6 +83,7 @@ const api = {
|
|||||||
copy: (fileId: string, destPath: string) => ipcRenderer.invoke(IpcChannel.File_Copy, fileId, destPath),
|
copy: (fileId: string, destPath: string) => ipcRenderer.invoke(IpcChannel.File_Copy, fileId, destPath),
|
||||||
binaryImage: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_BinaryImage, fileId),
|
binaryImage: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_BinaryImage, fileId),
|
||||||
base64File: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_Base64File, fileId),
|
base64File: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_Base64File, fileId),
|
||||||
|
pdfInfo: (fileId: string) => ipcRenderer.invoke(IpcChannel.File_GetPdfInfo, fileId),
|
||||||
getPathForFile: (file: File) => webUtils.getPathForFile(file)
|
getPathForFile: (file: File) => webUtils.getPathForFile(file)
|
||||||
},
|
},
|
||||||
fs: {
|
fs: {
|
||||||
|
|||||||
@ -6,6 +6,7 @@ import {
|
|||||||
} from '@renderer/config/models'
|
} from '@renderer/config/models'
|
||||||
import { estimateTextTokens } from '@renderer/services/TokenService'
|
import { estimateTextTokens } from '@renderer/services/TokenService'
|
||||||
import {
|
import {
|
||||||
|
FileType,
|
||||||
FileTypes,
|
FileTypes,
|
||||||
MCPCallToolResponse,
|
MCPCallToolResponse,
|
||||||
MCPTool,
|
MCPTool,
|
||||||
@ -34,6 +35,7 @@ import {
|
|||||||
} from '@renderer/utils/mcp-tools'
|
} from '@renderer/utils/mcp-tools'
|
||||||
import { findFileBlocks, findImageBlocks } from '@renderer/utils/messageUtils/find'
|
import { findFileBlocks, findImageBlocks } from '@renderer/utils/messageUtils/find'
|
||||||
import { buildSystemPrompt } from '@renderer/utils/prompt'
|
import { buildSystemPrompt } from '@renderer/utils/prompt'
|
||||||
|
import { MB } from '@shared/config/constant'
|
||||||
import { isEmpty } from 'lodash'
|
import { isEmpty } from 'lodash'
|
||||||
import OpenAI from 'openai'
|
import OpenAI from 'openai'
|
||||||
|
|
||||||
@ -90,6 +92,23 @@ export class OpenAIResponseAPIClient extends OpenAIBaseClient<
|
|||||||
return await sdk.responses.create(payload, options)
|
return await sdk.responses.create(payload, options)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds an `input_file` part that embeds a PDF inline as a base64 data URL.
 *
 * Returns `undefined` when the file is larger than 32 MB, when it has more
 * than 100 pages, or when the page-count lookup throws. The caller can then
 * fall through to its other file handling.
 * NOTE(review): the 32 MB / 100 page limits presumably mirror the provider's
 * documented PDF input limits — confirm against the current API docs.
 *
 * @param file - The attached file; it is looked up by `file.id + file.ext`.
 * @returns The PDF input part, or `undefined` if the file is not eligible.
 */
private async handlePdfFile(file: FileType): Promise<OpenAI.Responses.ResponseInputFile | undefined> {
  const maxBytes = 32 * MB
  const maxPages = 100

  if (file.size > maxBytes) {
    return undefined
  }

  const storedName = file.id + file.ext

  try {
    const totalPages = await window.api.file.pdfInfo(storedName)
    if (totalPages > maxPages) {
      return undefined
    }
  } catch {
    // Page count unavailable: treat the PDF as not eligible for inline upload.
    return undefined
  }

  // Deliberately outside the try block: a failure to read the file contents propagates to the caller.
  const encoded = await window.api.file.base64File(storedName)
  const pdfPart = {
    type: 'input_file',
    filename: file.origin_name,
    file_data: `data:application/pdf;base64,${encoded.data}`
  } as OpenAI.Responses.ResponseInputFile
  return pdfPart
}
|
||||||
|
|
||||||
public async convertMessageToSdkParam(message: Message, model: Model): Promise<OpenAIResponseSdkMessageParam> {
|
public async convertMessageToSdkParam(message: Message, model: Model): Promise<OpenAIResponseSdkMessageParam> {
|
||||||
const isVision = isVisionModel(model)
|
const isVision = isVisionModel(model)
|
||||||
const content = await this.getMessageContent(message)
|
const content = await this.getMessageContent(message)
|
||||||
@ -141,6 +160,14 @@ export class OpenAIResponseAPIClient extends OpenAIBaseClient<
|
|||||||
const file = fileBlock.file
|
const file = fileBlock.file
|
||||||
if (!file) continue
|
if (!file) continue
|
||||||
|
|
||||||
|
if (isVision && file.ext === '.pdf') {
|
||||||
|
const pdfPart = await this.handlePdfFile(file)
|
||||||
|
if (pdfPart) {
|
||||||
|
parts.push(pdfPart)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ([FileTypes.TEXT, FileTypes.DOCUMENT].includes(file.type)) {
|
if ([FileTypes.TEXT, FileTypes.DOCUMENT].includes(file.type)) {
|
||||||
const fileContent = (await window.api.file.read(file.id + file.ext)).trim()
|
const fileContent = (await window.api.file.read(file.id + file.ext)).trim()
|
||||||
parts.push({
|
parts.push({
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user