mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-25 19:30:17 +08:00
* refactor: replace axios and node fetch with electron's net module for network requests in preprocess providers - Updated Doc2xPreprocessProvider and MineruPreprocessProvider to use net.fetch instead of axios for making HTTP requests. - Improved error handling for network responses across various methods. - Removed unnecessary AxiosRequestConfig and related code to streamline the implementation. * lint * refactor(Doc2xPreprocessProvider): enhance file validation and upload process - Added file size validation to prevent loading files larger than 300MB into memory. - Implemented file size check before reading the PDF to ensure efficient memory usage. - Updated the file upload method to use a stream, setting the 'Content-Length' header for better handling of large files. * refactor(brave-search): update net.fetch calls to use url.toString() - Modified all instances of net.fetch to use url.toString() for better URL handling. - Ensured consistency in how URLs are passed to the fetch method across various functions. * refactor(MCPService): improve URL handling in net.fetch calls - Updated net.fetch to use url.toString() for better type handling of URLs. - Ensured consistent URL processing across the MCPService class. * feat(ProxyManager): integrate axios with fetch proxy support - Added axios as a dependency to enable fetch proxy usage. - Implemented logic to set axios's adapter to 'fetch' for proxy handling. - Preserved original axios adapter for restoration when disabling the proxy.
381 lines
11 KiB
TypeScript
381 lines
11 KiB
TypeScript
import fs from 'node:fs'
|
||
import path from 'node:path'
|
||
|
||
import { loggerService } from '@logger'
|
||
import { fileStorage } from '@main/services/FileStorage'
|
||
import { FileMetadata, PreprocessProvider } from '@types'
|
||
import AdmZip from 'adm-zip'
|
||
import { net } from 'electron'
|
||
|
||
import BasePreprocessProvider from './BasePreprocessProvider'
|
||
|
||
// Module-level logger obtained from loggerService with the 'Doc2xPreprocessProvider' context.
const logger = loggerService.withContext('Doc2xPreprocessProvider')
|
||
|
||
/**
 * Generic JSON envelope returned by the remote API.
 *
 * @typeParam T Shape of the `data` payload for a given endpoint.
 */
type ApiResponse<T> = {
  /** Result code; callers in this file treat the literal 'success' as the success case. */
  code: string
  /** Endpoint-specific payload. */
  data: T
  /** Optional message; used in this file to build error text when the call did not succeed. */
  message?: string
}
|
||
|
||
/** Payload of the preupload endpoint. */
type PreuploadResponse = {
  /** Identifier of the upload; passed to the status, convert and result endpoints. */
  uid: string
  /** URL the file is uploaded to with an HTTP PUT. */
  url: string
}
|
||
|
||
/** Payload of the parse-status endpoint. */
type StatusResponse = {
  /** Processing state; this file reacts to 'success' and 'failed' and keeps polling otherwise. */
  status: string
  /** Progress value; logged with a '%' suffix and forwarded as preprocess progress. */
  progress: number
}
|
||
|
||
/** Payload of the convert-result endpoint. */
type ParsedFileResponse = {
  /** Export state; this file reacts to 'success' and 'failed' and keeps polling otherwise. */
  status: string
  /** Download URL of the exported archive; only used once `status` is 'success' and the value is non-empty. */
  url: string
}
|
||
|
||
/**
 * Preprocess provider that turns a PDF into Markdown through a remote HTTP API.
 *
 * Flow implemented by {@link Doc2xPreprocessProvider.parseFile}:
 * local validation -> preupload -> PUT upload -> poll parse status ->
 * request Markdown conversion -> poll export result -> download and unzip.
 *
 * All network traffic goes through Electron's `net.fetch`.
 */
export default class Doc2xPreprocessProvider extends BasePreprocessProvider {
  constructor(provider: PreprocessProvider) {
    super(provider)
  }

  /**
   * Rejects files that are too large or have too many pages.
   *
   * The size is checked via `stat` first so that an oversized file is never
   * read into memory; only files below the size limit are loaded to count pages.
   *
   * @param filePath Path of the PDF on disk.
   * @throws Error when the file is 300MB or larger, or has 1000 pages or more.
   */
  private async validateFile(filePath: string): Promise<void> {
    const { size: fileSizeBytes } = await fs.promises.stat(filePath)

    // The file must be smaller than 300MB.
    if (fileSizeBytes >= 300 * 1024 * 1024) {
      const fileSizeMB = Math.round(fileSizeBytes / (1024 * 1024))
      throw new Error(`PDF file size (${fileSizeMB}MB) exceeds the limit of 300MB`)
    }

    // Only now is it reasonable to load the content and inspect the page count.
    const pdfBuffer = await fs.promises.readFile(filePath)
    const doc = await this.readPdf(pdfBuffer)

    // The document must have fewer than 1000 pages.
    if (doc.numPages >= 1000) {
      throw new Error(`PDF page count (${doc.numPages}) exceeds the limit of 1000 pages`)
    }
  }

  /**
   * Runs the complete preprocessing pipeline for one file.
   *
   * @param sourceId Identifier forwarded to `sendPreprocessProgress` while polling.
   * @param file Metadata of the file to process.
   * @returns Metadata describing the generated Markdown file.
   * @throws Re-throws any error raised by a pipeline step after logging it.
   */
  public async parseFile(sourceId: string, file: FileMetadata): Promise<{ processedFile: FileMetadata }> {
    try {
      const filePath = fileStorage.getFilePathById(file)
      logger.info(`Preprocess processing started: ${filePath}`)

      // Validate locally first so an unacceptable file never triggers a preupload request.
      await this.validateFile(filePath)

      // Step 1: obtain an upload slot.
      const { uid, url } = await this.preupload()
      logger.info(`Preprocess preupload completed: uid=${uid}`)

      // Step 2: upload the file.
      await this.putFile(filePath, url)

      // Step 3: wait until remote processing has finished.
      await this.waitForProcessing(sourceId, uid)
      logger.info(`Preprocess parsing completed successfully for: ${filePath}`)

      // Step 4: export the result.
      const { path: outputPath } = await this.exportFile(file, uid)

      // Step 5: describe the processed file.
      return {
        processedFile: this.createProcessedFileInfo(file, outputPath)
      }
    } catch (error) {
      logger.error(`Preprocess processing failed for ${file.name}:`, error as Error)
      throw error
    }
  }

  /**
   * Builds the metadata of the generated Markdown file.
   *
   * The Markdown file name is derived once from the stem of `file.name`, so the
   * returned `name` and `path` always refer to the same file (also for names such
   * as `a.pdf.v2.pdf` or `X.PDF`).
   *
   * @param file Metadata of the original file.
   * @param outputPath Directory the export was extracted to.
   * @returns A copy of `file` with `name`, `path`, `ext` and `size` describing the Markdown file.
   * @throws If the Markdown file does not exist (`fs.statSync` fails).
   */
  private createProcessedFileInfo(file: FileMetadata, outputPath: string): FileMetadata {
    const markdownName = `${path.parse(file.name).name}.md`
    const outputFilePath = path.join(outputPath, markdownName)

    return {
      ...file,
      name: markdownName,
      path: outputFilePath,
      ext: '.md',
      size: fs.statSync(outputFilePath).size
    }
  }

  /**
   * Exports a parsed file: requests conversion, waits for the export and
   * downloads/unzips the result.
   *
   * @param file Metadata of the original file.
   * @param uid `uid` returned by the preupload call.
   * @returns The directory the exported archive was extracted to.
   */
  public async exportFile(file: FileMetadata, uid: string): Promise<{ path: string }> {
    const filePath = fileStorage.getFilePathById(file)
    logger.info(`Exporting file: ${filePath}`)

    // Step 1: request the conversion.
    await this.convertFile(uid, filePath)
    logger.info(`File conversion completed for: ${filePath}`)

    // Step 2: wait for the export and obtain its URL.
    const exportUrl = await this.waitForExport(uid)

    // Step 3: download and extract the archive.
    return this.downloadFile(exportUrl, file)
  }

  /**
   * Polls the processing status until it reports 'success' or 'failed',
   * forwarding the progress after every poll.
   *
   * NOTE(review): the loop has no upper bound; a status that never becomes
   * 'success' or 'failed' keeps it polling forever — consider a timeout.
   *
   * @param sourceId Identifier forwarded to `sendPreprocessProgress`.
   * @param uid `uid` returned by the preupload call.
   * @throws Error when the reported status is 'failed'.
   */
  private async waitForProcessing(sourceId: string, uid: string): Promise<void> {
    while (true) {
      await this.delay(1000)
      const { status, progress } = await this.getStatus(uid)
      await this.sendPreprocessProgress(sourceId, progress)
      logger.info(`Preprocess processing status: ${status}, progress: ${progress}%`)

      if (status === 'success') {
        return
      }
      if (status === 'failed') {
        throw new Error('Preprocess processing failed')
      }
    }
  }

  /**
   * Polls the export result until it reports 'success' together with a URL,
   * or 'failed'.
   *
   * NOTE(review): like the processing loop, this has no upper bound.
   *
   * @param uid `uid` returned by the preupload call.
   * @returns URL of the exported file.
   * @throws Error when the reported status is 'failed'.
   */
  private async waitForExport(uid: string): Promise<string> {
    while (true) {
      await this.delay(1000)
      const { status, url } = await this.getParsedFile(uid)
      logger.info(`Export status: ${status}`)

      if (status === 'success' && url) {
        return url
      }
      if (status === 'failed') {
        throw new Error('Export failed')
      }
    }
  }

  /**
   * Requests an upload slot.
   *
   * @returns The `uid` and upload `url` from the preupload response.
   * @throws Error('Failed to get preupload URL') on any HTTP or API failure; the
   *   underlying reason is written to the log.
   */
  private async preupload(): Promise<PreuploadResponse> {
    const endpoint = `${this.provider.apiHost}/api/v2/parse/preupload`

    try {
      const response = await net.fetch(endpoint, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          Authorization: `Bearer ${this.provider.apiKey}`
        },
        body: null
      })

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`)
      }

      const data = (await response.json()) as ApiResponse<PreuploadResponse>

      if (data.code === 'success' && data.data) {
        return data.data
      }
      throw new Error(`API returned error: ${data.message || JSON.stringify(data)}`)
    } catch (error) {
      logger.error(`Failed to get preupload URL: ${error instanceof Error ? error.message : String(error)}`)
      throw new Error('Failed to get preupload URL')
    }
  }

  /**
   * Uploads the file with an HTTP PUT, streaming it from disk.
   *
   * The read stream is always destroyed afterwards so the file descriptor is
   * released even when the request fails before the stream was consumed.
   *
   * @param filePath Path of the file to upload.
   * @param url Upload URL from the preupload response.
   * @throws Error('Failed to upload file') on any failure; the underlying reason
   *   is written to the log.
   */
  private async putFile(filePath: string, url: string): Promise<void> {
    try {
      // The file size is needed for the Content-Length header.
      const { size: fileSize } = await fs.promises.stat(filePath)
      const fileStream = fs.createReadStream(filePath)

      try {
        // NOTE(review): spec-following fetch implementations require `duplex: 'half'`
        // for streamed request bodies — confirm net.fetch accepts a Node Readable as-is.
        const response = await net.fetch(url, {
          method: 'PUT',
          // eslint-disable-next-line @typescript-eslint/no-explicit-any -- RequestInit has no Node stream body type
          body: fileStream as any,
          headers: {
            'Content-Length': fileSize.toString()
          }
        })

        if (!response.ok) {
          throw new Error(`HTTP ${response.status}: ${response.statusText}`)
        }
      } finally {
        fileStream.destroy()
      }
    } catch (error) {
      logger.error(`Failed to upload file ${filePath}: ${error instanceof Error ? error.message : String(error)}`)
      throw new Error('Failed to upload file')
    }
  }

  /**
   * Fetches the current processing status of an upload.
   *
   * @param uid `uid` returned by the preupload call.
   * @returns Status and progress reported by the API.
   * @throws Error('Failed to get processing status') on any HTTP or API failure.
   */
  private async getStatus(uid: string): Promise<StatusResponse> {
    const endpoint = `${this.provider.apiHost}/api/v2/parse/status?uid=${encodeURIComponent(uid)}`

    try {
      const response = await net.fetch(endpoint, {
        method: 'GET',
        headers: {
          Authorization: `Bearer ${this.provider.apiKey}`
        }
      })

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`)
      }

      const data = (await response.json()) as ApiResponse<StatusResponse>
      if (data.code === 'success' && data.data) {
        return data.data
      }
      throw new Error(`API returned error: ${data.message || JSON.stringify(data)}`)
    } catch (error) {
      logger.error(`Failed to get status for uid ${uid}: ${error instanceof Error ? error.message : String(error)}`)
      throw new Error('Failed to get processing status')
    }
  }

  /**
   * Asks the API to convert the parsed document to Markdown.
   *
   * The base name of `filePath` (without extension) is sent as the requested
   * output file name.
   *
   * @param uid `uid` returned by the preupload call.
   * @param filePath Path of the source file.
   * @throws Error('Failed to convert file') on any HTTP or API failure.
   */
  private async convertFile(uid: string, filePath: string): Promise<void> {
    const fileName = path.parse(filePath).name

    const payload = {
      uid,
      to: 'md',
      formula_mode: 'normal',
      filename: fileName
    }

    const endpoint = `${this.provider.apiHost}/api/v2/convert/parse`

    try {
      const response = await net.fetch(endpoint, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          Authorization: `Bearer ${this.provider.apiKey}`
        },
        body: JSON.stringify(payload)
      })

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`)
      }

      // The payload is not inspected, only the result code.
      const data = (await response.json()) as ApiResponse<unknown>
      if (data.code !== 'success') {
        throw new Error(`API returned error: ${data.message || JSON.stringify(data)}`)
      }
    } catch (error) {
      logger.error(`Failed to convert file ${filePath}: ${error instanceof Error ? error.message : String(error)}`)
      throw new Error('Failed to convert file')
    }
  }

  /**
   * Fetches the export result for an upload.
   *
   * Unlike the other API calls, only the presence of `data` is checked here; the
   * envelope `code` is not inspected, and the caller evaluates `data.status`.
   *
   * @param uid `uid` returned by the preupload call.
   * @returns Export status and URL reported by the API.
   * @throws Error('Failed to get parsed file information') on HTTP failure or
   *   when the response carries no data.
   */
  private async getParsedFile(uid: string): Promise<ParsedFileResponse> {
    const endpoint = `${this.provider.apiHost}/api/v2/convert/parse/result?uid=${encodeURIComponent(uid)}`

    try {
      const response = await net.fetch(endpoint, {
        method: 'GET',
        headers: {
          Authorization: `Bearer ${this.provider.apiKey}`
        }
      })

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`)
      }

      const data = (await response.json()) as ApiResponse<ParsedFileResponse>
      if (data.data) {
        return data.data
      }
      throw new Error(`No data in response`)
    } catch (error) {
      logger.error(
        `Failed to get parsed file for uid ${uid}: ${error instanceof Error ? error.message : String(error)}`
      )
      throw new Error('Failed to get parsed file information')
    }
  }

  /**
   * Downloads the exported archive and extracts it.
   *
   * The archive is written to `<storageDir>/<file.id>.zip`, extracted into
   * `<storageDir>/<file.id>/` (overwriting existing entries) and then deleted.
   * If anything fails, the temporary archive is removed on a best-effort basis
   * so no partial download is left behind.
   *
   * @param url URL of the exported archive.
   * @param file Metadata of the original file; `file.id` names the output directory.
   * @returns The directory the archive was extracted to.
   * @throws Error('Failed to download and extract file') on any download or
   *   extraction failure; the underlying reason is written to the log.
   */
  private async downloadFile(url: string, file: FileMetadata): Promise<{ path: string }> {
    const dirPath = this.storageDir
    const extractPath = path.join(dirPath, file.id)
    const zipPath = path.join(dirPath, `${file.id}.zip`)

    // Make sure both the storage and the extraction directory exist.
    fs.mkdirSync(dirPath, { recursive: true })
    fs.mkdirSync(extractPath, { recursive: true })

    logger.info(`Downloading to export path: ${zipPath}`)

    try {
      const response = await net.fetch(url, { method: 'GET' })
      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`)
      }
      const arrayBuffer = await response.arrayBuffer()
      fs.writeFileSync(zipPath, Buffer.from(arrayBuffer))

      const zip = new AdmZip(zipPath)
      zip.extractAllTo(extractPath, true)
      logger.info(`Extracted files to: ${extractPath}`)

      // The archive is only a temporary artifact.
      fs.unlinkSync(zipPath)

      return { path: extractPath }
    } catch (error) {
      logger.error(`Failed to download and extract file: ${error instanceof Error ? error.message : String(error)}`)

      try {
        fs.rmSync(zipPath, { force: true })
      } catch {
        // Best-effort cleanup; the original failure is the one worth reporting.
      }

      throw new Error('Failed to download and extract file')
    }
  }

  /**
   * Quota lookup is not implemented for this provider.
   *
   * @throws Error('Method not implemented.') synchronously, before any promise is returned.
   */
  public checkQuota(): Promise<number> {
    throw new Error('Method not implemented.')
  }
}
|