mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-19 06:30:10 +08:00
refactor: update pdfjs import and improve file filtering in electron-builder configuration (#8198)
* refactor: update pdfjs import and improve file filtering in electron-builder configuration - Changed pdfjs import to a direct import from 'pdfjs-dist' for consistency across modules. - Updated file filtering in electron-builder.yml to exclude all legacy files from pdfjs-dist instead of just specific ones. * format code
This commit is contained in:
parent
7b58883d33
commit
47a0dbf87a
@ -54,7 +54,7 @@ files:
|
||||
- '!node_modules/mammoth/{mammoth.browser.js,mammoth.browser.min.js}'
|
||||
- '!node_modules/selection-hook/prebuilds/**/*' # we rebuild .node, don't use prebuilds
|
||||
- '!node_modules/pdfjs-dist/web/**/*'
|
||||
- '!node_modules/pdfjs-dist/legacy/web/*'
|
||||
- '!node_modules/pdfjs-dist/legacy/**/*'
|
||||
- '!node_modules/selection-hook/node_modules' # we don't need what in the node_modules dir
|
||||
- '!node_modules/selection-hook/src' # we don't need source files
|
||||
- '!**/*.{h,iobj,ipdb,tlog,recipe,vcxproj,vcxproj.filters,Makefile,*.Makefile}' # filter .node build files
|
||||
|
||||
12
package.json
12
package.json
@ -67,18 +67,12 @@
|
||||
"prepare": "git config blame.ignoreRevsFile .git-blame-ignore-revs && husky"
|
||||
},
|
||||
"dependencies": {
|
||||
"@aws-sdk/client-s3": "^3.840.0",
|
||||
"@cherrystudio/pdf-to-img-napi": "^0.0.1",
|
||||
"@libsql/client": "0.14.0",
|
||||
"@libsql/win32-x64-msvc": "^0.4.7",
|
||||
"@strongtz/win32-arm64-msvc": "^0.4.7",
|
||||
"iconv-lite": "^0.6.3",
|
||||
"jaison": "^2.0.2",
|
||||
"jschardet": "^3.1.4",
|
||||
"jsdom": "26.1.0",
|
||||
"macos-release": "^3.4.0",
|
||||
"node-stream-zip": "^1.15.0",
|
||||
"notion-helper": "^1.3.22",
|
||||
"os-proxy-config": "^1.1.2",
|
||||
"pdfjs-dist": "4.10.38",
|
||||
"react-json-view": "^1.21.3",
|
||||
@ -91,6 +85,7 @@
|
||||
"@agentic/tavily": "^7.3.3",
|
||||
"@ant-design/v5-patch-for-react-19": "^1.0.3",
|
||||
"@anthropic-ai/sdk": "^0.41.0",
|
||||
"@aws-sdk/client-s3": "^3.840.0",
|
||||
"@cherrystudio/embedjs": "^0.1.31",
|
||||
"@cherrystudio/embedjs-libsql": "^0.1.31",
|
||||
"@cherrystudio/embedjs-loader-csv": "^0.1.31",
|
||||
@ -199,15 +194,20 @@
|
||||
"html-to-image": "^1.11.13",
|
||||
"husky": "^9.1.7",
|
||||
"i18next": "^23.11.5",
|
||||
"iconv-lite": "^0.6.3",
|
||||
"jaison": "^2.0.2",
|
||||
"jest-styled-components": "^7.2.0",
|
||||
"jschardet": "^3.1.4",
|
||||
"lint-staged": "^15.5.0",
|
||||
"lodash": "^4.17.21",
|
||||
"lru-cache": "^11.1.0",
|
||||
"lucide-react": "^0.487.0",
|
||||
"macos-release": "^3.4.0",
|
||||
"markdown-it": "^14.1.0",
|
||||
"mermaid": "^11.7.0",
|
||||
"mime": "^4.0.4",
|
||||
"motion": "^12.10.5",
|
||||
"notion-helper": "^1.3.22",
|
||||
"npx-scope-finder": "^1.2.0",
|
||||
"officeparser": "^4.2.0",
|
||||
"openai": "patch:openai@npm%3A5.1.0#~/.yarn/patches/openai-npm-5.1.0-0e7b3ccb07.patch",
|
||||
|
||||
@ -5,6 +5,7 @@ import { windowService } from '@main/services/WindowService'
|
||||
import { getFileExt } from '@main/utils/file'
|
||||
import { FileMetadata, OcrProvider } from '@types'
|
||||
import { app } from 'electron'
|
||||
import pdfjs from 'pdfjs-dist'
|
||||
import { TypedArray } from 'pdfjs-dist/types/src/display/api'
|
||||
|
||||
export default abstract class BaseOcrProvider {
|
||||
@ -76,8 +77,7 @@ export default abstract class BaseOcrProvider {
|
||||
source: string | URL | TypedArray,
|
||||
passwordCallback?: (fn: (password: string) => void, reason: string) => string
|
||||
) {
|
||||
const { getDocument } = await import('pdfjs-dist/legacy/build/pdf.mjs')
|
||||
const documentLoadingTask = getDocument(source)
|
||||
const documentLoadingTask = pdfjs.getDocument(source)
|
||||
if (passwordCallback) {
|
||||
documentLoadingTask.onPassword = passwordCallback
|
||||
}
|
||||
|
||||
@ -5,6 +5,7 @@ import { windowService } from '@main/services/WindowService'
|
||||
import { getFileExt } from '@main/utils/file'
|
||||
import { FileMetadata, PreprocessProvider } from '@types'
|
||||
import { app } from 'electron'
|
||||
import pdfjs from 'pdfjs-dist'
|
||||
import { TypedArray } from 'pdfjs-dist/types/src/display/api'
|
||||
|
||||
export default abstract class BasePreprocessProvider {
|
||||
@ -80,8 +81,7 @@ export default abstract class BasePreprocessProvider {
|
||||
source: string | URL | TypedArray,
|
||||
passwordCallback?: (fn: (password: string) => void, reason: string) => string
|
||||
) {
|
||||
const { getDocument } = await import('pdfjs-dist/legacy/build/pdf.mjs')
|
||||
const documentLoadingTask = getDocument(source)
|
||||
const documentLoadingTask = pdfjs.getDocument(source)
|
||||
if (passwordCallback) {
|
||||
documentLoadingTask.onPassword = passwordCallback
|
||||
}
|
||||
|
||||
@ -15,8 +15,8 @@ import * as fs from 'fs'
|
||||
import { writeFileSync } from 'fs'
|
||||
import { readFile } from 'fs/promises'
|
||||
import officeParser from 'officeparser'
|
||||
import { getDocument } from 'officeparser/pdfjs-dist-build/pdf.js'
|
||||
import * as path from 'path'
|
||||
import pdfjs from 'pdfjs-dist'
|
||||
import { chdir } from 'process'
|
||||
import { v4 as uuidv4 } from 'uuid'
|
||||
import WordExtractor from 'word-extractor'
|
||||
@ -367,7 +367,7 @@ class FileStorage {
|
||||
const filePath = path.join(this.storageDir, id)
|
||||
const buffer = await fs.promises.readFile(filePath)
|
||||
|
||||
const doc = await getDocument({ data: buffer }).promise
|
||||
const doc = await pdfjs.getDocument({ data: buffer }).promise
|
||||
const pages = doc.numPages
|
||||
await doc.destroy()
|
||||
return pages
|
||||
|
||||
Loading…
Reference in New Issue
Block a user