diff --git a/electron-builder.yml b/electron-builder.yml index 17a731d94a..8af0ea5a6c 100644 --- a/electron-builder.yml +++ b/electron-builder.yml @@ -54,7 +54,7 @@ files: - '!node_modules/mammoth/{mammoth.browser.js,mammoth.browser.min.js}' - '!node_modules/selection-hook/prebuilds/**/*' # we rebuild .node, don't use prebuilds - '!node_modules/pdfjs-dist/web/**/*' - - '!node_modules/pdfjs-dist/legacy/web/*' + - '!node_modules/pdfjs-dist/legacy/**/*' - '!node_modules/selection-hook/node_modules' # we don't need what in the node_modules dir - '!node_modules/selection-hook/src' # we don't need source files - '!**/*.{h,iobj,ipdb,tlog,recipe,vcxproj,vcxproj.filters,Makefile,*.Makefile}' # filter .node build files diff --git a/package.json b/package.json index 85db145e19..c36b0e36ff 100644 --- a/package.json +++ b/package.json @@ -67,18 +67,12 @@ "prepare": "git config blame.ignoreRevsFile .git-blame-ignore-revs && husky" }, "dependencies": { - "@aws-sdk/client-s3": "^3.840.0", "@cherrystudio/pdf-to-img-napi": "^0.0.1", "@libsql/client": "0.14.0", "@libsql/win32-x64-msvc": "^0.4.7", "@strongtz/win32-arm64-msvc": "^0.4.7", - "iconv-lite": "^0.6.3", - "jaison": "^2.0.2", - "jschardet": "^3.1.4", "jsdom": "26.1.0", - "macos-release": "^3.4.0", "node-stream-zip": "^1.15.0", - "notion-helper": "^1.3.22", "os-proxy-config": "^1.1.2", "pdfjs-dist": "4.10.38", "react-json-view": "^1.21.3", @@ -91,6 +85,7 @@ "@agentic/tavily": "^7.3.3", "@ant-design/v5-patch-for-react-19": "^1.0.3", "@anthropic-ai/sdk": "^0.41.0", + "@aws-sdk/client-s3": "^3.840.0", "@cherrystudio/embedjs": "^0.1.31", "@cherrystudio/embedjs-libsql": "^0.1.31", "@cherrystudio/embedjs-loader-csv": "^0.1.31", @@ -199,15 +194,20 @@ "html-to-image": "^1.11.13", "husky": "^9.1.7", "i18next": "^23.11.5", + "iconv-lite": "^0.6.3", + "jaison": "^2.0.2", "jest-styled-components": "^7.2.0", + "jschardet": "^3.1.4", "lint-staged": "^15.5.0", "lodash": "^4.17.21", "lru-cache": "^11.1.0", "lucide-react": "^0.487.0", + "macos-release": "^3.4.0", "markdown-it": "^14.1.0", "mermaid": "^11.7.0", "mime": "^4.0.4", "motion": "^12.10.5", + "notion-helper": "^1.3.22", "npx-scope-finder": "^1.2.0", "officeparser": "^4.2.0", "openai": "patch:openai@npm%3A5.1.0#~/.yarn/patches/openai-npm-5.1.0-0e7b3ccb07.patch", diff --git a/src/main/knowledage/ocr/BaseOcrProvider.ts b/src/main/knowledage/ocr/BaseOcrProvider.ts index 1bc7ce8530..14f05cd202 100644 --- a/src/main/knowledage/ocr/BaseOcrProvider.ts +++ b/src/main/knowledage/ocr/BaseOcrProvider.ts @@ -5,6 +5,7 @@ import { windowService } from '@main/services/WindowService' import { getFileExt } from '@main/utils/file' import { FileMetadata, OcrProvider } from '@types' import { app } from 'electron' +import pdfjs from 'pdfjs-dist' import { TypedArray } from 'pdfjs-dist/types/src/display/api' export default abstract class BaseOcrProvider { @@ -76,8 +77,7 @@ export default abstract class BaseOcrProvider { source: string | URL | TypedArray, passwordCallback?: (fn: (password: string) => void, reason: string) => string ) { - const { getDocument } = await import('pdfjs-dist/legacy/build/pdf.mjs') - const documentLoadingTask = getDocument(source) + const documentLoadingTask = pdfjs.getDocument(source) if (passwordCallback) { documentLoadingTask.onPassword = passwordCallback } diff --git a/src/main/knowledage/preprocess/BasePreprocessProvider.ts b/src/main/knowledage/preprocess/BasePreprocessProvider.ts index 016e4d10d0..f8f31e67f3 100644 --- a/src/main/knowledage/preprocess/BasePreprocessProvider.ts +++ b/src/main/knowledage/preprocess/BasePreprocessProvider.ts @@ -5,6 +5,7 @@ import { windowService } from '@main/services/WindowService' import { getFileExt } from '@main/utils/file' import { FileMetadata, PreprocessProvider } from '@types' import { app } from 'electron' +import pdfjs from 'pdfjs-dist' import { TypedArray } from 'pdfjs-dist/types/src/display/api' export default abstract class BasePreprocessProvider { @@ -80,8 +81,7 @@ export default abstract class BasePreprocessProvider { source: string | URL | TypedArray, passwordCallback?: (fn: (password: string) => void, reason: string) => string ) { - const { getDocument } = await import('pdfjs-dist/legacy/build/pdf.mjs') - const documentLoadingTask = getDocument(source) + const documentLoadingTask = pdfjs.getDocument(source) if (passwordCallback) { documentLoadingTask.onPassword = passwordCallback } diff --git a/src/main/services/FileStorage.ts b/src/main/services/FileStorage.ts index 005fa7523a..ee61cd94c3 100644 --- a/src/main/services/FileStorage.ts +++ b/src/main/services/FileStorage.ts @@ -15,8 +15,8 @@ import * as fs from 'fs' import { writeFileSync } from 'fs' import { readFile } from 'fs/promises' import officeParser from 'officeparser' -import { getDocument } from 'officeparser/pdfjs-dist-build/pdf.js' import * as path from 'path' +import pdfjs from 'pdfjs-dist' import { chdir } from 'process' import { v4 as uuidv4 } from 'uuid' import WordExtractor from 'word-extractor' @@ -367,7 +367,7 @@ class FileStorage { const filePath = path.join(this.storageDir, id) const buffer = await fs.promises.readFile(filePath) - const doc = await getDocument({ data: buffer }).promise + const doc = await pdfjs.getDocument({ data: buffer }).promise const pages = doc.numPages await doc.destroy() return pages