diff --git a/package.json b/package.json index 1cdfc9296b..7a673b8417 100644 --- a/package.json +++ b/package.json @@ -124,6 +124,7 @@ "@types/react-infinite-scroll-component": "^5.0.0", "@types/react-window": "^1", "@types/tinycolor2": "^1", + "@types/word-extractor": "^1", "@uiw/codemirror-extensions-langs": "^4.23.12", "@uiw/codemirror-themes-all": "^4.23.12", "@uiw/react-codemirror": "^4.23.12", @@ -218,6 +219,7 @@ "vite": "6.2.6", "vitest": "^3.1.4", "webdav": "^5.8.0", + "word-extractor": "^1.0.4", "zipread": "^1.3.3" }, "resolutions": { diff --git a/packages/shared/config/constant.ts b/packages/shared/config/constant.ts index 5a3465f648..719600650e 100644 --- a/packages/shared/config/constant.ts +++ b/packages/shared/config/constant.ts @@ -1,7 +1,7 @@ export const imageExts = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp'] export const videoExts = ['.mp4', '.avi', '.mov', '.wmv', '.flv', '.mkv'] export const audioExts = ['.mp3', '.wav', '.ogg', '.flac', '.aac'] -export const documentExts = ['.pdf', '.docx', '.pptx', '.xlsx', '.odt', '.odp', '.ods'] +export const documentExts = ['.pdf', '.doc', '.docx', '.pptx', '.xlsx', '.odt', '.odp', '.ods'] export const thirdPartyApplicationExts = ['.draftsExport'] export const bookExts = ['.epub'] const textExtsByCategory = new Map([ diff --git a/src/main/loader/index.ts b/src/main/loader/index.ts index db837f414f..ba66b33e3d 100644 --- a/src/main/loader/index.ts +++ b/src/main/loader/index.ts @@ -16,6 +16,7 @@ const FILE_LOADER_MAP: Record = { // 内置类型 '.pdf': 'common', '.csv': 'common', + '.doc': 'common', '.docx': 'common', '.pptx': 'common', '.xlsx': 'common', diff --git a/src/main/services/FileStorage.ts b/src/main/services/FileStorage.ts index 2ac689b8cc..437f25f78c 100644 --- a/src/main/services/FileStorage.ts +++ b/src/main/services/FileStorage.ts @@ -220,10 +220,21 @@ class FileStorage { public readFile = async (_: Electron.IpcMainInvokeEvent, id: string): Promise => { const filePath = path.join(this.storageDir, id) - if (documentExts.includes(path.extname(filePath))) { + const fileExtension = path.extname(filePath) + + if (documentExts.includes(fileExtension)) { const originalCwd = process.cwd() try { chdir(this.tempDir) + + if (fileExtension === '.doc') { + const WordExtractor = require('word-extractor') + const extractor = new WordExtractor() + const extracted = await extractor.extract(filePath) + chdir(originalCwd) + return extracted.getBody() + } + const data = await officeParser.parseOfficeAsync(filePath) chdir(originalCwd) return data diff --git a/src/main/utils/__tests__/file.test.ts b/src/main/utils/__tests__/file.test.ts index aae00e85d4..14f4801524 100644 --- a/src/main/utils/__tests__/file.test.ts +++ b/src/main/utils/__tests__/file.test.ts @@ -92,6 +92,7 @@ describe('file', () => { it('should return DOCUMENT for document extensions', () => { expect(getFileType('.pdf')).toBe(FileTypes.DOCUMENT) expect(getFileType('.pptx')).toBe(FileTypes.DOCUMENT) + expect(getFileType('.doc')).toBe(FileTypes.DOCUMENT) expect(getFileType('.docx')).toBe(FileTypes.DOCUMENT) expect(getFileType('.xlsx')).toBe(FileTypes.DOCUMENT) expect(getFileType('.odt')).toBe(FileTypes.DOCUMENT) diff --git a/yarn.lock b/yarn.lock index b066f44a11..eefde56f9c 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4754,6 +4754,15 @@ __metadata: languageName: node linkType: hard +"@types/word-extractor@npm:^1": + version: 1.0.6 + resolution: "@types/word-extractor@npm:1.0.6" + dependencies: + "@types/node": "npm:*" + checksum: 10c0/84f89c458213db5aec4d6badad14e0f2c07ac4b92f16165d19a95548f2b98fd5fff00419d49547464cb75c9432b5e9cb3b452d75eb5f07d808e31b44be390453 + languageName: node + linkType: hard + "@types/ws@npm:^8.5.4": version: 8.18.1 resolution: "@types/ws@npm:8.18.1" @@ -5642,6 +5651,7 @@ __metadata: "@types/react-infinite-scroll-component": "npm:^5.0.0" "@types/react-window": "npm:^1" "@types/tinycolor2": "npm:^1" + "@types/word-extractor": "npm:^1" "@uiw/codemirror-extensions-langs": "npm:^4.23.12" "@uiw/codemirror-themes-all": "npm:^4.23.12" "@uiw/react-codemirror": "npm:^4.23.12" @@ -5742,6 +5752,7 @@ __metadata: vite: "npm:6.2.6" vitest: "npm:^3.1.4" webdav: "npm:^5.8.0" + word-extractor: "npm:^1.0.4" zipread: "npm:^1.3.3" languageName: unknown linkType: soft @@ -16428,6 +16439,15 @@ __metadata: languageName: node linkType: hard +"saxes@npm:^5.0.1": + version: 5.0.1 + resolution: "saxes@npm:5.0.1" + dependencies: + xmlchars: "npm:^2.2.0" + checksum: 10c0/b7476c41dbe1c3a89907d2546fecfba234de5e66743ef914cde2603f47b19bed09732ab51b528ad0f98b958369d8be72b6f5af5c9cfad69972a73d061f0b3952 + languageName: node + linkType: hard + "saxes@npm:^6.0.0": version: 6.0.0 resolution: "saxes@npm:6.0.0" @@ -18632,6 +18652,16 @@ __metadata: languageName: node linkType: hard +"word-extractor@npm:^1.0.4": + version: 1.0.4 + resolution: "word-extractor@npm:1.0.4" + dependencies: + saxes: "npm:^5.0.1" + yauzl: "npm:^2.10.0" + checksum: 10c0/f8c6b4f9278802d0c803479c1441713e351e67f7b0d2f85bd8cbe94b76298d4adb058b5f23ee0a01faa02f3b1f01c507a4a2f44fa39cfcbd498a51769dd9e8e7 + languageName: node + linkType: hard + "word-wrap@npm:^1.2.5": version: 1.2.5 resolution: "word-wrap@npm:1.2.5"