mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-19 14:41:24 +08:00
feat(FileStorage): add support for .doc files using word-extractor (#7374)
* feat(FileStorage): add support for .doc files and integrate word-extractor
* chore(package): add word-extractor to devDependencies
This commit is contained in:
parent
2350919f36
commit
a8e23966fa
@ -124,6 +124,7 @@
|
||||
"@types/react-infinite-scroll-component": "^5.0.0",
|
||||
"@types/react-window": "^1",
|
||||
"@types/tinycolor2": "^1",
|
||||
"@types/word-extractor": "^1",
|
||||
"@uiw/codemirror-extensions-langs": "^4.23.12",
|
||||
"@uiw/codemirror-themes-all": "^4.23.12",
|
||||
"@uiw/react-codemirror": "^4.23.12",
|
||||
@ -218,6 +219,7 @@
|
||||
"vite": "6.2.6",
|
||||
"vitest": "^3.1.4",
|
||||
"webdav": "^5.8.0",
|
||||
"word-extractor": "^1.0.4",
|
||||
"zipread": "^1.3.3"
|
||||
},
|
||||
"resolutions": {
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
export const imageExts = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']
|
||||
export const videoExts = ['.mp4', '.avi', '.mov', '.wmv', '.flv', '.mkv']
|
||||
export const audioExts = ['.mp3', '.wav', '.ogg', '.flac', '.aac']
|
||||
export const documentExts = ['.pdf', '.docx', '.pptx', '.xlsx', '.odt', '.odp', '.ods']
|
||||
export const documentExts = ['.pdf', '.doc', '.docx', '.pptx', '.xlsx', '.odt', '.odp', '.ods']
|
||||
export const thirdPartyApplicationExts = ['.draftsExport']
|
||||
export const bookExts = ['.epub']
|
||||
const textExtsByCategory = new Map([
|
||||
|
||||
@ -16,6 +16,7 @@ const FILE_LOADER_MAP: Record<string, string> = {
|
||||
// 内置类型
|
||||
'.pdf': 'common',
|
||||
'.csv': 'common',
|
||||
'.doc': 'common',
|
||||
'.docx': 'common',
|
||||
'.pptx': 'common',
|
||||
'.xlsx': 'common',
|
||||
|
||||
@ -220,10 +220,21 @@ class FileStorage {
|
||||
public readFile = async (_: Electron.IpcMainInvokeEvent, id: string): Promise<string> => {
|
||||
const filePath = path.join(this.storageDir, id)
|
||||
|
||||
if (documentExts.includes(path.extname(filePath))) {
|
||||
const fileExtension = path.extname(filePath)
|
||||
|
||||
if (documentExts.includes(fileExtension)) {
|
||||
const originalCwd = process.cwd()
|
||||
try {
|
||||
chdir(this.tempDir)
|
||||
|
||||
if (fileExtension === '.doc') {
|
||||
const WordExtractor = require('word-extractor')
|
||||
const extractor = new WordExtractor()
|
||||
const extracted = await extractor.extract(filePath)
|
||||
chdir(originalCwd)
|
||||
return extracted.getBody()
|
||||
}
|
||||
|
||||
const data = await officeParser.parseOfficeAsync(filePath)
|
||||
chdir(originalCwd)
|
||||
return data
|
||||
|
||||
@ -92,6 +92,7 @@ describe('file', () => {
|
||||
it('should return DOCUMENT for document extensions', () => {
|
||||
expect(getFileType('.pdf')).toBe(FileTypes.DOCUMENT)
|
||||
expect(getFileType('.pptx')).toBe(FileTypes.DOCUMENT)
|
||||
expect(getFileType('.doc')).toBe(FileTypes.DOCUMENT)
|
||||
expect(getFileType('.docx')).toBe(FileTypes.DOCUMENT)
|
||||
expect(getFileType('.xlsx')).toBe(FileTypes.DOCUMENT)
|
||||
expect(getFileType('.odt')).toBe(FileTypes.DOCUMENT)
|
||||
|
||||
30
yarn.lock
30
yarn.lock
@ -4754,6 +4754,15 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@types/word-extractor@npm:^1":
|
||||
version: 1.0.6
|
||||
resolution: "@types/word-extractor@npm:1.0.6"
|
||||
dependencies:
|
||||
"@types/node": "npm:*"
|
||||
checksum: 10c0/84f89c458213db5aec4d6badad14e0f2c07ac4b92f16165d19a95548f2b98fd5fff00419d49547464cb75c9432b5e9cb3b452d75eb5f07d808e31b44be390453
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@types/ws@npm:^8.5.4":
|
||||
version: 8.18.1
|
||||
resolution: "@types/ws@npm:8.18.1"
|
||||
@ -5642,6 +5651,7 @@ __metadata:
|
||||
"@types/react-infinite-scroll-component": "npm:^5.0.0"
|
||||
"@types/react-window": "npm:^1"
|
||||
"@types/tinycolor2": "npm:^1"
|
||||
"@types/word-extractor": "npm:^1"
|
||||
"@uiw/codemirror-extensions-langs": "npm:^4.23.12"
|
||||
"@uiw/codemirror-themes-all": "npm:^4.23.12"
|
||||
"@uiw/react-codemirror": "npm:^4.23.12"
|
||||
@ -5742,6 +5752,7 @@ __metadata:
|
||||
vite: "npm:6.2.6"
|
||||
vitest: "npm:^3.1.4"
|
||||
webdav: "npm:^5.8.0"
|
||||
word-extractor: "npm:^1.0.4"
|
||||
zipread: "npm:^1.3.3"
|
||||
languageName: unknown
|
||||
linkType: soft
|
||||
@ -16428,6 +16439,15 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"saxes@npm:^5.0.1":
|
||||
version: 5.0.1
|
||||
resolution: "saxes@npm:5.0.1"
|
||||
dependencies:
|
||||
xmlchars: "npm:^2.2.0"
|
||||
checksum: 10c0/b7476c41dbe1c3a89907d2546fecfba234de5e66743ef914cde2603f47b19bed09732ab51b528ad0f98b958369d8be72b6f5af5c9cfad69972a73d061f0b3952
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"saxes@npm:^6.0.0":
|
||||
version: 6.0.0
|
||||
resolution: "saxes@npm:6.0.0"
|
||||
@ -18632,6 +18652,16 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"word-extractor@npm:^1.0.4":
|
||||
version: 1.0.4
|
||||
resolution: "word-extractor@npm:1.0.4"
|
||||
dependencies:
|
||||
saxes: "npm:^5.0.1"
|
||||
yauzl: "npm:^2.10.0"
|
||||
checksum: 10c0/f8c6b4f9278802d0c803479c1441713e351e67f7b0d2f85bd8cbe94b76298d4adb058b5f23ee0a01faa02f3b1f01c507a4a2f44fa39cfcbd498a51769dd9e8e7
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"word-wrap@npm:^1.2.5":
|
||||
version: 1.2.5
|
||||
resolution: "word-wrap@npm:1.2.5"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user