mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-20 23:22:05 +08:00
feat(FileStorage): add support for .doc files using word-extractor (#7374)
* feat(FileStorage): add support for .doc files and integrate word-extractor
* chore(package): add word-extractor to devDependencies
This commit is contained in:
parent
2350919f36
commit
a8e23966fa
@ -124,6 +124,7 @@
|
|||||||
"@types/react-infinite-scroll-component": "^5.0.0",
|
"@types/react-infinite-scroll-component": "^5.0.0",
|
||||||
"@types/react-window": "^1",
|
"@types/react-window": "^1",
|
||||||
"@types/tinycolor2": "^1",
|
"@types/tinycolor2": "^1",
|
||||||
|
"@types/word-extractor": "^1",
|
||||||
"@uiw/codemirror-extensions-langs": "^4.23.12",
|
"@uiw/codemirror-extensions-langs": "^4.23.12",
|
||||||
"@uiw/codemirror-themes-all": "^4.23.12",
|
"@uiw/codemirror-themes-all": "^4.23.12",
|
||||||
"@uiw/react-codemirror": "^4.23.12",
|
"@uiw/react-codemirror": "^4.23.12",
|
||||||
@ -218,6 +219,7 @@
|
|||||||
"vite": "6.2.6",
|
"vite": "6.2.6",
|
||||||
"vitest": "^3.1.4",
|
"vitest": "^3.1.4",
|
||||||
"webdav": "^5.8.0",
|
"webdav": "^5.8.0",
|
||||||
|
"word-extractor": "^1.0.4",
|
||||||
"zipread": "^1.3.3"
|
"zipread": "^1.3.3"
|
||||||
},
|
},
|
||||||
"resolutions": {
|
"resolutions": {
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
export const imageExts = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']
|
export const imageExts = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']
|
||||||
export const videoExts = ['.mp4', '.avi', '.mov', '.wmv', '.flv', '.mkv']
|
export const videoExts = ['.mp4', '.avi', '.mov', '.wmv', '.flv', '.mkv']
|
||||||
export const audioExts = ['.mp3', '.wav', '.ogg', '.flac', '.aac']
|
export const audioExts = ['.mp3', '.wav', '.ogg', '.flac', '.aac']
|
||||||
export const documentExts = ['.pdf', '.docx', '.pptx', '.xlsx', '.odt', '.odp', '.ods']
|
export const documentExts = ['.pdf', '.doc', '.docx', '.pptx', '.xlsx', '.odt', '.odp', '.ods']
|
||||||
export const thirdPartyApplicationExts = ['.draftsExport']
|
export const thirdPartyApplicationExts = ['.draftsExport']
|
||||||
export const bookExts = ['.epub']
|
export const bookExts = ['.epub']
|
||||||
const textExtsByCategory = new Map([
|
const textExtsByCategory = new Map([
|
||||||
|
|||||||
@ -16,6 +16,7 @@ const FILE_LOADER_MAP: Record<string, string> = {
|
|||||||
// 内置类型
|
// 内置类型
|
||||||
'.pdf': 'common',
|
'.pdf': 'common',
|
||||||
'.csv': 'common',
|
'.csv': 'common',
|
||||||
|
'.doc': 'common',
|
||||||
'.docx': 'common',
|
'.docx': 'common',
|
||||||
'.pptx': 'common',
|
'.pptx': 'common',
|
||||||
'.xlsx': 'common',
|
'.xlsx': 'common',
|
||||||
|
|||||||
@ -220,10 +220,21 @@ class FileStorage {
|
|||||||
public readFile = async (_: Electron.IpcMainInvokeEvent, id: string): Promise<string> => {
|
public readFile = async (_: Electron.IpcMainInvokeEvent, id: string): Promise<string> => {
|
||||||
const filePath = path.join(this.storageDir, id)
|
const filePath = path.join(this.storageDir, id)
|
||||||
|
|
||||||
if (documentExts.includes(path.extname(filePath))) {
|
const fileExtension = path.extname(filePath)
|
||||||
|
|
||||||
|
if (documentExts.includes(fileExtension)) {
|
||||||
const originalCwd = process.cwd()
|
const originalCwd = process.cwd()
|
||||||
try {
|
try {
|
||||||
chdir(this.tempDir)
|
chdir(this.tempDir)
|
||||||
|
|
||||||
|
if (fileExtension === '.doc') {
|
||||||
|
const WordExtractor = require('word-extractor')
|
||||||
|
const extractor = new WordExtractor()
|
||||||
|
const extracted = await extractor.extract(filePath)
|
||||||
|
chdir(originalCwd)
|
||||||
|
return extracted.getBody()
|
||||||
|
}
|
||||||
|
|
||||||
const data = await officeParser.parseOfficeAsync(filePath)
|
const data = await officeParser.parseOfficeAsync(filePath)
|
||||||
chdir(originalCwd)
|
chdir(originalCwd)
|
||||||
return data
|
return data
|
||||||
|
|||||||
@ -92,6 +92,7 @@ describe('file', () => {
|
|||||||
it('should return DOCUMENT for document extensions', () => {
|
it('should return DOCUMENT for document extensions', () => {
|
||||||
expect(getFileType('.pdf')).toBe(FileTypes.DOCUMENT)
|
expect(getFileType('.pdf')).toBe(FileTypes.DOCUMENT)
|
||||||
expect(getFileType('.pptx')).toBe(FileTypes.DOCUMENT)
|
expect(getFileType('.pptx')).toBe(FileTypes.DOCUMENT)
|
||||||
|
expect(getFileType('.doc')).toBe(FileTypes.DOCUMENT)
|
||||||
expect(getFileType('.docx')).toBe(FileTypes.DOCUMENT)
|
expect(getFileType('.docx')).toBe(FileTypes.DOCUMENT)
|
||||||
expect(getFileType('.xlsx')).toBe(FileTypes.DOCUMENT)
|
expect(getFileType('.xlsx')).toBe(FileTypes.DOCUMENT)
|
||||||
expect(getFileType('.odt')).toBe(FileTypes.DOCUMENT)
|
expect(getFileType('.odt')).toBe(FileTypes.DOCUMENT)
|
||||||
|
|||||||
30
yarn.lock
30
yarn.lock
@ -4754,6 +4754,15 @@ __metadata:
|
|||||||
languageName: node
|
languageName: node
|
||||||
linkType: hard
|
linkType: hard
|
||||||
|
|
||||||
|
"@types/word-extractor@npm:^1":
|
||||||
|
version: 1.0.6
|
||||||
|
resolution: "@types/word-extractor@npm:1.0.6"
|
||||||
|
dependencies:
|
||||||
|
"@types/node": "npm:*"
|
||||||
|
checksum: 10c0/84f89c458213db5aec4d6badad14e0f2c07ac4b92f16165d19a95548f2b98fd5fff00419d49547464cb75c9432b5e9cb3b452d75eb5f07d808e31b44be390453
|
||||||
|
languageName: node
|
||||||
|
linkType: hard
|
||||||
|
|
||||||
"@types/ws@npm:^8.5.4":
|
"@types/ws@npm:^8.5.4":
|
||||||
version: 8.18.1
|
version: 8.18.1
|
||||||
resolution: "@types/ws@npm:8.18.1"
|
resolution: "@types/ws@npm:8.18.1"
|
||||||
@ -5642,6 +5651,7 @@ __metadata:
|
|||||||
"@types/react-infinite-scroll-component": "npm:^5.0.0"
|
"@types/react-infinite-scroll-component": "npm:^5.0.0"
|
||||||
"@types/react-window": "npm:^1"
|
"@types/react-window": "npm:^1"
|
||||||
"@types/tinycolor2": "npm:^1"
|
"@types/tinycolor2": "npm:^1"
|
||||||
|
"@types/word-extractor": "npm:^1"
|
||||||
"@uiw/codemirror-extensions-langs": "npm:^4.23.12"
|
"@uiw/codemirror-extensions-langs": "npm:^4.23.12"
|
||||||
"@uiw/codemirror-themes-all": "npm:^4.23.12"
|
"@uiw/codemirror-themes-all": "npm:^4.23.12"
|
||||||
"@uiw/react-codemirror": "npm:^4.23.12"
|
"@uiw/react-codemirror": "npm:^4.23.12"
|
||||||
@ -5742,6 +5752,7 @@ __metadata:
|
|||||||
vite: "npm:6.2.6"
|
vite: "npm:6.2.6"
|
||||||
vitest: "npm:^3.1.4"
|
vitest: "npm:^3.1.4"
|
||||||
webdav: "npm:^5.8.0"
|
webdav: "npm:^5.8.0"
|
||||||
|
word-extractor: "npm:^1.0.4"
|
||||||
zipread: "npm:^1.3.3"
|
zipread: "npm:^1.3.3"
|
||||||
languageName: unknown
|
languageName: unknown
|
||||||
linkType: soft
|
linkType: soft
|
||||||
@ -16428,6 +16439,15 @@ __metadata:
|
|||||||
languageName: node
|
languageName: node
|
||||||
linkType: hard
|
linkType: hard
|
||||||
|
|
||||||
|
"saxes@npm:^5.0.1":
|
||||||
|
version: 5.0.1
|
||||||
|
resolution: "saxes@npm:5.0.1"
|
||||||
|
dependencies:
|
||||||
|
xmlchars: "npm:^2.2.0"
|
||||||
|
checksum: 10c0/b7476c41dbe1c3a89907d2546fecfba234de5e66743ef914cde2603f47b19bed09732ab51b528ad0f98b958369d8be72b6f5af5c9cfad69972a73d061f0b3952
|
||||||
|
languageName: node
|
||||||
|
linkType: hard
|
||||||
|
|
||||||
"saxes@npm:^6.0.0":
|
"saxes@npm:^6.0.0":
|
||||||
version: 6.0.0
|
version: 6.0.0
|
||||||
resolution: "saxes@npm:6.0.0"
|
resolution: "saxes@npm:6.0.0"
|
||||||
@ -18632,6 +18652,16 @@ __metadata:
|
|||||||
languageName: node
|
languageName: node
|
||||||
linkType: hard
|
linkType: hard
|
||||||
|
|
||||||
|
"word-extractor@npm:^1.0.4":
|
||||||
|
version: 1.0.4
|
||||||
|
resolution: "word-extractor@npm:1.0.4"
|
||||||
|
dependencies:
|
||||||
|
saxes: "npm:^5.0.1"
|
||||||
|
yauzl: "npm:^2.10.0"
|
||||||
|
checksum: 10c0/f8c6b4f9278802d0c803479c1441713e351e67f7b0d2f85bd8cbe94b76298d4adb058b5f23ee0a01faa02f3b1f01c507a4a2f44fa39cfcbd498a51769dd9e8e7
|
||||||
|
languageName: node
|
||||||
|
linkType: hard
|
||||||
|
|
||||||
"word-wrap@npm:^1.2.5":
|
"word-wrap@npm:^1.2.5":
|
||||||
version: 1.2.5
|
version: 1.2.5
|
||||||
resolution: "word-wrap@npm:1.2.5"
|
resolution: "word-wrap@npm:1.2.5"
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user