From ea36b918f1c37c95b74292f3c923d6551896263c Mon Sep 17 00:00:00 2001 From: Phantom Date: Fri, 5 Dec 2025 13:56:54 +0800 Subject: [PATCH] feat(translate): support document files and refactor file reading logic (#11615) * refactor(FileStorage): extract file reading logic into reusable method. Move common file reading functionality from readFile and readExternalFile into a new private readFileCore method. Improve error logging by distinguishing between document and text file failures. Add comprehensive JSDoc documentation for all file reading methods. * feat(translate): support document files and increase size limit. Add support for document file types in translation file selection. Increase maximum file size limit to 20MB for documents while keeping text files at 5MB. Implement separate handling for document and text file reading. --- src/main/services/FileStorage.ts | 117 +++++++++++------- .../src/pages/translate/TranslatePage.tsx | 64 +++++++--- 2 files changed, 118 insertions(+), 63 deletions(-) diff --git a/src/main/services/FileStorage.ts b/src/main/services/FileStorage.ts index 3165fcf27e..c8eb6abb03 100644 --- a/src/main/services/FileStorage.ts +++ b/src/main/services/FileStorage.ts @@ -478,13 +478,16 @@ class FileStorage { } } - public readFile = async ( - _: Electron.IpcMainInvokeEvent, - id: string, - detectEncoding: boolean = false - ): Promise<string> => { - const filePath = path.join(this.storageDir, id) - + /** + * Core file reading logic that handles both documents and text files. 
+ * + * @private + * @param filePath - Full path to the file + * @param detectEncoding - Whether to auto-detect text file encoding + * @returns Promise resolving to the extracted text content + * @throws Error if file reading fails + */ + private async readFileCore(filePath: string, detectEncoding: boolean = false): Promise<string> { const fileExtension = path.extname(filePath) if (documentExts.includes(fileExtension)) { @@ -504,7 +507,7 @@ class FileStorage { return data } catch (error) { chdir(originalCwd) - logger.error('Failed to read file:', error as Error) + logger.error('Failed to read document file:', error as Error) throw error } } @@ -516,11 +519,72 @@ class FileStorage { return fs.readFileSync(filePath, 'utf-8') } } catch (error) { - logger.error('Failed to read file:', error as Error) + logger.error('Failed to read text file:', error as Error) throw new Error(`Failed to read file: ${filePath}.`) } } + /** + * Reads and extracts content from a stored file. + * + * Supports multiple file formats including: + * - Complex documents: .pdf, .doc, .docx, .pptx, .xlsx, .odt, .odp, .ods + * - Text files: .txt, .md, .json, .csv, etc. + * - Code files: .js, .ts, .py, .java, etc. + * + * For document formats, extracts text content using specialized parsers: + * - .doc files: Uses word-extractor library + * - Other Office formats: Uses officeparser library + * + * For text files, can optionally detect encoding automatically. 
+ * + * @param _ - Electron IPC invoke event (unused) + * @param id - File identifier with extension (e.g., "uuid.docx") + * @param detectEncoding - Whether to auto-detect text file encoding (default: false) + * @returns Promise resolving to the extracted text content of the file + * @throws Error if file reading fails or file is not found + * + * @example + * // Read a DOCX file + * const content = await readFile(event, "document.docx"); + * + * @example + * // Read a text file with encoding detection + * const content = await readFile(event, "text.txt", true); + * + * @example + * // Read a PDF file + * const content = await readFile(event, "manual.pdf"); + */ + public readFile = async ( + _: Electron.IpcMainInvokeEvent, + id: string, + detectEncoding: boolean = false + ): Promise<string> => { + const filePath = path.join(this.storageDir, id) + return this.readFileCore(filePath, detectEncoding) + } + + /** + * Reads and extracts content from an external file path. + * + * Similar to readFile, but operates on external file paths instead of stored files. + * Supports the same file formats including complex documents and text files. 
+ * + * @param _ - Electron IPC invoke event (unused) + * @param filePath - Absolute path to the external file + * @param detectEncoding - Whether to auto-detect text file encoding (default: false) + * @returns Promise resolving to the extracted text content of the file + * @throws Error if file does not exist or reading fails + * + * @example + * // Read an external DOCX file + * const content = await readExternalFile(event, "/path/to/document.docx"); + * + * @example + * // Read an external text file with encoding detection + * const content = await readExternalFile(event, "/path/to/text.txt", true); + */ public readExternalFile = async ( _: Electron.IpcMainInvokeEvent, filePath: string, @@ -530,40 +594,7 @@ class FileStorage { throw new Error(`File does not exist: ${filePath}`) } - const fileExtension = path.extname(filePath) - - if (documentExts.includes(fileExtension)) { - const originalCwd = process.cwd() - try { - chdir(this.tempDir) - - if (fileExtension === '.doc') { - const extractor = new WordExtractor() - const extracted = await extractor.extract(filePath) - chdir(originalCwd) - return extracted.getBody() - } - - const data = await officeParser.parseOfficeAsync(filePath) - chdir(originalCwd) - return data - } catch (error) { - chdir(originalCwd) - logger.error('Failed to read file:', error as Error) - throw error - } - } - - try { - if (detectEncoding) { - return readTextFileWithAutoEncoding(filePath) - } else { - return fs.readFileSync(filePath, 'utf-8') - } - } catch (error) { - logger.error('Failed to read file:', error as Error) - throw new Error(`Failed to read file: ${filePath}.`) - } + return this.readFileCore(filePath, detectEncoding) } public createTempFile = async (_: Electron.IpcMainInvokeEvent, fileName: string): Promise => { diff --git a/src/renderer/src/pages/translate/TranslatePage.tsx b/src/renderer/src/pages/translate/TranslatePage.tsx index 61b49893fa..dd47d41c9b 100644 --- a/src/renderer/src/pages/translate/TranslatePage.tsx +++ 
b/src/renderer/src/pages/translate/TranslatePage.tsx @@ -39,6 +39,7 @@ import { detectLanguage, determineTargetLanguage } from '@renderer/utils/translate' +import { documentExts } from '@shared/config/constant' import { imageExts, MB, textExts } from '@shared/config/constant' import { Button, Flex, FloatButton, Popover, Tooltip, Typography } from 'antd' import type { TextAreaRef } from 'antd/es/input/TextArea' @@ -66,7 +67,7 @@ const TranslatePage: FC = () => { const { prompt, getLanguageByLangcode, settings } = useTranslate() const { autoCopy } = settings const { shikiMarkdownIt } = useCodeStyle() - const { onSelectFile, selecting, clearFiles } = useFiles({ extensions: [...imageExts, ...textExts] }) + const { onSelectFile, selecting, clearFiles } = useFiles({ extensions: [...imageExts, ...textExts, ...documentExts] }) const { ocr } = useOcr() const { setTimeoutTimer } = useTimer() @@ -484,33 +485,56 @@ const TranslatePage: FC = () => { const readFile = useCallback( async (file: FileMetadata) => { const _readFile = async () => { - let isText: boolean try { - // 检查文件是否为文本文件 - isText = await isTextFile(file.path) - } catch (e) { - logger.error('Failed to check if file is text.', e as Error) - window.toast.error(t('translate.files.error.check_type') + ': ' + formatErrorMessage(e)) - return - } + const fileExtension = getFileExtension(file.path) - if (!isText) { - window.toast.error(t('common.file.not_supported', { type: getFileExtension(file.path) })) - logger.error('Unsupported file type.') - return - } + // Check if file is supported format (text file or document file) + let isText: boolean + const isDocument: boolean = documentExts.includes(fileExtension) - // the threshold may be too large - if (file.size > 5 * MB) { - window.toast.error(t('translate.files.error.too_large') + ' (0 ~ 5 MB)') - } else { + if (!isDocument) { + try { + // For non-document files, check if it's a text file + isText = await isTextFile(file.path) + } catch (e) { + logger.error('Failed to 
check file type.', e as Error) + window.toast.error(t('translate.files.error.check_type') + ': ' + formatErrorMessage(e)) + return + } + } else { + isText = false + } + + if (!isText && !isDocument) { + window.toast.error(t('common.file.not_supported', { type: fileExtension })) + logger.error('Unsupported file type.') + return + } + + // File size check - document files allowed to be larger + const maxSize = isDocument ? 20 * MB : 5 * MB + if (file.size > maxSize) { + window.toast.error(t('translate.files.error.too_large') + ` (0 ~ ${maxSize / MB} MB)`) + return + } + + let result: string try { - const result = await window.api.fs.readText(file.path) + if (isDocument) { + // Use the new document reading API + result = await window.api.file.readExternal(file.path, true) + } else { + // Read text file + result = await window.api.fs.readText(file.path) + } setText(text + result) } catch (e) { - logger.error('Failed to read text file.', e as Error) + logger.error('Failed to read file.', e as Error) window.toast.error(t('translate.files.error.unknown') + ': ' + formatErrorMessage(e)) } + } catch (e) { + logger.error('Failed to read file.', e as Error) + window.toast.error(t('translate.files.error.unknown') + ': ' + formatErrorMessage(e)) } } const promise = _readFile()