diff --git a/src/main/services/FileStorage.ts b/src/main/services/FileStorage.ts index 3165fcf27e..c8eb6abb03 100644 --- a/src/main/services/FileStorage.ts +++ b/src/main/services/FileStorage.ts @@ -478,13 +478,16 @@ class FileStorage { } } - public readFile = async ( - _: Electron.IpcMainInvokeEvent, - id: string, - detectEncoding: boolean = false - ): Promise => { - const filePath = path.join(this.storageDir, id) - + /** + * Core file reading logic that handles both documents and text files. + * + * @private + * @param filePath - Full path to the file + * @param detectEncoding - Whether to auto-detect text file encoding + * @returns Promise resolving to the extracted text content + * @throws Error if file reading fails + */ + private async readFileCore(filePath: string, detectEncoding: boolean = false): Promise { const fileExtension = path.extname(filePath) if (documentExts.includes(fileExtension)) { @@ -504,7 +507,7 @@ class FileStorage { return data } catch (error) { chdir(originalCwd) - logger.error('Failed to read file:', error as Error) + logger.error('Failed to read document file:', error as Error) throw error } } @@ -516,11 +519,72 @@ class FileStorage { return fs.readFileSync(filePath, 'utf-8') } } catch (error) { - logger.error('Failed to read file:', error as Error) + logger.error('Failed to read text file:', error as Error) throw new Error(`Failed to read file: ${filePath}.`) } } + /** + * Reads and extracts content from a stored file. + * + * Supports multiple file formats including: + * - Complex documents: .pdf, .doc, .docx, .pptx, .xlsx, .odt, .odp, .ods + * - Text files: .txt, .md, .json, .csv, etc. + * - Code files: .js, .ts, .py, .java, etc. + * + * For document formats, extracts text content using specialized parsers: + * - .doc files: Uses word-extractor library + * - Other Office formats: Uses officeparser library + * + * For text files, can optionally detect encoding automatically. + * + * @param _ - Electron IPC invoke event (unused) + * @param id - File identifier with extension (e.g., "uuid.docx") + * @param detectEncoding - Whether to auto-detect text file encoding (default: false) + * @returns Promise resolving to the extracted text content of the file + * @throws Error if file reading fails or file is not found + * + * @example + * // Read a DOCX file + * const content = await readFile(event, "document.docx"); + * + * @example + * // Read a text file with encoding detection + * const content = await readFile(event, "text.txt", true); + * + * @example + * // Read a PDF file + * const content = await readFile(event, "manual.pdf"); + */ + public readFile = async ( + _: Electron.IpcMainInvokeEvent, + id: string, + detectEncoding: boolean = false + ): Promise => { + const filePath = path.join(this.storageDir, id) + return this.readFileCore(filePath, detectEncoding) + } + + /** + * Reads and extracts content from an external file path. + * + * Similar to readFile, but operates on external file paths instead of stored files. + * Supports the same file formats including complex documents and text files. + * + * @param _ - Electron IPC invoke event (unused) + * @param filePath - Absolute path to the external file + * @param detectEncoding - Whether to auto-detect text file encoding (default: false) + * @returns Promise resolving to the extracted text content of the file + * @throws Error if file does not exist or reading fails + * + * @example + * // Read an external DOCX file + * const content = await readExternalFile(event, "/path/to/document.docx"); + * + * @example + * // Read an external text file with encoding detection + * const content = await readExternalFile(event, "/path/to/text.txt", true); + */ public readExternalFile = async ( _: Electron.IpcMainInvokeEvent, filePath: string, @@ -530,40 +594,7 @@ class FileStorage { throw new Error(`File does not exist: ${filePath}`) } - const fileExtension = path.extname(filePath) - - if (documentExts.includes(fileExtension)) { - const originalCwd = process.cwd() - try { - chdir(this.tempDir) - - if (fileExtension === '.doc') { - const extractor = new WordExtractor() - const extracted = await extractor.extract(filePath) - chdir(originalCwd) - return extracted.getBody() - } - - const data = await officeParser.parseOfficeAsync(filePath) - chdir(originalCwd) - return data - } catch (error) { - chdir(originalCwd) - logger.error('Failed to read file:', error as Error) - throw error - } - } - - try { - if (detectEncoding) { - return readTextFileWithAutoEncoding(filePath) - } else { - return fs.readFileSync(filePath, 'utf-8') - } - } catch (error) { - logger.error('Failed to read file:', error as Error) - throw new Error(`Failed to read file: ${filePath}.`) - } + return this.readFileCore(filePath, detectEncoding) } public createTempFile = async (_: Electron.IpcMainInvokeEvent, fileName: string): Promise => { diff --git a/src/renderer/src/pages/translate/TranslatePage.tsx b/src/renderer/src/pages/translate/TranslatePage.tsx index 61b49893fa..dd47d41c9b 100644 --- a/src/renderer/src/pages/translate/TranslatePage.tsx +++ b/src/renderer/src/pages/translate/TranslatePage.tsx @@ -39,6 +39,7 @@ import { detectLanguage, determineTargetLanguage } from '@renderer/utils/translate' +import { documentExts } from '@shared/config/constant' import { imageExts, MB, textExts } from '@shared/config/constant' import { Button, Flex, FloatButton, Popover, Tooltip, Typography } from 'antd' import type { TextAreaRef } from 'antd/es/input/TextArea' @@ -66,7 +67,7 @@ const TranslatePage: FC = () => { const { prompt, getLanguageByLangcode, settings } = useTranslate() const { autoCopy } = settings const { shikiMarkdownIt } = useCodeStyle() - const { onSelectFile, selecting, clearFiles } = useFiles({ extensions: [...imageExts, ...textExts] }) + const { onSelectFile, selecting, clearFiles } = useFiles({ extensions: [...imageExts, ...textExts, ...documentExts] }) const { ocr } = useOcr() const { setTimeoutTimer } = useTimer() @@ -484,33 +485,56 @@ const TranslatePage: FC = () => { const readFile = useCallback( async (file: FileMetadata) => { const _readFile = async () => { - let isText: boolean try { - // 检查文件是否为文本文件 - isText = await isTextFile(file.path) - } catch (e) { - logger.error('Failed to check if file is text.', e as Error) - window.toast.error(t('translate.files.error.check_type') + ': ' + formatErrorMessage(e)) - return - } + const fileExtension = getFileExtension(file.path) - if (!isText) { - window.toast.error(t('common.file.not_supported', { type: getFileExtension(file.path) })) - logger.error('Unsupported file type.') - return - } + // Check if file is supported format (text file or document file) + let isText: boolean + const isDocument: boolean = documentExts.includes(fileExtension) - // the threshold may be too large - if (file.size > 5 * MB) { - window.toast.error(t('translate.files.error.too_large') + ' (0 ~ 5 MB)') - } else { + if (!isDocument) { + try { + // For non-document files, check if it's a text file + isText = await isTextFile(file.path) + } catch (e) { + logger.error('Failed to check file type.', e as Error) + window.toast.error(t('translate.files.error.check_type') + ': ' + formatErrorMessage(e)) + return + } + } else { + isText = false + } + + if (!isText && !isDocument) { + window.toast.error(t('common.file.not_supported', { type: fileExtension })) + logger.error('Unsupported file type.') + return + } + + // File size check - document files allowed to be larger + const maxSize = isDocument ? 20 * MB : 5 * MB + if (file.size > maxSize) { + window.toast.error(t('translate.files.error.too_large') + ` (0 ~ ${maxSize / MB} MB)`) + return + } + + let result: string try { - const result = await window.api.fs.readText(file.path) + if (isDocument) { + // Use the new document reading API + result = await window.api.file.readExternal(file.path, true) + } else { + // Read text file + result = await window.api.fs.readText(file.path) + } setText(text + result) } catch (e) { - logger.error('Failed to read text file.', e as Error) + logger.error('Failed to read file.', e as Error) window.toast.error(t('translate.files.error.unknown') + ': ' + formatErrorMessage(e)) } + } catch (e) { + logger.error('Failed to read file.', e as Error) + window.toast.error(t('translate.files.error.unknown') + ': ' + formatErrorMessage(e)) } } const promise = _readFile()