feat(translate): support document files and refactor file reading logic (#11615)

* refactor(FileStorage): extract file reading logic into reusable method

Move common file reading functionality from readFile and readExternalFile into a new private readFileCore method.
Improve error logging by distinguishing between document and text file failures.
Add comprehensive JSDoc documentation for all file reading methods.

* feat(translate): support document files and increase size limit

Add support for document file types in translation file selection. Increase maximum file size limit to 20MB for documents while keeping text files at 5MB. Implement separate handling for document and text file reading.
This commit is contained in:
Phantom 2025-12-05 13:56:54 +08:00 committed by GitHub
parent 92bb05950d
commit ea36b918f1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 118 additions and 63 deletions

View File

@ -478,13 +478,16 @@ class FileStorage {
} }
} }
public readFile = async ( /**
_: Electron.IpcMainInvokeEvent, * Core file reading logic that handles both documents and text files.
id: string, *
detectEncoding: boolean = false * @private
): Promise<string> => { * @param filePath - Full path to the file
const filePath = path.join(this.storageDir, id) * @param detectEncoding - Whether to auto-detect text file encoding
* @returns Promise resolving to the extracted text content
* @throws Error if file reading fails
*/
private async readFileCore(filePath: string, detectEncoding: boolean = false): Promise<string> {
const fileExtension = path.extname(filePath) const fileExtension = path.extname(filePath)
if (documentExts.includes(fileExtension)) { if (documentExts.includes(fileExtension)) {
@ -504,7 +507,7 @@ class FileStorage {
return data return data
} catch (error) { } catch (error) {
chdir(originalCwd) chdir(originalCwd)
logger.error('Failed to read file:', error as Error) logger.error('Failed to read document file:', error as Error)
throw error throw error
} }
} }
@ -516,11 +519,72 @@ class FileStorage {
return fs.readFileSync(filePath, 'utf-8') return fs.readFileSync(filePath, 'utf-8')
} }
} catch (error) { } catch (error) {
logger.error('Failed to read file:', error as Error) logger.error('Failed to read text file:', error as Error)
throw new Error(`Failed to read file: ${filePath}.`) throw new Error(`Failed to read file: ${filePath}.`)
} }
} }
/**
 * IPC-facing reader for a file kept under this storage's directory.
 *
 * Resolves `id` against `this.storageDir` and hands the resulting path to
 * `readFileCore`, which chooses between a document parser and a plain-text
 * read based on the file extension.
 *
 * @param _ - Electron IPC invoke event (unused)
 * @param id - File name inside the storage directory, extension included (e.g., "uuid.docx")
 * @param detectEncoding - Forwarded to `readFileCore`; when true, text files are read with
 *   automatic encoding detection (default: false)
 * @returns Promise resolving to the text content produced by `readFileCore`
 * @throws Error propagated from `readFileCore` when the file cannot be read
 *
 * @example
 * // Read a stored DOCX file
 * const content = await readFile(event, "document.docx");
 *
 * @example
 * // Read a stored text file with encoding detection
 * const content = await readFile(event, "text.txt", true);
 */
public readFile = async (
_: Electron.IpcMainInvokeEvent,
id: string,
detectEncoding: boolean = false
): Promise<string> => this.readFileCore(path.join(this.storageDir, id), detectEncoding)
/**
* Reads and extracts content from an external file path.
*
* Similar to readFile, but operates on external file paths instead of stored files.
* Supports the same file formats including complex documents and text files.
*
* @param _ - Electron IPC invoke event (unused)
* @param filePath - Absolute path to the external file
* @param detectEncoding - Whether to auto-detect text file encoding (default: false)
* @returns Promise resolving to the extracted text content of the file
* @throws Error if file does not exist or reading fails
*
* @example
* // Read an external DOCX file
* const content = await readExternalFile(event, "/path/to/document.docx");
*
* @example
* // Read an external text file with encoding detection
* const content = await readExternalFile(event, "/path/to/text.txt", true);
*/
public readExternalFile = async ( public readExternalFile = async (
_: Electron.IpcMainInvokeEvent, _: Electron.IpcMainInvokeEvent,
filePath: string, filePath: string,
@ -530,40 +594,7 @@ class FileStorage {
throw new Error(`File does not exist: ${filePath}`) throw new Error(`File does not exist: ${filePath}`)
} }
const fileExtension = path.extname(filePath) return this.readFileCore(filePath, detectEncoding)
if (documentExts.includes(fileExtension)) {
const originalCwd = process.cwd()
try {
chdir(this.tempDir)
if (fileExtension === '.doc') {
const extractor = new WordExtractor()
const extracted = await extractor.extract(filePath)
chdir(originalCwd)
return extracted.getBody()
}
const data = await officeParser.parseOfficeAsync(filePath)
chdir(originalCwd)
return data
} catch (error) {
chdir(originalCwd)
logger.error('Failed to read file:', error as Error)
throw error
}
}
try {
if (detectEncoding) {
return readTextFileWithAutoEncoding(filePath)
} else {
return fs.readFileSync(filePath, 'utf-8')
}
} catch (error) {
logger.error('Failed to read file:', error as Error)
throw new Error(`Failed to read file: ${filePath}.`)
}
} }
public createTempFile = async (_: Electron.IpcMainInvokeEvent, fileName: string): Promise<string> => { public createTempFile = async (_: Electron.IpcMainInvokeEvent, fileName: string): Promise<string> => {

View File

@ -39,6 +39,7 @@ import {
detectLanguage, detectLanguage,
determineTargetLanguage determineTargetLanguage
} from '@renderer/utils/translate' } from '@renderer/utils/translate'
import { documentExts } from '@shared/config/constant'
import { imageExts, MB, textExts } from '@shared/config/constant' import { imageExts, MB, textExts } from '@shared/config/constant'
import { Button, Flex, FloatButton, Popover, Tooltip, Typography } from 'antd' import { Button, Flex, FloatButton, Popover, Tooltip, Typography } from 'antd'
import type { TextAreaRef } from 'antd/es/input/TextArea' import type { TextAreaRef } from 'antd/es/input/TextArea'
@ -66,7 +67,7 @@ const TranslatePage: FC = () => {
const { prompt, getLanguageByLangcode, settings } = useTranslate() const { prompt, getLanguageByLangcode, settings } = useTranslate()
const { autoCopy } = settings const { autoCopy } = settings
const { shikiMarkdownIt } = useCodeStyle() const { shikiMarkdownIt } = useCodeStyle()
const { onSelectFile, selecting, clearFiles } = useFiles({ extensions: [...imageExts, ...textExts] }) const { onSelectFile, selecting, clearFiles } = useFiles({ extensions: [...imageExts, ...textExts, ...documentExts] })
const { ocr } = useOcr() const { ocr } = useOcr()
const { setTimeoutTimer } = useTimer() const { setTimeoutTimer } = useTimer()
@ -484,33 +485,56 @@ const TranslatePage: FC = () => {
const readFile = useCallback( const readFile = useCallback(
async (file: FileMetadata) => { async (file: FileMetadata) => {
const _readFile = async () => { const _readFile = async () => {
let isText: boolean
try { try {
// 检查文件是否为文本文件 const fileExtension = getFileExtension(file.path)
// Check if file is supported format (text file or document file)
let isText: boolean
const isDocument: boolean = documentExts.includes(fileExtension)
if (!isDocument) {
try {
// For non-document files, check if it's a text file
isText = await isTextFile(file.path) isText = await isTextFile(file.path)
} catch (e) { } catch (e) {
logger.error('Failed to check if file is text.', e as Error) logger.error('Failed to check file type.', e as Error)
window.toast.error(t('translate.files.error.check_type') + ': ' + formatErrorMessage(e)) window.toast.error(t('translate.files.error.check_type') + ': ' + formatErrorMessage(e))
return return
} }
} else {
isText = false
}
if (!isText) { if (!isText && !isDocument) {
window.toast.error(t('common.file.not_supported', { type: getFileExtension(file.path) })) window.toast.error(t('common.file.not_supported', { type: fileExtension }))
logger.error('Unsupported file type.') logger.error('Unsupported file type.')
return return
} }
// the threshold may be too large // File size check - document files allowed to be larger
if (file.size > 5 * MB) { const maxSize = isDocument ? 20 * MB : 5 * MB
window.toast.error(t('translate.files.error.too_large') + ' (0 ~ 5 MB)') if (file.size > maxSize) {
} else { window.toast.error(t('translate.files.error.too_large') + ` (0 ~ ${maxSize / MB} MB)`)
return
}
let result: string
try { try {
const result = await window.api.fs.readText(file.path) if (isDocument) {
// Use the new document reading API
result = await window.api.file.readExternal(file.path, true)
} else {
// Read text file
result = await window.api.fs.readText(file.path)
}
setText(text + result) setText(text + result)
} catch (e) { } catch (e) {
logger.error('Failed to read text file.', e as Error) logger.error('Failed to read file.', e as Error)
window.toast.error(t('translate.files.error.unknown') + ': ' + formatErrorMessage(e)) window.toast.error(t('translate.files.error.unknown') + ': ' + formatErrorMessage(e))
} }
} catch (e) {
logger.error('Failed to read file.', e as Error)
window.toast.error(t('translate.files.error.unknown') + ': ' + formatErrorMessage(e))
} }
} }
const promise = _readFile() const promise = _readFile()