mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-19 14:41:24 +08:00
feat(translate): support document files and refactor file reading logic (#11615)
* refactor(FileStorage): extract file reading logic into a reusable method. Move the common file reading functionality from readFile and readExternalFile into a new private readFileCore method. Improve error logging by distinguishing between document and text file failures. Add comprehensive JSDoc documentation for all file reading methods.
* feat(translate): support document files and increase size limit. Add support for document file types in translation file selection. Increase the maximum file size limit to 20MB for documents while keeping text files at 5MB. Implement separate handling for document and text file reading.
This commit is contained in:
parent
92bb05950d
commit
ea36b918f1
@ -478,13 +478,16 @@ class FileStorage {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public readFile = async (
|
/**
|
||||||
_: Electron.IpcMainInvokeEvent,
|
* Core file reading logic that handles both documents and text files.
|
||||||
id: string,
|
*
|
||||||
detectEncoding: boolean = false
|
* @private
|
||||||
): Promise<string> => {
|
* @param filePath - Full path to the file
|
||||||
const filePath = path.join(this.storageDir, id)
|
* @param detectEncoding - Whether to auto-detect text file encoding
|
||||||
|
* @returns Promise resolving to the extracted text content
|
||||||
|
* @throws Error if file reading fails
|
||||||
|
*/
|
||||||
|
private async readFileCore(filePath: string, detectEncoding: boolean = false): Promise<string> {
|
||||||
const fileExtension = path.extname(filePath)
|
const fileExtension = path.extname(filePath)
|
||||||
|
|
||||||
if (documentExts.includes(fileExtension)) {
|
if (documentExts.includes(fileExtension)) {
|
||||||
@ -504,7 +507,7 @@ class FileStorage {
|
|||||||
return data
|
return data
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
chdir(originalCwd)
|
chdir(originalCwd)
|
||||||
logger.error('Failed to read file:', error as Error)
|
logger.error('Failed to read document file:', error as Error)
|
||||||
throw error
|
throw error
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -516,11 +519,72 @@ class FileStorage {
|
|||||||
return fs.readFileSync(filePath, 'utf-8')
|
return fs.readFileSync(filePath, 'utf-8')
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error('Failed to read file:', error as Error)
|
logger.error('Failed to read text file:', error as Error)
|
||||||
throw new Error(`Failed to read file: ${filePath}.`)
|
throw new Error(`Failed to read file: ${filePath}.`)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads and extracts content from a stored file.
|
||||||
|
*
|
||||||
|
* Supports multiple file formats including:
|
||||||
|
* - Complex documents: .pdf, .doc, .docx, .pptx, .xlsx, .odt, .odp, .ods
|
||||||
|
* - Text files: .txt, .md, .json, .csv, etc.
|
||||||
|
* - Code files: .js, .ts, .py, .java, etc.
|
||||||
|
*
|
||||||
|
* For document formats, extracts text content using specialized parsers:
|
||||||
|
* - .doc files: Uses word-extractor library
|
||||||
|
* - Other Office formats: Uses officeparser library
|
||||||
|
*
|
||||||
|
* For text files, can optionally detect encoding automatically.
|
||||||
|
*
|
||||||
|
* @param _ - Electron IPC invoke event (unused)
|
||||||
|
* @param id - File identifier with extension (e.g., "uuid.docx")
|
||||||
|
* @param detectEncoding - Whether to auto-detect text file encoding (default: false)
|
||||||
|
* @returns Promise resolving to the extracted text content of the file
|
||||||
|
* @throws Error if file reading fails or file is not found
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* // Read a DOCX file
|
||||||
|
* const content = await readFile(event, "document.docx");
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* // Read a text file with encoding detection
|
||||||
|
* const content = await readFile(event, "text.txt", true);
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* // Read a PDF file
|
||||||
|
* const content = await readFile(event, "manual.pdf");
|
||||||
|
*/
|
||||||
|
public readFile = async (
|
||||||
|
_: Electron.IpcMainInvokeEvent,
|
||||||
|
id: string,
|
||||||
|
detectEncoding: boolean = false
|
||||||
|
): Promise<string> => {
|
||||||
|
const filePath = path.join(this.storageDir, id)
|
||||||
|
return this.readFileCore(filePath, detectEncoding)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads and extracts content from an external file path.
|
||||||
|
*
|
||||||
|
* Similar to readFile, but operates on external file paths instead of stored files.
|
||||||
|
* Supports the same file formats including complex documents and text files.
|
||||||
|
*
|
||||||
|
* @param _ - Electron IPC invoke event (unused)
|
||||||
|
* @param filePath - Absolute path to the external file
|
||||||
|
* @param detectEncoding - Whether to auto-detect text file encoding (default: false)
|
||||||
|
* @returns Promise resolving to the extracted text content of the file
|
||||||
|
* @throws Error if file does not exist or reading fails
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* // Read an external DOCX file
|
||||||
|
* const content = await readExternalFile(event, "/path/to/document.docx");
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* // Read an external text file with encoding detection
|
||||||
|
* const content = await readExternalFile(event, "/path/to/text.txt", true);
|
||||||
|
*/
|
||||||
public readExternalFile = async (
|
public readExternalFile = async (
|
||||||
_: Electron.IpcMainInvokeEvent,
|
_: Electron.IpcMainInvokeEvent,
|
||||||
filePath: string,
|
filePath: string,
|
||||||
@ -530,40 +594,7 @@ class FileStorage {
|
|||||||
throw new Error(`File does not exist: ${filePath}`)
|
throw new Error(`File does not exist: ${filePath}`)
|
||||||
}
|
}
|
||||||
|
|
||||||
const fileExtension = path.extname(filePath)
|
return this.readFileCore(filePath, detectEncoding)
|
||||||
|
|
||||||
if (documentExts.includes(fileExtension)) {
|
|
||||||
const originalCwd = process.cwd()
|
|
||||||
try {
|
|
||||||
chdir(this.tempDir)
|
|
||||||
|
|
||||||
if (fileExtension === '.doc') {
|
|
||||||
const extractor = new WordExtractor()
|
|
||||||
const extracted = await extractor.extract(filePath)
|
|
||||||
chdir(originalCwd)
|
|
||||||
return extracted.getBody()
|
|
||||||
}
|
|
||||||
|
|
||||||
const data = await officeParser.parseOfficeAsync(filePath)
|
|
||||||
chdir(originalCwd)
|
|
||||||
return data
|
|
||||||
} catch (error) {
|
|
||||||
chdir(originalCwd)
|
|
||||||
logger.error('Failed to read file:', error as Error)
|
|
||||||
throw error
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
if (detectEncoding) {
|
|
||||||
return readTextFileWithAutoEncoding(filePath)
|
|
||||||
} else {
|
|
||||||
return fs.readFileSync(filePath, 'utf-8')
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
logger.error('Failed to read file:', error as Error)
|
|
||||||
throw new Error(`Failed to read file: ${filePath}.`)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public createTempFile = async (_: Electron.IpcMainInvokeEvent, fileName: string): Promise<string> => {
|
public createTempFile = async (_: Electron.IpcMainInvokeEvent, fileName: string): Promise<string> => {
|
||||||
|
|||||||
@ -39,6 +39,7 @@ import {
|
|||||||
detectLanguage,
|
detectLanguage,
|
||||||
determineTargetLanguage
|
determineTargetLanguage
|
||||||
} from '@renderer/utils/translate'
|
} from '@renderer/utils/translate'
|
||||||
|
import { documentExts } from '@shared/config/constant'
|
||||||
import { imageExts, MB, textExts } from '@shared/config/constant'
|
import { imageExts, MB, textExts } from '@shared/config/constant'
|
||||||
import { Button, Flex, FloatButton, Popover, Tooltip, Typography } from 'antd'
|
import { Button, Flex, FloatButton, Popover, Tooltip, Typography } from 'antd'
|
||||||
import type { TextAreaRef } from 'antd/es/input/TextArea'
|
import type { TextAreaRef } from 'antd/es/input/TextArea'
|
||||||
@ -66,7 +67,7 @@ const TranslatePage: FC = () => {
|
|||||||
const { prompt, getLanguageByLangcode, settings } = useTranslate()
|
const { prompt, getLanguageByLangcode, settings } = useTranslate()
|
||||||
const { autoCopy } = settings
|
const { autoCopy } = settings
|
||||||
const { shikiMarkdownIt } = useCodeStyle()
|
const { shikiMarkdownIt } = useCodeStyle()
|
||||||
const { onSelectFile, selecting, clearFiles } = useFiles({ extensions: [...imageExts, ...textExts] })
|
const { onSelectFile, selecting, clearFiles } = useFiles({ extensions: [...imageExts, ...textExts, ...documentExts] })
|
||||||
const { ocr } = useOcr()
|
const { ocr } = useOcr()
|
||||||
const { setTimeoutTimer } = useTimer()
|
const { setTimeoutTimer } = useTimer()
|
||||||
|
|
||||||
@ -484,33 +485,56 @@ const TranslatePage: FC = () => {
|
|||||||
const readFile = useCallback(
|
const readFile = useCallback(
|
||||||
async (file: FileMetadata) => {
|
async (file: FileMetadata) => {
|
||||||
const _readFile = async () => {
|
const _readFile = async () => {
|
||||||
let isText: boolean
|
|
||||||
try {
|
try {
|
||||||
// 检查文件是否为文本文件
|
const fileExtension = getFileExtension(file.path)
|
||||||
isText = await isTextFile(file.path)
|
|
||||||
} catch (e) {
|
|
||||||
logger.error('Failed to check if file is text.', e as Error)
|
|
||||||
window.toast.error(t('translate.files.error.check_type') + ': ' + formatErrorMessage(e))
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!isText) {
|
// Check if file is supported format (text file or document file)
|
||||||
window.toast.error(t('common.file.not_supported', { type: getFileExtension(file.path) }))
|
let isText: boolean
|
||||||
logger.error('Unsupported file type.')
|
const isDocument: boolean = documentExts.includes(fileExtension)
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// the threshold may be too large
|
if (!isDocument) {
|
||||||
if (file.size > 5 * MB) {
|
try {
|
||||||
window.toast.error(t('translate.files.error.too_large') + ' (0 ~ 5 MB)')
|
// For non-document files, check if it's a text file
|
||||||
} else {
|
isText = await isTextFile(file.path)
|
||||||
|
} catch (e) {
|
||||||
|
logger.error('Failed to check file type.', e as Error)
|
||||||
|
window.toast.error(t('translate.files.error.check_type') + ': ' + formatErrorMessage(e))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
isText = false
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!isText && !isDocument) {
|
||||||
|
window.toast.error(t('common.file.not_supported', { type: fileExtension }))
|
||||||
|
logger.error('Unsupported file type.')
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// File size check - document files allowed to be larger
|
||||||
|
const maxSize = isDocument ? 20 * MB : 5 * MB
|
||||||
|
if (file.size > maxSize) {
|
||||||
|
window.toast.error(t('translate.files.error.too_large') + ` (0 ~ ${maxSize / MB} MB)`)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
let result: string
|
||||||
try {
|
try {
|
||||||
const result = await window.api.fs.readText(file.path)
|
if (isDocument) {
|
||||||
|
// Use the new document reading API
|
||||||
|
result = await window.api.file.readExternal(file.path, true)
|
||||||
|
} else {
|
||||||
|
// Read text file
|
||||||
|
result = await window.api.fs.readText(file.path)
|
||||||
|
}
|
||||||
setText(text + result)
|
setText(text + result)
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
logger.error('Failed to read text file.', e as Error)
|
logger.error('Failed to read file.', e as Error)
|
||||||
window.toast.error(t('translate.files.error.unknown') + ': ' + formatErrorMessage(e))
|
window.toast.error(t('translate.files.error.unknown') + ': ' + formatErrorMessage(e))
|
||||||
}
|
}
|
||||||
|
} catch (e) {
|
||||||
|
logger.error('Failed to read file.', e as Error)
|
||||||
|
window.toast.error(t('translate.files.error.unknown') + ': ' + formatErrorMessage(e))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const promise = _readFile()
|
const promise = _readFile()
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user