mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-28 05:11:24 +08:00
* feat(文件处理): 添加文件编码支持以正确处理不同编码的文本文件 添加文件编码检测和指定编码读取功能 - 在FileMetadata接口中添加encoding字段 - 添加iconv-lite和jschardet依赖用于编码处理和检测 - 文件上传时自动检测文本文件编码 - 文件读取时支持指定编码参数 - 更新所有API客户端以传递文件编码参数 * feat(文件处理): 添加文件编码检测和UTF-8读取功能 新增文件编码检测工具函数和UTF-8读取功能,统一处理不同编码的文件读取 移除重复的编码检测逻辑,优化代码结构 * refactor(FileStorage): 使用 readFileUTF8 替换 decodeBuffer 读取文件 移除冗余的 decodeBuffer 逻辑,直接使用封装好的 readFileUTF8 方法读取文件内容 * docs(utils): 为文件编码相关函数添加注释说明 添加对 detectEncoding、decodeBuffer 和 readFileUTF8 函数的详细注释,说明其功能和使用方法 * fix(utils): 为detectEncoding函数添加返回类型声明 * refactor(文件处理): 移除冗余的decodeBuffer函数并直接使用iconv.decode 简化文件读取逻辑,直接调用iconv.decode而不是通过中间函数decodeBuffer * test(file): 添加文件编码检测的测试用例 * test(文件编码检测): 移除ISO-8859-1编码的测试匹配 * refactor(file): 移除文件编码相关逻辑,统一使用UTF-8读取文本文件 移除FileMetadata接口中的encoding字段及相关检测逻辑 将所有文件读取操作统一改为使用readTextFileUTF8方法 * fix(文件读取): 改进文本文件解码逻辑以处理编码识别错误 当自动识别的编码包含错误字符时,尝试其他可能的编码 * refactor(utils): 将 console 日志替换为 electron-log 记录器 * refactor(文件存储): 移除文件读取时的可选编码参数 简化文件读取逻辑,始终使用UTF-8编码读取文本文件 * fix(utils): 修复文件编码检测中的文件描述符泄漏 在detectEncoding函数中,文件描述符在使用后未关闭,可能导致资源泄漏 * refactor(文件处理): 将readTextFileUTF8重命名为readTextFileWithAutoEncoding并改进编码检测 修复文件编码检测中未正确关闭文件描述符的问题 改进文本文件读取功能以支持自动编码检测 * test(file): 重构编码检测测试用例并改进测试结构 - 将 describe 块重命名为更明确的 detectEncoding - 提取公共的 mock 逻辑到 beforeEach - 更新测试描述为英文并保持一致性 - 简化测试实现,移除重复代码 * test(file): 添加对readTextFileWithAutoEncoding的测试用例
347 lines
12 KiB
TypeScript
347 lines
12 KiB
TypeScript
import * as fs from 'node:fs'
|
|
import os from 'node:os'
|
|
import path from 'node:path'
|
|
|
|
import { FileTypes } from '@types'
|
|
import iconv from 'iconv-lite'
|
|
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
|
|
|
|
import { detectEncoding, readTextFileWithAutoEncoding } from '../file'
|
|
import { getAllFiles, getAppConfigDir, getConfigDir, getFilesDir, getFileType, getTempDir } from '../file'
|
|
|
|
// Mock dependencies
|
|
vi.mock('node:fs')
|
|
vi.mock('node:os')
|
|
vi.mock('node:path')
|
|
vi.mock('uuid', () => ({
|
|
v4: () => 'mock-uuid'
|
|
}))
|
|
vi.mock('electron', () => ({
|
|
app: {
|
|
getPath: vi.fn((key) => {
|
|
if (key === 'temp') return '/mock/temp'
|
|
if (key === 'userData') return '/mock/userData'
|
|
return '/mock/unknown'
|
|
})
|
|
}
|
|
}))
|
|
|
|
describe('file', () => {
|
|
beforeEach(() => {
|
|
vi.clearAllMocks()
|
|
|
|
// Mock path.extname
|
|
vi.mocked(path.extname).mockImplementation((file) => {
|
|
const parts = file.split('.')
|
|
return parts.length > 1 ? `.${parts[parts.length - 1]}` : ''
|
|
})
|
|
|
|
// Mock path.basename
|
|
vi.mocked(path.basename).mockImplementation((file) => {
|
|
const parts = file.split('/')
|
|
return parts[parts.length - 1]
|
|
})
|
|
|
|
// Mock path.join
|
|
vi.mocked(path.join).mockImplementation((...args) => args.join('/'))
|
|
|
|
// Mock os.homedir
|
|
vi.mocked(os.homedir).mockReturnValue('/mock/home')
|
|
})
|
|
|
|
afterEach(() => {
|
|
vi.resetAllMocks()
|
|
})
|
|
|
|
describe('getFileType', () => {
|
|
it('should return IMAGE for image extensions', () => {
|
|
expect(getFileType('.jpg')).toBe(FileTypes.IMAGE)
|
|
expect(getFileType('.jpeg')).toBe(FileTypes.IMAGE)
|
|
expect(getFileType('.png')).toBe(FileTypes.IMAGE)
|
|
expect(getFileType('.gif')).toBe(FileTypes.IMAGE)
|
|
expect(getFileType('.webp')).toBe(FileTypes.IMAGE)
|
|
expect(getFileType('.bmp')).toBe(FileTypes.IMAGE)
|
|
})
|
|
|
|
it('should return VIDEO for video extensions', () => {
|
|
expect(getFileType('.mp4')).toBe(FileTypes.VIDEO)
|
|
expect(getFileType('.avi')).toBe(FileTypes.VIDEO)
|
|
expect(getFileType('.mov')).toBe(FileTypes.VIDEO)
|
|
expect(getFileType('.mkv')).toBe(FileTypes.VIDEO)
|
|
expect(getFileType('.flv')).toBe(FileTypes.VIDEO)
|
|
})
|
|
|
|
it('should return AUDIO for audio extensions', () => {
|
|
expect(getFileType('.mp3')).toBe(FileTypes.AUDIO)
|
|
expect(getFileType('.wav')).toBe(FileTypes.AUDIO)
|
|
expect(getFileType('.ogg')).toBe(FileTypes.AUDIO)
|
|
expect(getFileType('.flac')).toBe(FileTypes.AUDIO)
|
|
expect(getFileType('.aac')).toBe(FileTypes.AUDIO)
|
|
})
|
|
|
|
it('should return TEXT for text extensions', () => {
|
|
expect(getFileType('.txt')).toBe(FileTypes.TEXT)
|
|
expect(getFileType('.md')).toBe(FileTypes.TEXT)
|
|
expect(getFileType('.html')).toBe(FileTypes.TEXT)
|
|
expect(getFileType('.json')).toBe(FileTypes.TEXT)
|
|
expect(getFileType('.js')).toBe(FileTypes.TEXT)
|
|
expect(getFileType('.ts')).toBe(FileTypes.TEXT)
|
|
expect(getFileType('.css')).toBe(FileTypes.TEXT)
|
|
expect(getFileType('.java')).toBe(FileTypes.TEXT)
|
|
expect(getFileType('.py')).toBe(FileTypes.TEXT)
|
|
})
|
|
|
|
it('should return DOCUMENT for document extensions', () => {
|
|
expect(getFileType('.pdf')).toBe(FileTypes.DOCUMENT)
|
|
expect(getFileType('.pptx')).toBe(FileTypes.DOCUMENT)
|
|
expect(getFileType('.doc')).toBe(FileTypes.DOCUMENT)
|
|
expect(getFileType('.docx')).toBe(FileTypes.DOCUMENT)
|
|
expect(getFileType('.xlsx')).toBe(FileTypes.DOCUMENT)
|
|
expect(getFileType('.odt')).toBe(FileTypes.DOCUMENT)
|
|
})
|
|
|
|
it('should return OTHER for unknown extensions', () => {
|
|
expect(getFileType('.unknown')).toBe(FileTypes.OTHER)
|
|
expect(getFileType('')).toBe(FileTypes.OTHER)
|
|
expect(getFileType('.')).toBe(FileTypes.OTHER)
|
|
expect(getFileType('...')).toBe(FileTypes.OTHER)
|
|
expect(getFileType('.123')).toBe(FileTypes.OTHER)
|
|
})
|
|
|
|
it('should handle case-insensitive extensions', () => {
|
|
expect(getFileType('.JPG')).toBe(FileTypes.IMAGE)
|
|
expect(getFileType('.PDF')).toBe(FileTypes.DOCUMENT)
|
|
expect(getFileType('.Mp3')).toBe(FileTypes.AUDIO)
|
|
expect(getFileType('.HtMl')).toBe(FileTypes.TEXT)
|
|
expect(getFileType('.Xlsx')).toBe(FileTypes.DOCUMENT)
|
|
})
|
|
|
|
it('should handle extensions without leading dot', () => {
|
|
expect(getFileType('jpg')).toBe(FileTypes.OTHER)
|
|
expect(getFileType('pdf')).toBe(FileTypes.OTHER)
|
|
expect(getFileType('mp3')).toBe(FileTypes.OTHER)
|
|
})
|
|
|
|
it('should handle extreme cases', () => {
|
|
expect(getFileType('.averylongfileextensionname')).toBe(FileTypes.OTHER)
|
|
expect(getFileType('.tar.gz')).toBe(FileTypes.OTHER)
|
|
expect(getFileType('.文件')).toBe(FileTypes.OTHER)
|
|
expect(getFileType('.файл')).toBe(FileTypes.OTHER)
|
|
})
|
|
})
|
|
|
|
describe('getAllFiles', () => {
|
|
it('should return all valid files recursively', () => {
|
|
// Mock file system
|
|
// @ts-ignore - override type for testing
|
|
vi.spyOn(fs, 'readdirSync').mockImplementation((dirPath) => {
|
|
if (dirPath === '/test') {
|
|
return ['file1.txt', 'file2.pdf', 'subdir']
|
|
} else if (dirPath === '/test/subdir') {
|
|
return ['file3.md', 'file4.docx']
|
|
}
|
|
return []
|
|
})
|
|
|
|
vi.mocked(fs.statSync).mockImplementation((filePath) => {
|
|
const isDir = String(filePath).endsWith('subdir')
|
|
return {
|
|
isDirectory: () => isDir,
|
|
size: 1024
|
|
} as fs.Stats
|
|
})
|
|
|
|
const result = getAllFiles('/test')
|
|
|
|
expect(result).toHaveLength(4)
|
|
expect(result[0].id).toBe('mock-uuid')
|
|
expect(result[0].name).toBe('file1.txt')
|
|
expect(result[0].type).toBe(FileTypes.TEXT)
|
|
expect(result[1].name).toBe('file2.pdf')
|
|
expect(result[1].type).toBe(FileTypes.DOCUMENT)
|
|
})
|
|
|
|
it('should skip hidden files', () => {
|
|
// @ts-ignore - override type for testing
|
|
vi.spyOn(fs, 'readdirSync').mockReturnValue(['.hidden', 'visible.txt'])
|
|
vi.mocked(fs.statSync).mockReturnValue({
|
|
isDirectory: () => false,
|
|
size: 1024
|
|
} as fs.Stats)
|
|
|
|
const result = getAllFiles('/test')
|
|
|
|
expect(result).toHaveLength(1)
|
|
expect(result[0].name).toBe('visible.txt')
|
|
})
|
|
|
|
it('should skip unsupported file types', () => {
|
|
// @ts-ignore - override type for testing
|
|
vi.spyOn(fs, 'readdirSync').mockReturnValue(['image.jpg', 'video.mp4', 'audio.mp3', 'document.pdf'])
|
|
vi.mocked(fs.statSync).mockReturnValue({
|
|
isDirectory: () => false,
|
|
size: 1024
|
|
} as fs.Stats)
|
|
|
|
const result = getAllFiles('/test')
|
|
|
|
// Should only include document.pdf as the others are excluded types
|
|
expect(result).toHaveLength(1)
|
|
expect(result[0].name).toBe('document.pdf')
|
|
expect(result[0].type).toBe(FileTypes.DOCUMENT)
|
|
})
|
|
|
|
it('should return empty array for empty directory', () => {
|
|
// @ts-ignore - override type for testing
|
|
vi.spyOn(fs, 'readdirSync').mockReturnValue([])
|
|
|
|
const result = getAllFiles('/empty')
|
|
|
|
expect(result).toHaveLength(0)
|
|
})
|
|
|
|
it('should handle file system errors', () => {
|
|
// @ts-ignore - override type for testing
|
|
vi.spyOn(fs, 'readdirSync').mockImplementation(() => {
|
|
throw new Error('Directory not found')
|
|
})
|
|
|
|
// Since the function doesn't have error handling, we expect it to propagate
|
|
expect(() => getAllFiles('/nonexistent')).toThrow('Directory not found')
|
|
})
|
|
})
|
|
|
|
describe('getTempDir', () => {
|
|
it('should return correct temp directory path', () => {
|
|
const tempDir = getTempDir()
|
|
expect(tempDir).toBe('/mock/temp/CherryStudio')
|
|
})
|
|
})
|
|
|
|
describe('getFilesDir', () => {
|
|
it('should return correct files directory path', () => {
|
|
const filesDir = getFilesDir()
|
|
expect(filesDir).toBe('/mock/userData/Data/Files')
|
|
})
|
|
})
|
|
|
|
describe('getConfigDir', () => {
|
|
it('should return correct config directory path', () => {
|
|
const configDir = getConfigDir()
|
|
expect(configDir).toBe('/mock/home/.cherrystudio/config')
|
|
})
|
|
})
|
|
|
|
describe('getAppConfigDir', () => {
|
|
it('should return correct app config directory path', () => {
|
|
const appConfigDir = getAppConfigDir('test-app')
|
|
expect(appConfigDir).toBe('/mock/home/.cherrystudio/config/test-app')
|
|
})
|
|
|
|
it('should handle empty app name', () => {
|
|
const appConfigDir = getAppConfigDir('')
|
|
expect(appConfigDir).toBe('/mock/home/.cherrystudio/config/')
|
|
})
|
|
})
|
|
|
|
// 在 describe('file') 块内部添加新的 describe 块
|
|
describe('detectEncoding', () => {
|
|
const mockFilePath = '/path/to/mock/file.txt'
|
|
|
|
beforeEach(() => {
|
|
vi.mocked(fs.openSync).mockReturnValue(123)
|
|
vi.mocked(fs.closeSync).mockImplementation(() => {})
|
|
})
|
|
|
|
it('should correctly detect UTF-8 encoding', () => {
|
|
// 准备UTF-8编码的Buffer
|
|
const content = '这是UTF-8测试内容'
|
|
const buffer = Buffer.from(content, 'utf-8')
|
|
|
|
// 模拟文件读取
|
|
vi.mocked(fs.readSync).mockImplementation((_, buf) => {
|
|
const targetBuffer = new Uint8Array(buf.buffer)
|
|
const sourceBuffer = new Uint8Array(buffer)
|
|
targetBuffer.set(sourceBuffer)
|
|
return 1024
|
|
})
|
|
|
|
const encoding = detectEncoding(mockFilePath)
|
|
expect(encoding).toBe('UTF-8')
|
|
})
|
|
|
|
it('should correctly detect GB2312 encoding', () => {
|
|
// 使用iconv创建GB2312编码内容
|
|
const content = '这是一段GB2312编码的测试内容'
|
|
const gb2312Buffer = iconv.encode(content, 'GB2312')
|
|
|
|
// 模拟文件读取
|
|
vi.mocked(fs.readSync).mockImplementation((_, buf) => {
|
|
const targetBuffer = new Uint8Array(buf.buffer)
|
|
const sourceBuffer = new Uint8Array(gb2312Buffer)
|
|
targetBuffer.set(sourceBuffer)
|
|
return gb2312Buffer.length
|
|
})
|
|
|
|
const encoding = detectEncoding(mockFilePath)
|
|
expect(encoding).toMatch(/GB2312|GB18030/i)
|
|
})
|
|
|
|
it('should correctly detect ASCII encoding', () => {
|
|
// 准备ASCII编码内容
|
|
const content = 'ASCII content'
|
|
const buffer = Buffer.from(content, 'ascii')
|
|
|
|
// 模拟文件读取
|
|
vi.mocked(fs.readSync).mockImplementation((_, buf) => {
|
|
const targetBuffer = new Uint8Array(buf.buffer)
|
|
const sourceBuffer = new Uint8Array(buffer)
|
|
targetBuffer.set(sourceBuffer)
|
|
return buffer.length
|
|
})
|
|
|
|
const encoding = detectEncoding(mockFilePath)
|
|
expect(encoding.toLowerCase()).toBe('ascii')
|
|
})
|
|
})
|
|
|
|
describe('readTextFileWithAutoEncoding', () => {
|
|
const mockFilePath = '/path/to/mock/file.txt'
|
|
|
|
beforeEach(() => {
|
|
vi.mocked(fs.openSync).mockReturnValue(123)
|
|
vi.mocked(fs.closeSync).mockImplementation(() => {})
|
|
})
|
|
|
|
it('should read file with auto encoding', () => {
|
|
const content = '这是一段GB2312编码的测试内容'
|
|
const buffer = iconv.encode(content, 'GB2312')
|
|
vi.mocked(fs.readSync).mockImplementation((_, buf) => {
|
|
const targetBuffer = new Uint8Array(buf.buffer)
|
|
const sourceBuffer = new Uint8Array(buffer)
|
|
targetBuffer.set(sourceBuffer)
|
|
return buffer.length
|
|
})
|
|
vi.mocked(fs.readFileSync).mockReturnValue(buffer)
|
|
|
|
const result = readTextFileWithAutoEncoding(mockFilePath)
|
|
expect(result).toBe(content)
|
|
})
|
|
|
|
it('should try to fix bad detected encoding', () => {
|
|
const content = '这是一段GB2312编码的测试内容'
|
|
const buffer = iconv.encode(content, 'GB2312')
|
|
vi.mocked(fs.readSync).mockImplementation((_, buf) => {
|
|
const targetBuffer = new Uint8Array(buf.buffer)
|
|
const sourceBuffer = new Uint8Array(buffer)
|
|
targetBuffer.set(sourceBuffer)
|
|
return buffer.length
|
|
})
|
|
vi.mocked(fs.readFileSync).mockReturnValue(buffer)
|
|
vi.mocked(vi.fn(detectEncoding)).mockReturnValue('UTF-8')
|
|
const result = readTextFileWithAutoEncoding(mockFilePath)
|
|
expect(result).toBe(content)
|
|
})
|
|
})
|
|
})
|