mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2026-01-01 17:59:09 +08:00
fix: change jschardet to chardet (#8577)
* fix: change jschardet to chardet * Update file.test.ts * fix: error * fix: test fail * fix: test error * Update file.test.ts * fix: optimize details * Update file.test.ts * Update file.ts * Update file.ts * Update file.test.ts
This commit is contained in:
parent
7098489f15
commit
27af64f2bd
@ -168,6 +168,7 @@
|
||||
"async-mutex": "^0.5.0",
|
||||
"axios": "^1.7.3",
|
||||
"browser-image-compression": "^2.0.2",
|
||||
"chardet": "^2.1.0",
|
||||
"cli-progress": "^3.12.0",
|
||||
"code-inspector-plugin": "^0.20.14",
|
||||
"color": "^5.0.0",
|
||||
@ -204,7 +205,6 @@
|
||||
"iconv-lite": "^0.6.3",
|
||||
"jaison": "^2.0.2",
|
||||
"jest-styled-components": "^7.2.0",
|
||||
"jschardet": "^3.1.4",
|
||||
"linguist-languages": "^8.0.0",
|
||||
"lint-staged": "^15.5.0",
|
||||
"lodash": "^4.17.21",
|
||||
|
||||
@ -38,7 +38,7 @@ import { IpcChannel } from '@shared/IpcChannel'
|
||||
import { FileMetadata, KnowledgeBaseParams, KnowledgeItem } from '@types'
|
||||
import { v4 as uuidv4 } from 'uuid'
|
||||
|
||||
const logger = loggerService.withContext('KnowledgeService')
|
||||
const logger = loggerService.withContext('MainKnowledgeService')
|
||||
|
||||
export interface KnowledgeBaseAddItemOptions {
|
||||
base: KnowledgeBaseParams
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import { loggerService } from '@logger'
|
||||
import { isDev } from '@main/constant'
|
||||
import { CacheBatchSpanProcessor, FunctionSpanExporter } from '@mcp-trace/trace-core'
|
||||
import { NodeTracer as MCPNodeTracer } from '@mcp-trace/trace-node/nodeTracer'
|
||||
@ -6,7 +7,6 @@ import { BrowserWindow, ipcMain } from 'electron'
|
||||
import * as path from 'path'
|
||||
|
||||
import { ConfigKeys, configManager } from './ConfigManager'
|
||||
import { loggerService } from './LoggerService'
|
||||
import { spanCacheService } from './SpanCacheService'
|
||||
|
||||
export const TRACER_NAME = 'CherryStudio'
|
||||
|
||||
@ -4,8 +4,8 @@ import os from 'node:os'
|
||||
import path from 'node:path'
|
||||
|
||||
import { FileTypes } from '@types'
|
||||
import chardet from 'chardet'
|
||||
import iconv from 'iconv-lite'
|
||||
import { detectAll as detectEncodingAll } from 'jschardet'
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
|
||||
|
||||
import { readTextFileWithAutoEncoding } from '../file'
|
||||
@ -260,46 +260,24 @@ describe('file', () => {
|
||||
const mockFilePath = '/path/to/mock/file.txt'
|
||||
|
||||
it('should read file with auto encoding', async () => {
|
||||
const content = '这是一段GB2312编码的测试内容'
|
||||
const buffer = iconv.encode(content, 'GB2312')
|
||||
const content = '这是一段GB18030编码的测试内容'
|
||||
const buffer = iconv.encode(content, 'GB18030')
|
||||
|
||||
// 创建模拟的 FileHandle 对象
|
||||
const mockFileHandle = {
|
||||
read: vi.fn().mockResolvedValue({
|
||||
bytesRead: buffer.byteLength,
|
||||
buffer: buffer
|
||||
}),
|
||||
close: vi.fn().mockResolvedValue(undefined)
|
||||
}
|
||||
|
||||
// 模拟 open 方法
|
||||
vi.spyOn(fsPromises, 'open').mockResolvedValue(mockFileHandle as any)
|
||||
// 模拟文件读取和编码检测
|
||||
vi.spyOn(fsPromises, 'readFile').mockResolvedValue(buffer)
|
||||
vi.spyOn(chardet, 'detectFile').mockResolvedValue('GB18030')
|
||||
|
||||
const result = await readTextFileWithAutoEncoding(mockFilePath)
|
||||
expect(result).toBe(content)
|
||||
})
|
||||
|
||||
it('should try to fix bad detected encoding', async () => {
|
||||
const content = '这是一段GB2312编码的测试内容'
|
||||
const buffer = iconv.encode(content, 'GB2312')
|
||||
const content = '这是一段UTF-8编码的测试内容'
|
||||
const buffer = iconv.encode(content, 'UTF-8')
|
||||
|
||||
// 创建模拟的 FileHandle 对象
|
||||
const mockFileHandle = {
|
||||
read: vi.fn().mockResolvedValue({
|
||||
bytesRead: buffer.byteLength,
|
||||
buffer: buffer
|
||||
}),
|
||||
close: vi.fn().mockResolvedValue(undefined)
|
||||
}
|
||||
|
||||
// 模拟 fs.open 方法
|
||||
vi.spyOn(fsPromises, 'open').mockResolvedValue(mockFileHandle as any)
|
||||
// 模拟文件读取
|
||||
vi.spyOn(fsPromises, 'readFile').mockResolvedValue(buffer)
|
||||
vi.mocked(vi.fn(detectEncodingAll)).mockReturnValue([
|
||||
{ encoding: 'UTF-8', confidence: 0.9 },
|
||||
{ encoding: 'GB2312', confidence: 0.8 }
|
||||
])
|
||||
vi.spyOn(chardet, 'detectFile').mockResolvedValue('GB18030')
|
||||
|
||||
const result = await readTextFileWithAutoEncoding(mockFilePath)
|
||||
expect(result).toBe(content)
|
||||
|
||||
@ -1,14 +1,14 @@
|
||||
import * as fs from 'node:fs'
|
||||
import { open, readFile } from 'node:fs/promises'
|
||||
import { readFile } from 'node:fs/promises'
|
||||
import os from 'node:os'
|
||||
import path from 'node:path'
|
||||
|
||||
import { loggerService } from '@logger'
|
||||
import { audioExts, documentExts, imageExts, MB, textExts, videoExts } from '@shared/config/constant'
|
||||
import { FileMetadata, FileTypes } from '@types'
|
||||
import chardet from 'chardet'
|
||||
import { app } from 'electron'
|
||||
import iconv from 'iconv-lite'
|
||||
import * as jschardet from 'jschardet'
|
||||
import { v4 as uuidv4 } from 'uuid'
|
||||
|
||||
const logger = loggerService.withContext('Utils:File')
|
||||
@ -170,39 +170,24 @@ export function getMcpDir() {
|
||||
* @returns 解码后的文件内容
|
||||
*/
|
||||
export async function readTextFileWithAutoEncoding(filePath: string): Promise<string> {
|
||||
// 读取前1MB以检测编码
|
||||
const buffer = Buffer.alloc(1 * MB)
|
||||
const fh = await open(filePath, 'r')
|
||||
const { buffer: bufferRead } = await fh.read(buffer, 0, 1 * MB, 0)
|
||||
await fh.close()
|
||||
|
||||
// 获取文件编码格式,最多取前两个可能的编码
|
||||
const encodings = jschardet
|
||||
.detectAll(bufferRead)
|
||||
.map((item) => ({
|
||||
...item,
|
||||
encoding: item.encoding === 'ascii' ? 'UTF-8' : item.encoding
|
||||
}))
|
||||
.filter((item, index, array) => array.findIndex((prevItem) => prevItem.encoding === item.encoding) === index)
|
||||
.slice(0, 2)
|
||||
|
||||
if (encodings.length === 0) {
|
||||
logger.error('Failed to detect encoding. Use utf-8 to decode.')
|
||||
const data = await readFile(filePath)
|
||||
return iconv.decode(data, 'UTF-8')
|
||||
}
|
||||
const encoding = (await chardet.detectFile(filePath, { sampleSize: MB })) || 'UTF-8'
|
||||
logger.debug(`File ${filePath} detected encoding: ${encoding}`)
|
||||
|
||||
const encodings = [encoding, 'UTF-8']
|
||||
const data = await readFile(filePath)
|
||||
|
||||
for (const item of encodings) {
|
||||
const encoding = item.encoding
|
||||
const content = iconv.decode(data, encoding)
|
||||
if (content.includes('\uFFFD')) {
|
||||
logger.error(
|
||||
`File ${filePath} was auto-detected as ${encoding} encoding, but contains invalid characters. Trying other encodings`
|
||||
)
|
||||
} else {
|
||||
return content
|
||||
for (const encoding of encodings) {
|
||||
try {
|
||||
const content = iconv.decode(data, encoding)
|
||||
if (!content.includes('\uFFFD')) {
|
||||
return content
|
||||
} else {
|
||||
logger.warn(
|
||||
`File ${filePath} was auto-detected as ${encoding} encoding, but contains invalid characters. Trying other encodings`
|
||||
)
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error(`Failed to decode file ${filePath} with encoding ${encoding}: ${error}`)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,13 +1,14 @@
|
||||
import Anthropic from '@anthropic-ai/sdk'
|
||||
import AnthropicVertex from '@anthropic-ai/vertex-sdk'
|
||||
import { loggerService } from '@logger'
|
||||
import { getVertexAILocation, getVertexAIProjectId, getVertexAIServiceAccount } from '@renderer/hooks/useVertexAI'
|
||||
import { loggerService } from '@renderer/services/LoggerService'
|
||||
import { Provider } from '@renderer/types'
|
||||
import { isEmpty } from 'lodash'
|
||||
|
||||
const logger = loggerService.withContext('AnthropicVertexClient')
|
||||
import { AnthropicAPIClient } from './AnthropicAPIClient'
|
||||
|
||||
const logger = loggerService.withContext('AnthropicVertexClient')
|
||||
|
||||
export class AnthropicVertexClient extends AnthropicAPIClient {
|
||||
sdkInstance: AnthropicVertex | undefined = undefined
|
||||
private authHeaders?: Record<string, string>
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import { loggerService } from '@logger'
|
||||
import { db } from '@renderer/databases'
|
||||
import { getDefaultTopic } from '@renderer/services/AssistantService'
|
||||
import { loggerService } from '@renderer/services/LoggerService'
|
||||
import { useAppDispatch, useAppSelector } from '@renderer/store'
|
||||
import {
|
||||
addAssistant,
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { loggerService } from '@renderer/services/LoggerService'
|
||||
import { loggerService } from '@logger'
|
||||
import { defaultLanguage } from '@shared/config/constant'
|
||||
import i18n from 'i18next'
|
||||
import { initReactI18next } from 'react-i18next'
|
||||
|
||||
@ -195,7 +195,7 @@ class KnowledgeQueue {
|
||||
updateBaseItemIsPreprocessed({
|
||||
baseId,
|
||||
itemId: item.id,
|
||||
isPreprocessed: base.preprocessOrOcrProvider ? true : false
|
||||
isPreprocessed: !!base.preprocessOrOcrProvider
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
@ -13,7 +13,7 @@ import { isEmpty } from 'lodash'
|
||||
import { getProviderByModel } from './AssistantService'
|
||||
import FileManager from './FileManager'
|
||||
|
||||
const logger = loggerService.withContext('KnowledgeService')
|
||||
const logger = loggerService.withContext('RendererKnowledgeService')
|
||||
|
||||
export const getKnowledgeBaseParams = (base: KnowledgeBase): KnowledgeBaseParams => {
|
||||
const provider = getProviderByModel(base.model)
|
||||
|
||||
16
yarn.lock
16
yarn.lock
@ -7293,6 +7293,7 @@ __metadata:
|
||||
async-mutex: "npm:^0.5.0"
|
||||
axios: "npm:^1.7.3"
|
||||
browser-image-compression: "npm:^2.0.2"
|
||||
chardet: "npm:^2.1.0"
|
||||
cli-progress: "npm:^3.12.0"
|
||||
code-inspector-plugin: "npm:^0.20.14"
|
||||
color: "npm:^5.0.0"
|
||||
@ -7330,7 +7331,6 @@ __metadata:
|
||||
iconv-lite: "npm:^0.6.3"
|
||||
jaison: "npm:^2.0.2"
|
||||
jest-styled-components: "npm:^7.2.0"
|
||||
jschardet: "npm:^3.1.4"
|
||||
jsdom: "npm:26.1.0"
|
||||
linguist-languages: "npm:^8.0.0"
|
||||
lint-staged: "npm:^15.5.0"
|
||||
@ -8593,6 +8593,13 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"chardet@npm:^2.1.0":
|
||||
version: 2.1.0
|
||||
resolution: "chardet@npm:2.1.0"
|
||||
checksum: 10c0/d1b03e47371851ed72741a898281d58f8a9b577aeea6fdfa75a86832898b36c550b3ad057e66d50d774a9cebd9f56c66b6880e4fe75e387794538ba7565b0b6f
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"charenc@npm:0.0.2":
|
||||
version: 0.0.2
|
||||
resolution: "charenc@npm:0.0.2"
|
||||
@ -13350,13 +13357,6 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"jschardet@npm:^3.1.4":
|
||||
version: 3.1.4
|
||||
resolution: "jschardet@npm:3.1.4"
|
||||
checksum: 10c0/d72c724ff60bc185d3962617ffda6849c6d632a935820841078c656a5247d73617a5df3b233e1fb1064de8683f7dae1b422b68186d1d6db22117b59edb5433dc
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"jsdom@npm:26.1.0":
|
||||
version: 26.1.0
|
||||
resolution: "jsdom@npm:26.1.0"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user