From 27af64f2bd119db4661f2382ab683101d46dea28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=87=AA=E7=94=B1=E7=9A=84=E4=B8=96=E7=95=8C=E4=BA=BA?= <3196812536@qq.com> Date: Tue, 29 Jul 2025 17:27:36 +0800 Subject: [PATCH] fix: change jschardet to chardet (#8577) * fix: change jschardet to chardet * Update file.test.ts * fix: error * fix: test fail * fix: test error * Update file.test.ts * fix: optimize details * Update file.test.ts * Update file.ts * Update file.ts * Update file.test.ts --- package.json | 2 +- src/main/services/KnowledgeService.ts | 2 +- src/main/services/NodeTraceService.ts | 2 +- src/main/utils/__tests__/file.test.ts | 40 ++++----------- src/main/utils/file.ts | 49 +++++++------------ .../anthropic/AnthropicVertexClient.ts | 5 +- src/renderer/src/hooks/useAssistant.ts | 2 +- src/renderer/src/i18n/index.ts | 2 +- src/renderer/src/queue/KnowledgeQueue.ts | 2 +- src/renderer/src/services/KnowledgeService.ts | 2 +- yarn.lock | 16 +++--- 11 files changed, 44 insertions(+), 80 deletions(-) diff --git a/package.json b/package.json index d996f3fc65..cd8805fb8a 100644 --- a/package.json +++ b/package.json @@ -168,6 +168,7 @@ "async-mutex": "^0.5.0", "axios": "^1.7.3", "browser-image-compression": "^2.0.2", + "chardet": "^2.1.0", "cli-progress": "^3.12.0", "code-inspector-plugin": "^0.20.14", "color": "^5.0.0", @@ -204,7 +205,6 @@ "iconv-lite": "^0.6.3", "jaison": "^2.0.2", "jest-styled-components": "^7.2.0", - "jschardet": "^3.1.4", "linguist-languages": "^8.0.0", "lint-staged": "^15.5.0", "lodash": "^4.17.21", diff --git a/src/main/services/KnowledgeService.ts b/src/main/services/KnowledgeService.ts index 59ff2a049c..c60aa39b6a 100644 --- a/src/main/services/KnowledgeService.ts +++ b/src/main/services/KnowledgeService.ts @@ -38,7 +38,7 @@ import { IpcChannel } from '@shared/IpcChannel' import { FileMetadata, KnowledgeBaseParams, KnowledgeItem } from '@types' import { v4 as uuidv4 } from 'uuid' -const logger = loggerService.withContext('KnowledgeService') +const logger = loggerService.withContext('MainKnowledgeService') export interface KnowledgeBaseAddItemOptions { base: KnowledgeBaseParams diff --git a/src/main/services/NodeTraceService.ts b/src/main/services/NodeTraceService.ts index c3b7e9c0dc..d2e4db20c9 100644 --- a/src/main/services/NodeTraceService.ts +++ b/src/main/services/NodeTraceService.ts @@ -1,3 +1,4 @@ +import { loggerService } from '@logger' import { isDev } from '@main/constant' import { CacheBatchSpanProcessor, FunctionSpanExporter } from '@mcp-trace/trace-core' import { NodeTracer as MCPNodeTracer } from '@mcp-trace/trace-node/nodeTracer' @@ -6,7 +7,6 @@ import { BrowserWindow, ipcMain } from 'electron' import * as path from 'path' import { ConfigKeys, configManager } from './ConfigManager' -import { loggerService } from './LoggerService' import { spanCacheService } from './SpanCacheService' export const TRACER_NAME = 'CherryStudio' diff --git a/src/main/utils/__tests__/file.test.ts b/src/main/utils/__tests__/file.test.ts index a95665f815..f6f6d2c40e 100644 --- a/src/main/utils/__tests__/file.test.ts +++ b/src/main/utils/__tests__/file.test.ts @@ -4,8 +4,8 @@ import os from 'node:os' import path from 'node:path' import { FileTypes } from '@types' +import chardet from 'chardet' import iconv from 'iconv-lite' -import { detectAll as detectEncodingAll } from 'jschardet' import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' import { readTextFileWithAutoEncoding } from '../file' @@ -260,46 +260,24 @@ describe('file', () => { const mockFilePath = '/path/to/mock/file.txt' it('should read file with auto encoding', async () => { - const content = '这是一段GB2312编码的测试内容' - const buffer = iconv.encode(content, 'GB2312') + const content = '这是一段GB18030编码的测试内容' + const buffer = iconv.encode(content, 'GB18030') - // 创建模拟的 FileHandle 对象 - const mockFileHandle = { - read: vi.fn().mockResolvedValue({ - bytesRead: buffer.byteLength, - buffer: buffer - }), - close: vi.fn().mockResolvedValue(undefined) - } - - // 模拟 open 方法 - vi.spyOn(fsPromises, 'open').mockResolvedValue(mockFileHandle as any) + // 模拟文件读取和编码检测 vi.spyOn(fsPromises, 'readFile').mockResolvedValue(buffer) + vi.spyOn(chardet, 'detectFile').mockResolvedValue('GB18030') const result = await readTextFileWithAutoEncoding(mockFilePath) expect(result).toBe(content) }) it('should try to fix bad detected encoding', async () => { - const content = '这是一段GB2312编码的测试内容' - const buffer = iconv.encode(content, 'GB2312') + const content = '这是一段UTF-8编码的测试内容' + const buffer = iconv.encode(content, 'UTF-8') - // 创建模拟的 FileHandle 对象 - const mockFileHandle = { - read: vi.fn().mockResolvedValue({ - bytesRead: buffer.byteLength, - buffer: buffer - }), - close: vi.fn().mockResolvedValue(undefined) - } - - // 模拟 fs.open 方法 - vi.spyOn(fsPromises, 'open').mockResolvedValue(mockFileHandle as any) + // 模拟文件读取 vi.spyOn(fsPromises, 'readFile').mockResolvedValue(buffer) - vi.mocked(vi.fn(detectEncodingAll)).mockReturnValue([ - { encoding: 'UTF-8', confidence: 0.9 }, - { encoding: 'GB2312', confidence: 0.8 } - ]) + vi.spyOn(chardet, 'detectFile').mockResolvedValue('GB18030') const result = await readTextFileWithAutoEncoding(mockFilePath) expect(result).toBe(content) diff --git a/src/main/utils/file.ts b/src/main/utils/file.ts index e73e546022..dc6af193f8 100644 --- a/src/main/utils/file.ts +++ b/src/main/utils/file.ts @@ -1,14 +1,14 @@ import * as fs from 'node:fs' -import { open, readFile } from 'node:fs/promises' +import { readFile } from 'node:fs/promises' import os from 'node:os' import path from 'node:path' import { loggerService } from '@logger' import { audioExts, documentExts, imageExts, MB, textExts, videoExts } from '@shared/config/constant' import { FileMetadata, FileTypes } from '@types' +import chardet from 'chardet' import { app } from 'electron' import iconv from 'iconv-lite' -import * as jschardet from 'jschardet' import { v4 as uuidv4 } from 'uuid' const logger = loggerService.withContext('Utils:File') @@ -170,39 +170,24 @@ export function getMcpDir() { * @returns 解码后的文件内容 */ export async function readTextFileWithAutoEncoding(filePath: string): Promise { - // 读取前1MB以检测编码 - const buffer = Buffer.alloc(1 * MB) - const fh = await open(filePath, 'r') - const { buffer: bufferRead } = await fh.read(buffer, 0, 1 * MB, 0) - await fh.close() - - // 获取文件编码格式,最多取前两个可能的编码 - const encodings = jschardet - .detectAll(bufferRead) - .map((item) => ({ - ...item, - encoding: item.encoding === 'ascii' ? 'UTF-8' : item.encoding - })) - .filter((item, index, array) => array.findIndex((prevItem) => prevItem.encoding === item.encoding) === index) - .slice(0, 2) - - if (encodings.length === 0) { - logger.error('Failed to detect encoding. Use utf-8 to decode.') - const data = await readFile(filePath) - return iconv.decode(data, 'UTF-8') - } + const encoding = (await chardet.detectFile(filePath, { sampleSize: MB })) || 'UTF-8' + logger.debug(`File ${filePath} detected encoding: ${encoding}`) + const encodings = [encoding, 'UTF-8'] const data = await readFile(filePath) - for (const item of encodings) { - const encoding = item.encoding - const content = iconv.decode(data, encoding) - if (content.includes('\uFFFD')) { - logger.error( - `File ${filePath} was auto-detected as ${encoding} encoding, but contains invalid characters. Trying other encodings` - ) - } else { - return content + for (const encoding of encodings) { + try { + const content = iconv.decode(data, encoding) + if (!content.includes('\uFFFD')) { + return content + } else { + logger.warn( + `File ${filePath} was auto-detected as ${encoding} encoding, but contains invalid characters. Trying other encodings` + ) + } + } catch (error) { + logger.error(`Failed to decode file ${filePath} with encoding ${encoding}: ${error}`) } } diff --git a/src/renderer/src/aiCore/clients/anthropic/AnthropicVertexClient.ts b/src/renderer/src/aiCore/clients/anthropic/AnthropicVertexClient.ts index 7ef4ea1753..bb96ac90ae 100644 --- a/src/renderer/src/aiCore/clients/anthropic/AnthropicVertexClient.ts +++ b/src/renderer/src/aiCore/clients/anthropic/AnthropicVertexClient.ts @@ -1,13 +1,14 @@ import Anthropic from '@anthropic-ai/sdk' import AnthropicVertex from '@anthropic-ai/vertex-sdk' +import { loggerService } from '@logger' import { getVertexAILocation, getVertexAIProjectId, getVertexAIServiceAccount } from '@renderer/hooks/useVertexAI' -import { loggerService } from '@renderer/services/LoggerService' import { Provider } from '@renderer/types' import { isEmpty } from 'lodash' -const logger = loggerService.withContext('AnthropicVertexClient') import { AnthropicAPIClient } from './AnthropicAPIClient' +const logger = loggerService.withContext('AnthropicVertexClient') + export class AnthropicVertexClient extends AnthropicAPIClient { sdkInstance: AnthropicVertex | undefined = undefined private authHeaders?: Record diff --git a/src/renderer/src/hooks/useAssistant.ts b/src/renderer/src/hooks/useAssistant.ts index 375a41e37b..24d192501e 100644 --- a/src/renderer/src/hooks/useAssistant.ts +++ b/src/renderer/src/hooks/useAssistant.ts @@ -1,6 +1,6 @@ +import { loggerService } from '@logger' import { db } from '@renderer/databases' import { getDefaultTopic } from '@renderer/services/AssistantService' -import { loggerService } from '@renderer/services/LoggerService' import { useAppDispatch, useAppSelector } from '@renderer/store' import { addAssistant, diff --git a/src/renderer/src/i18n/index.ts b/src/renderer/src/i18n/index.ts index b7c9bd7e22..da6b924af9 100644 --- a/src/renderer/src/i18n/index.ts +++ b/src/renderer/src/i18n/index.ts @@ -1,4 +1,4 @@ -import { loggerService } from '@renderer/services/LoggerService' +import { loggerService } from '@logger' import { defaultLanguage } from '@shared/config/constant' import i18n from 'i18next' import { initReactI18next } from 'react-i18next' diff --git a/src/renderer/src/queue/KnowledgeQueue.ts b/src/renderer/src/queue/KnowledgeQueue.ts index 3be945ff68..a618524633 100644 --- a/src/renderer/src/queue/KnowledgeQueue.ts +++ b/src/renderer/src/queue/KnowledgeQueue.ts @@ -195,7 +195,7 @@ class KnowledgeQueue { updateBaseItemIsPreprocessed({ baseId, itemId: item.id, - isPreprocessed: base.preprocessOrOcrProvider ? true : false + isPreprocessed: !!base.preprocessOrOcrProvider }) ) } diff --git a/src/renderer/src/services/KnowledgeService.ts b/src/renderer/src/services/KnowledgeService.ts index 3e646648ab..b55ab3ab9f 100644 --- a/src/renderer/src/services/KnowledgeService.ts +++ b/src/renderer/src/services/KnowledgeService.ts @@ -13,7 +13,7 @@ import { isEmpty } from 'lodash' import { getProviderByModel } from './AssistantService' import FileManager from './FileManager' -const logger = loggerService.withContext('KnowledgeService') +const logger = loggerService.withContext('RendererKnowledgeService') export const getKnowledgeBaseParams = (base: KnowledgeBase): KnowledgeBaseParams => { const provider = getProviderByModel(base.model) diff --git a/yarn.lock b/yarn.lock index ced877958e..7af86162d9 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7293,6 +7293,7 @@ __metadata: async-mutex: "npm:^0.5.0" axios: "npm:^1.7.3" browser-image-compression: "npm:^2.0.2" + chardet: "npm:^2.1.0" cli-progress: "npm:^3.12.0" code-inspector-plugin: "npm:^0.20.14" color: "npm:^5.0.0" @@ -7330,7 +7331,6 @@ __metadata: iconv-lite: "npm:^0.6.3" jaison: "npm:^2.0.2" jest-styled-components: "npm:^7.2.0" - jschardet: "npm:^3.1.4" jsdom: "npm:26.1.0" linguist-languages: "npm:^8.0.0" lint-staged: "npm:^15.5.0" @@ -8593,6 +8593,13 @@ __metadata: languageName: node linkType: hard +"chardet@npm:^2.1.0": + version: 2.1.0 + resolution: "chardet@npm:2.1.0" + checksum: 10c0/d1b03e47371851ed72741a898281d58f8a9b577aeea6fdfa75a86832898b36c550b3ad057e66d50d774a9cebd9f56c66b6880e4fe75e387794538ba7565b0b6f + languageName: node + linkType: hard + "charenc@npm:0.0.2": version: 0.0.2 resolution: "charenc@npm:0.0.2" @@ -13350,13 +13357,6 @@ __metadata: languageName: node linkType: hard -"jschardet@npm:^3.1.4": - version: 3.1.4 - resolution: "jschardet@npm:3.1.4" - checksum: 10c0/d72c724ff60bc185d3962617ffda6849c6d632a935820841078c656a5247d73617a5df3b233e1fb1064de8683f7dae1b422b68186d1d6db22117b59edb5433dc - languageName: node - linkType: hard - "jsdom@npm:26.1.0": version: 26.1.0 resolution: "jsdom@npm:26.1.0"