From 9b74e88884fa8c60d947feca548e5130360f2539 Mon Sep 17 00:00:00 2001 From: kangfenmao Date: Thu, 13 Feb 2025 17:49:00 +0800 Subject: [PATCH] feat: Enhance file processing and loader configuration - Remove image loader from dependencies - Update file loading to skip image, video, and audio files - Add logging for knowledge base file processing - Modify common file extensions list - Add type ignore for LocalPathLoader to resolve TypeScript issues --- package.json | 2 +- src/main/loader/index.ts | 7 ++++++- src/main/utils/file.ts | 5 ++++- src/renderer/src/pages/knowledge/KnowledgeContent.tsx | 1 + 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/package.json b/package.json index e8b69195ab..40fdd9128e 100644 --- a/package.json +++ b/package.json @@ -55,7 +55,6 @@ "@llm-tools/embedjs": "patch:@llm-tools/embedjs@npm%3A0.1.28#~/.yarn/patches/@llm-tools-embedjs-npm-0.1.28-8e4393fa2d.patch", "@llm-tools/embedjs-libsql": "^0.1.28", "@llm-tools/embedjs-loader-csv": "^0.1.28", - "@llm-tools/embedjs-loader-image": "^0.1.28", "@llm-tools/embedjs-loader-markdown": "patch:@llm-tools/embedjs-loader-markdown@npm%3A0.1.28#~/.yarn/patches/@llm-tools-embedjs-loader-markdown-npm-0.1.28-81647ffac6.patch", "@llm-tools/embedjs-loader-msoffice": "^0.1.28", "@llm-tools/embedjs-loader-pdf": "^0.1.28", @@ -86,6 +85,7 @@ "@electron-toolkit/tsconfig": "^1.0.1", "@hello-pangea/dnd": "^16.6.0", "@kangfenmao/keyv-storage": "^0.1.0", + "@llm-tools/embedjs-loader-image": "^0.1.28", "@reduxjs/toolkit": "^2.2.5", "@types/adm-zip": "^0", "@types/fs-extra": "^11", diff --git a/src/main/loader/index.ts b/src/main/loader/index.ts index b7416efcee..e8444d2141 100644 --- a/src/main/loader/index.ts +++ b/src/main/loader/index.ts @@ -4,11 +4,12 @@ import { LocalPathLoader, RAGApplication, TextLoader } from '@llm-tools/embedjs' import type { AddLoaderReturn } from '@llm-tools/embedjs-interfaces' import { LoaderReturn } from '@shared/config/types' import { FileType, KnowledgeBaseParams } from '@types' +import Logger from 'electron-log' import { OdLoader, OdType } from './odLoader' // embedjs内置loader类型 -const commonExts = ['.pdf', '.csv', '.json', '.docx', '.pptx', '.xlsx', '.md', '.jpeg'] +const commonExts = ['.pdf', '.csv', '.json', '.docx', '.pptx', '.xlsx', '.md'] export async function addOdLoader( ragApplication: RAGApplication, @@ -45,6 +46,7 @@ export async function addFileLoader( // 内置类型 if (commonExts.includes(file.ext)) { const loaderReturn = await ragApplication.addLoader( + // @ts-ignore LocalPathLoader new LocalPathLoader({ path: file.path, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any, forceReload ) @@ -73,6 +75,9 @@ export async function addFileLoader( new TextLoader({ text: fileContent, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any, forceReload ) + + Logger.info('[KnowledgeBase] processing file', file.path) + return { entriesAdded: loaderReturn.entriesAdded, uniqueId: loaderReturn.uniqueId, diff --git a/src/main/utils/file.ts b/src/main/utils/file.ts index 50e1479868..8f837a2616 100644 --- a/src/main/utils/file.ts +++ b/src/main/utils/file.ts @@ -25,7 +25,9 @@ export function getAllFiles(dirPath: string, arrayOfFiles: FileType[] = []): Fil const ext = path.extname(file) const fileType = getFileType(ext) - if (fileType === FileTypes.OTHER) return + if ([FileTypes.OTHER, FileTypes.IMAGE, FileTypes.VIDEO, FileTypes.AUDIO].includes(fileType)) { + return + } const name = path.basename(file) const size = fs.statSync(fullPath).size @@ -41,6 +43,7 @@ export function getAllFiles(dirPath: string, arrayOfFiles: FileType[] = []): Fil type: fileType, created_at: new Date() } + arrayOfFiles.push(fileItem) } }) diff --git a/src/renderer/src/pages/knowledge/KnowledgeContent.tsx b/src/renderer/src/pages/knowledge/KnowledgeContent.tsx index 78b121add9..7fb17af479 100644 --- a/src/renderer/src/pages/knowledge/KnowledgeContent.tsx +++ b/src/renderer/src/pages/knowledge/KnowledgeContent.tsx @@ -37,6 +37,7 @@ interface KnowledgeContentProps { const fileTypes = [...documentExts, ...textExts] const KnowledgeContent: FC = ({ selectedBase }) => { const { t } = useTranslation() + const { base, noteItems,