From d574a09529118e7d91468f151d2dd66d416a765c Mon Sep 17 00:00:00 2001 From: Wenwei Lin <75592450+wenwei-lin@users.noreply.github.com> Date: Sat, 15 Feb 2025 22:50:05 +0800 Subject: [PATCH] fix: support html file in knowledge base (#1703) --- src/main/loader/index.ts | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/main/loader/index.ts b/src/main/loader/index.ts index e8444d2141..e69c3ff224 100644 --- a/src/main/loader/index.ts +++ b/src/main/loader/index.ts @@ -2,6 +2,7 @@ import * as fs from 'node:fs' import { LocalPathLoader, RAGApplication, TextLoader } from '@llm-tools/embedjs' import type { AddLoaderReturn } from '@llm-tools/embedjs-interfaces' +import { WebLoader } from '@llm-tools/embedjs-loader-web' import { LoaderReturn } from '@shared/config/types' import { FileType, KnowledgeBaseParams } from '@types' import Logger from 'electron-log' @@ -69,8 +70,26 @@ export async function addFileLoader( } as LoaderReturn } - // 文本类型 const fileContent = fs.readFileSync(file.path, 'utf-8') + // HTML类型 + if (['.html', '.htm'].includes(file.ext)) { + const loaderReturn = await ragApplication.addLoader( + new WebLoader({ + urlOrContent: fileContent, + chunkSize: base.chunkSize, + chunkOverlap: base.chunkOverlap + }) as any, + forceReload + ) + return { + entriesAdded: loaderReturn.entriesAdded, + uniqueId: loaderReturn.uniqueId, + uniqueIds: [loaderReturn.uniqueId], + loaderType: loaderReturn.loaderType + } + } + + // 文本类型 const loaderReturn = await ragApplication.addLoader( new TextLoader({ text: fileContent, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any, forceReload