diff --git a/src/main/loader/index.ts b/src/main/loader/index.ts index e8444d2141..e69c3ff224 100644 --- a/src/main/loader/index.ts +++ b/src/main/loader/index.ts @@ -2,6 +2,7 @@ import * as fs from 'node:fs' import { LocalPathLoader, RAGApplication, TextLoader } from '@llm-tools/embedjs' import type { AddLoaderReturn } from '@llm-tools/embedjs-interfaces' +import { WebLoader } from '@llm-tools/embedjs-loader-web' import { LoaderReturn } from '@shared/config/types' import { FileType, KnowledgeBaseParams } from '@types' import Logger from 'electron-log' @@ -69,8 +70,26 @@ export async function addFileLoader( } as LoaderReturn } - // 文本类型 const fileContent = fs.readFileSync(file.path, 'utf-8') + // HTML类型 + if (['.html', '.htm'].includes(file.ext)) { + const loaderReturn = await ragApplication.addLoader( + new WebLoader({ + urlOrContent: fileContent, + chunkSize: base.chunkSize, + chunkOverlap: base.chunkOverlap + }) as any, + forceReload + ) + return { + entriesAdded: loaderReturn.entriesAdded, + uniqueId: loaderReturn.uniqueId, + uniqueIds: [loaderReturn.uniqueId], + loaderType: loaderReturn.loaderType + } + } + + // 文本类型 const loaderReturn = await ragApplication.addLoader( new TextLoader({ text: fileContent, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any, forceReload