mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-21 07:40:11 +08:00
feat(WebSearch): support RAG for external websearch, improve feedback (#7446)
* feat(WebSearch, RAG): support RAG for external websearch * refactor(WebSearch): handle content limit in service * refactor: update migrate * refactor: UI, constants, types * refactor: migrate contentLimit to cutoffLimit * refactor: update default rag document count * refactor: add a helper function for merging references * refactor: reference filtering * feat: feedback for websearch phases * feat: support cutoff by token * refactor: add a warning and fix the bound of cutoff limit * fix: not pass `dimensions` if it is not set by the user * refactor: update i18n and error message * refactor: improve UI * fix: cutoff unit style
This commit is contained in:
parent
1fe439bb51
commit
d463d6ea2e
@ -213,7 +213,7 @@
|
|||||||
"styled-components": "^6.1.11",
|
"styled-components": "^6.1.11",
|
||||||
"tar": "^7.4.3",
|
"tar": "^7.4.3",
|
||||||
"tiny-pinyin": "^1.3.2",
|
"tiny-pinyin": "^1.3.2",
|
||||||
"tokenx": "^0.4.1",
|
"tokenx": "^1.1.0",
|
||||||
"typescript": "^5.6.2",
|
"typescript": "^5.6.2",
|
||||||
"uuid": "^10.0.0",
|
"uuid": "^10.0.0",
|
||||||
"vite": "6.2.6",
|
"vite": "6.2.6",
|
||||||
|
|||||||
44
src/main/loader/noteLoader.ts
Normal file
44
src/main/loader/noteLoader.ts
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
import { BaseLoader } from '@cherrystudio/embedjs-interfaces'
|
||||||
|
import { cleanString } from '@cherrystudio/embedjs-utils'
|
||||||
|
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'
|
||||||
|
import md5 from 'md5'
|
||||||
|
|
||||||
|
/**
 * Loader that feeds an in-memory note (plain text) into the embedding pipeline.
 *
 * The unique id handed to the base loader is `NoteLoader_` followed by the MD5 digest of
 * the note text concatenated with the source URL (empty string when no URL is given).
 */
export class NoteLoader extends BaseLoader<{ type: 'NoteLoader' }> {
  private readonly text: string
  private readonly sourceUrl?: string

  /**
   * @param options.text - Raw note content to be cleaned and split.
   * @param options.sourceUrl - Optional origin; reported as the `source` of every chunk.
   * @param options.chunkSize - Forwarded to the base loader; 2000 is used when omitted.
   * @param options.chunkOverlap - Forwarded to the base loader; 0 is used when omitted.
   */
  constructor(options: { text: string; sourceUrl?: string; chunkSize?: number; chunkOverlap?: number }) {
    super(
      `NoteLoader_${md5(options.text + (options.sourceUrl || ''))}`,
      { text: options.text, sourceUrl: options.sourceUrl },
      options.chunkSize ?? 2000,
      options.chunkOverlap ?? 0
    )
    this.text = options.text
    this.sourceUrl = options.sourceUrl
  }

  /**
   * Splits the cleaned note text with a recursive character splitter configured from this
   * loader's `chunkSize` / `chunkOverlap`, then yields one record per resulting piece.
   * Each record is tagged with the `NoteLoader` type and with the source URL, or the
   * literal `'note'` when no (non-empty) URL was supplied.
   */
  override async *getUnfilteredChunks() {
    const { chunkSize, chunkOverlap } = this
    const splitter = new RecursiveCharacterTextSplitter({ chunkSize, chunkOverlap })
    const pieces = await splitter.splitText(cleanString(this.text))

    // `sourceUrl` is readonly, so the fallback can be resolved once for all chunks.
    const source = this.sourceUrl || 'note'

    for (const pageContent of pieces) {
      yield { pageContent, metadata: { type: 'NoteLoader' as const, source } }
    }
  }
}
|
||||||
@ -16,13 +16,14 @@
|
|||||||
import * as fs from 'node:fs'
|
import * as fs from 'node:fs'
|
||||||
import path from 'node:path'
|
import path from 'node:path'
|
||||||
|
|
||||||
import { RAGApplication, RAGApplicationBuilder, TextLoader } from '@cherrystudio/embedjs'
|
import { RAGApplication, RAGApplicationBuilder } from '@cherrystudio/embedjs'
|
||||||
import type { ExtractChunkData } from '@cherrystudio/embedjs-interfaces'
|
import type { ExtractChunkData } from '@cherrystudio/embedjs-interfaces'
|
||||||
import { LibSqlDb } from '@cherrystudio/embedjs-libsql'
|
import { LibSqlDb } from '@cherrystudio/embedjs-libsql'
|
||||||
import { SitemapLoader } from '@cherrystudio/embedjs-loader-sitemap'
|
import { SitemapLoader } from '@cherrystudio/embedjs-loader-sitemap'
|
||||||
import { WebLoader } from '@cherrystudio/embedjs-loader-web'
|
import { WebLoader } from '@cherrystudio/embedjs-loader-web'
|
||||||
import Embeddings from '@main/embeddings/Embeddings'
|
import Embeddings from '@main/embeddings/Embeddings'
|
||||||
import { addFileLoader } from '@main/loader'
|
import { addFileLoader } from '@main/loader'
|
||||||
|
import { NoteLoader } from '@main/loader/noteLoader'
|
||||||
import Reranker from '@main/reranker/Reranker'
|
import Reranker from '@main/reranker/Reranker'
|
||||||
import { windowService } from '@main/services/WindowService'
|
import { windowService } from '@main/services/WindowService'
|
||||||
import { getDataPath } from '@main/utils'
|
import { getDataPath } from '@main/utils'
|
||||||
@ -143,7 +144,7 @@ class KnowledgeService {
|
|||||||
this.getRagApplication(base)
|
this.getRagApplication(base)
|
||||||
}
|
}
|
||||||
|
|
||||||
public reset = async (_: Electron.IpcMainInvokeEvent, { base }: { base: KnowledgeBaseParams }): Promise<void> => {
|
public reset = async (_: Electron.IpcMainInvokeEvent, base: KnowledgeBaseParams): Promise<void> => {
|
||||||
const ragApplication = await this.getRagApplication(base)
|
const ragApplication = await this.getRagApplication(base)
|
||||||
await ragApplication.reset()
|
await ragApplication.reset()
|
||||||
}
|
}
|
||||||
@ -333,6 +334,7 @@ class KnowledgeService {
|
|||||||
): LoaderTask {
|
): LoaderTask {
|
||||||
const { base, item, forceReload } = options
|
const { base, item, forceReload } = options
|
||||||
const content = item.content as string
|
const content = item.content as string
|
||||||
|
const sourceUrl = (item as any).sourceUrl
|
||||||
|
|
||||||
const encoder = new TextEncoder()
|
const encoder = new TextEncoder()
|
||||||
const contentBytes = encoder.encode(content)
|
const contentBytes = encoder.encode(content)
|
||||||
@ -342,7 +344,12 @@ class KnowledgeService {
|
|||||||
state: LoaderTaskItemState.PENDING,
|
state: LoaderTaskItemState.PENDING,
|
||||||
task: () => {
|
task: () => {
|
||||||
const loaderReturn = ragApplication.addLoader(
|
const loaderReturn = ragApplication.addLoader(
|
||||||
new TextLoader({ text: content, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }),
|
new NoteLoader({
|
||||||
|
text: content,
|
||||||
|
sourceUrl,
|
||||||
|
chunkSize: base.chunkSize,
|
||||||
|
chunkOverlap: base.chunkOverlap
|
||||||
|
}),
|
||||||
forceReload
|
forceReload
|
||||||
) as Promise<LoaderReturn>
|
) as Promise<LoaderReturn>
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,5 @@
|
|||||||
import { Search } from 'lucide-react'
|
import { Search } from 'lucide-react'
|
||||||
import { motion } from 'motion/react'
|
import { motion } from 'motion/react'
|
||||||
import { useTranslation } from 'react-i18next'
|
|
||||||
import styled from 'styled-components'
|
import styled from 'styled-components'
|
||||||
|
|
||||||
interface Props {
|
interface Props {
|
||||||
@ -18,7 +17,6 @@ const spinnerVariants = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export default function Spinner({ text }: Props) {
|
export default function Spinner({ text }: Props) {
|
||||||
const { t } = useTranslation()
|
|
||||||
return (
|
return (
|
||||||
<Searching
|
<Searching
|
||||||
variants={spinnerVariants}
|
variants={spinnerVariants}
|
||||||
@ -31,7 +29,7 @@ export default function Spinner({ text }: Props) {
|
|||||||
ease: 'easeInOut'
|
ease: 'easeInOut'
|
||||||
}}>
|
}}>
|
||||||
<Search size={16} style={{ color: 'unset' }} />
|
<Search size={16} style={{ color: 'unset' }} />
|
||||||
<span>{t(text)}</span>
|
<span>{text}</span>
|
||||||
</Searching>
|
</Searching>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -3,6 +3,7 @@ export const DEFAULT_CONTEXTCOUNT = 5
|
|||||||
export const DEFAULT_MAX_TOKENS = 4096
|
export const DEFAULT_MAX_TOKENS = 4096
|
||||||
export const DEFAULT_KNOWLEDGE_DOCUMENT_COUNT = 6
|
export const DEFAULT_KNOWLEDGE_DOCUMENT_COUNT = 6
|
||||||
export const DEFAULT_KNOWLEDGE_THRESHOLD = 0.0
|
export const DEFAULT_KNOWLEDGE_THRESHOLD = 0.0
|
||||||
|
export const DEFAULT_WEBSEARCH_RAG_DOCUMENT_COUNT = 1
|
||||||
|
|
||||||
export const platform = window.electron?.process?.platform
|
export const platform = window.electron?.process?.platform
|
||||||
export const isMac = platform === 'darwin'
|
export const isMac = platform === 'darwin'
|
||||||
|
|||||||
@ -1,9 +1,12 @@
|
|||||||
import { useAppDispatch, useAppSelector } from '@renderer/store'
|
import { useAppDispatch, useAppSelector } from '@renderer/store'
|
||||||
import {
|
import {
|
||||||
addSubscribeSource as _addSubscribeSource,
|
addSubscribeSource as _addSubscribeSource,
|
||||||
|
type CompressionConfig,
|
||||||
removeSubscribeSource as _removeSubscribeSource,
|
removeSubscribeSource as _removeSubscribeSource,
|
||||||
|
setCompressionConfig,
|
||||||
setDefaultProvider as _setDefaultProvider,
|
setDefaultProvider as _setDefaultProvider,
|
||||||
setSubscribeSources as _setSubscribeSources,
|
setSubscribeSources as _setSubscribeSources,
|
||||||
|
updateCompressionConfig,
|
||||||
updateSubscribeBlacklist as _updateSubscribeBlacklist,
|
updateSubscribeBlacklist as _updateSubscribeBlacklist,
|
||||||
updateWebSearchProvider,
|
updateWebSearchProvider,
|
||||||
updateWebSearchProviders
|
updateWebSearchProviders
|
||||||
@ -90,3 +93,14 @@ export const useBlacklist = () => {
|
|||||||
setSubscribeSources
|
setSubscribeSources
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Hook that exposes every field of the `websearch` store slice together with two
 * dispatch-bound helpers for the compression configuration:
 * - `setCompressionConfig` dispatches `setCompressionConfig` with a full `CompressionConfig`.
 * - `updateCompressionConfig` dispatches `updateCompressionConfig` with a partial one.
 */
export const useWebSearchSettings = () => {
  const websearchState = useAppSelector((root) => root.websearch)
  const dispatch = useAppDispatch()

  const applyCompressionConfig = (config: CompressionConfig) => {
    return dispatch(setCompressionConfig(config))
  }

  const patchCompressionConfig = (config: Partial<CompressionConfig>) => {
    return dispatch(updateCompressionConfig(config))
  }

  return {
    ...websearchState,
    setCompressionConfig: applyCompressionConfig,
    updateCompressionConfig: patchCompressionConfig
  }
}
|
||||||
|
|||||||
@ -703,6 +703,13 @@
|
|||||||
"success.siyuan.export": "Successfully exported to Siyuan Note",
|
"success.siyuan.export": "Successfully exported to Siyuan Note",
|
||||||
"warn.yuque.exporting": "Exporting to Yuque, please do not request export repeatedly!",
|
"warn.yuque.exporting": "Exporting to Yuque, please do not request export repeatedly!",
|
||||||
"warn.siyuan.exporting": "Exporting to Siyuan Note, please do not request export repeatedly!",
|
"warn.siyuan.exporting": "Exporting to Siyuan Note, please do not request export repeatedly!",
|
||||||
|
"websearch": {
|
||||||
|
"rag": "Executing RAG...",
|
||||||
|
"rag_complete": "Keeping {{countAfter}} out of {{countBefore}} results...",
|
||||||
|
"rag_failed": "RAG failed, returning empty results...",
|
||||||
|
"cutoff": "Truncating search content...",
|
||||||
|
"fetch_complete": "Completed {{count}} searches..."
|
||||||
|
},
|
||||||
"download.success": "Download successfully",
|
"download.success": "Download successfully",
|
||||||
"download.failed": "Download failed"
|
"download.failed": "Download failed"
|
||||||
},
|
},
|
||||||
@ -776,6 +783,7 @@
|
|||||||
"dimensions": "Dimensions {{dimensions}}",
|
"dimensions": "Dimensions {{dimensions}}",
|
||||||
"edit": "Edit Model",
|
"edit": "Edit Model",
|
||||||
"embedding": "Embedding",
|
"embedding": "Embedding",
|
||||||
|
"embedding_dimensions": "Embedding Dimensions",
|
||||||
"embedding_model": "Embedding Model",
|
"embedding_model": "Embedding Model",
|
||||||
"embedding_model_tooltip": "Add in Settings->Model Provider->Manage",
|
"embedding_model_tooltip": "Add in Settings->Model Provider->Manage",
|
||||||
"function_calling": "Function Calling",
|
"function_calling": "Function Calling",
|
||||||
@ -1845,8 +1853,33 @@
|
|||||||
"overwrite_tooltip": "Force use search service instead of LLM",
|
"overwrite_tooltip": "Force use search service instead of LLM",
|
||||||
"apikey": "API key",
|
"apikey": "API key",
|
||||||
"free": "Free",
|
"free": "Free",
|
||||||
"content_limit": "Content length limit",
|
"compression": {
|
||||||
"content_limit_tooltip": "Limit the content length of the search results; content that exceeds the limit will be truncated."
|
"title": "Search Result Compression",
|
||||||
|
"method": "Compression Method",
|
||||||
|
"method.none": "None",
|
||||||
|
"method.cutoff": "Cutoff",
|
||||||
|
"cutoff.limit": "Cutoff Limit",
|
||||||
|
"cutoff.limit.placeholder": "Enter length",
|
||||||
|
"cutoff.limit.tooltip": "Limit the content length of search results; content exceeding the limit will be truncated (e.g., 2000 characters)",
|
||||||
|
"cutoff.unit.char": "Char",
|
||||||
|
"cutoff.unit.token": "Token",
|
||||||
|
"method.rag": "RAG",
|
||||||
|
"rag.document_count": "Document Count",
|
||||||
|
"rag.document_count.default": "Default",
|
||||||
|
"rag.document_count.tooltip": "Expected number of documents to extract from each search result. The actual total number of extracted documents is this value multiplied by the number of search results.",
|
||||||
|
"rag.embedding_dimensions.auto_get": "Auto Get Dimensions",
|
||||||
|
"rag.embedding_dimensions.placeholder": "Leave empty",
|
||||||
|
"rag.embedding_dimensions.tooltip": "If left blank, the dimensions parameter will not be passed",
|
||||||
|
"info": {
|
||||||
|
"dimensions_auto_success": "Dimensions auto-obtained successfully, dimensions: {{dimensions}}"
|
||||||
|
},
|
||||||
|
"error": {
|
||||||
|
"embedding_model_required": "Please select an embedding model first",
|
||||||
|
"dimensions_auto_failed": "Failed to auto-obtain dimensions",
|
||||||
|
"provider_not_found": "Provider not found",
|
||||||
|
"rag_failed": "RAG failed"
|
||||||
|
}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"quickPhrase": {
|
"quickPhrase": {
|
||||||
"title": "Quick Phrases",
|
"title": "Quick Phrases",
|
||||||
|
|||||||
@ -702,6 +702,13 @@
|
|||||||
"warn.yuque.exporting": "語雀にエクスポート中です。重複してエクスポートしないでください!",
|
"warn.yuque.exporting": "語雀にエクスポート中です。重複してエクスポートしないでください!",
|
||||||
"warn.siyuan.exporting": "思源ノートにエクスポート中です。重複してエクスポートしないでください!",
|
"warn.siyuan.exporting": "思源ノートにエクスポート中です。重複してエクスポートしないでください!",
|
||||||
"error.yuque.no_config": "語雀のAPIアドレスまたはトークンが設定されていません",
|
"error.yuque.no_config": "語雀のAPIアドレスまたはトークンが設定されていません",
|
||||||
|
"websearch": {
|
||||||
|
"rag": "RAGを実行中...",
|
||||||
|
"rag_complete": "{{countBefore}}個の結果から{{countAfter}}個を保持...",
|
||||||
|
"rag_failed": "RAGが失敗しました。空の結果を返します...",
|
||||||
|
"cutoff": "検索内容を切り詰めています...",
|
||||||
|
"fetch_complete": "{{count}}回の検索を完了しました..."
|
||||||
|
},
|
||||||
"download.success": "ダウンロードに成功しました",
|
"download.success": "ダウンロードに成功しました",
|
||||||
"download.failed": "ダウンロードに失敗しました",
|
"download.failed": "ダウンロードに失敗しました",
|
||||||
"error.fetchTopicName": "トピック名の取得に失敗しました"
|
"error.fetchTopicName": "トピック名の取得に失敗しました"
|
||||||
@ -776,6 +783,7 @@
|
|||||||
"dimensions": "{{dimensions}} 次元",
|
"dimensions": "{{dimensions}} 次元",
|
||||||
"edit": "モデルを編集",
|
"edit": "モデルを編集",
|
||||||
"embedding": "埋め込み",
|
"embedding": "埋め込み",
|
||||||
|
"embedding_dimensions": "埋め込み次元",
|
||||||
"embedding_model": "埋め込みモデル",
|
"embedding_model": "埋め込みモデル",
|
||||||
"embedding_model_tooltip": "設定->モデルサービス->管理で追加",
|
"embedding_model_tooltip": "設定->モデルサービス->管理で追加",
|
||||||
"function_calling": "関数呼び出し",
|
"function_calling": "関数呼び出し",
|
||||||
@ -1826,8 +1834,33 @@
|
|||||||
"overwrite_tooltip": "大規模言語モデルではなく、サービス検索を使用する",
|
"overwrite_tooltip": "大規模言語モデルではなく、サービス検索を使用する",
|
||||||
"apikey": "API キー",
|
"apikey": "API キー",
|
||||||
"free": "無料",
|
"free": "無料",
|
||||||
"content_limit": "内容の長さ制限",
|
"compression": {
|
||||||
"content_limit_tooltip": "検索結果の内容長を制限し、制限を超える内容は切り捨てられます。"
|
"title": "検索結果の圧縮",
|
||||||
|
"method": "圧縮方法",
|
||||||
|
"method.none": "圧縮しない",
|
||||||
|
"method.cutoff": "切り捨て",
|
||||||
|
"cutoff.limit": "切り捨て長",
|
||||||
|
"cutoff.limit.placeholder": "長さを入力",
|
||||||
|
"cutoff.limit.tooltip": "検索結果の内容長を制限し、制限を超える内容は切り捨てられます(例:2000文字)",
|
||||||
|
"cutoff.unit.char": "文字",
|
||||||
|
"cutoff.unit.token": "トークン",
|
||||||
|
"method.rag": "RAG",
|
||||||
|
"rag.document_count": "文書数",
|
||||||
|
"rag.document_count.default": "デフォルト",
|
||||||
|
"rag.document_count.tooltip": "単一の検索結果から抽出する文書数。実際に抽出される文書数は、この値に検索結果数を乗じたものです。",
|
||||||
|
"rag.embedding_dimensions.auto_get": "次元を自動取得",
|
||||||
|
"rag.embedding_dimensions.placeholder": "次元を設定しない",
|
||||||
|
"rag.embedding_dimensions.tooltip": "空の場合、dimensions パラメーターは渡されません",
|
||||||
|
"info": {
|
||||||
|
"dimensions_auto_success": "次元が自動取得されました。次元: {{dimensions}}"
|
||||||
|
},
|
||||||
|
"error": {
|
||||||
|
"embedding_model_required": "まず埋め込みモデルを選択してください",
|
||||||
|
"dimensions_auto_failed": "次元の自動取得に失敗しました",
|
||||||
|
"provider_not_found": "プロバイダーが見つかりません",
|
||||||
|
"rag_failed": "RAG に失敗しました"
|
||||||
|
}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"general.auto_check_update.title": "自動更新",
|
"general.auto_check_update.title": "自動更新",
|
||||||
"general.early_access.title": "早期アクセス",
|
"general.early_access.title": "早期アクセス",
|
||||||
|
|||||||
@ -702,6 +702,13 @@
|
|||||||
"success.siyuan.export": "Успешный экспорт в Siyuan",
|
"success.siyuan.export": "Успешный экспорт в Siyuan",
|
||||||
"warn.yuque.exporting": "Экспортируется в Yuque, пожалуйста, не отправляйте повторные запросы!",
|
"warn.yuque.exporting": "Экспортируется в Yuque, пожалуйста, не отправляйте повторные запросы!",
|
||||||
"warn.siyuan.exporting": "Экспортируется в Siyuan, пожалуйста, не отправляйте повторные запросы!",
|
"warn.siyuan.exporting": "Экспортируется в Siyuan, пожалуйста, не отправляйте повторные запросы!",
|
||||||
|
"websearch": {
|
||||||
|
"rag": "Выполнение RAG...",
|
||||||
|
"rag_complete": "Сохранено {{countAfter}} из {{countBefore}} результатов...",
|
||||||
|
"rag_failed": "RAG не удалось, возвращается пустой результат...",
|
||||||
|
"cutoff": "Обрезка содержимого поиска...",
|
||||||
|
"fetch_complete": "Завершено {{count}} поисков..."
|
||||||
|
},
|
||||||
"download.success": "Скачано успешно",
|
"download.success": "Скачано успешно",
|
||||||
"download.failed": "Скачивание не удалось",
|
"download.failed": "Скачивание не удалось",
|
||||||
"error.fetchTopicName": "Не удалось назвать топик"
|
"error.fetchTopicName": "Не удалось назвать топик"
|
||||||
@ -776,6 +783,7 @@
|
|||||||
"dimensions": "{{dimensions}} мер",
|
"dimensions": "{{dimensions}} мер",
|
||||||
"edit": "Редактировать модель",
|
"edit": "Редактировать модель",
|
||||||
"embedding": "Встраиваемые",
|
"embedding": "Встраиваемые",
|
||||||
|
"embedding_dimensions": "Встраиваемые размерности",
|
||||||
"embedding_model": "Встраиваемые модели",
|
"embedding_model": "Встраиваемые модели",
|
||||||
"embedding_model_tooltip": "Добавьте в настройки->модель сервиса->управление",
|
"embedding_model_tooltip": "Добавьте в настройки->модель сервиса->управление",
|
||||||
"function_calling": "Вызов функции",
|
"function_calling": "Вызов функции",
|
||||||
@ -1826,8 +1834,33 @@
|
|||||||
"overwrite_tooltip": "Использовать провайдера поиска вместо LLM",
|
"overwrite_tooltip": "Использовать провайдера поиска вместо LLM",
|
||||||
"apikey": "API ключ",
|
"apikey": "API ключ",
|
||||||
"free": "Бесплатно",
|
"free": "Бесплатно",
|
||||||
"content_limit": "Ограничение длины текста",
|
"compression": {
|
||||||
"content_limit_tooltip": "Ограничьте длину содержимого результатов поиска, контент, превышающий ограничение, будет обрезан."
|
"title": "Сжатие результатов поиска",
|
||||||
|
"method": "Метод сжатия",
|
||||||
|
"method.none": "Не сжимать",
|
||||||
|
"method.cutoff": "Обрезка",
|
||||||
|
"cutoff.limit": "Лимит обрезки",
|
||||||
|
"cutoff.limit.placeholder": "Введите длину",
|
||||||
|
"cutoff.limit.tooltip": "Ограничьте длину содержимого результатов поиска, контент, превышающий ограничение, будет обрезан (например, 2000 символов)",
|
||||||
|
"cutoff.unit.char": "Символы",
|
||||||
|
"cutoff.unit.token": "Токены",
|
||||||
|
"method.rag": "RAG",
|
||||||
|
"rag.document_count": "Количество документов",
|
||||||
|
"rag.document_count.default": "По умолчанию",
|
||||||
|
"rag.document_count.tooltip": "Ожидаемое количество документов, которые будут извлечены из каждого результата поиска. Фактическое количество извлеченных документов равно этому значению, умноженному на количество результатов поиска.",
|
||||||
|
"rag.embedding_dimensions.auto_get": "Автоматически получить размерности",
|
||||||
|
"rag.embedding_dimensions.placeholder": "Не устанавливать размерности",
|
||||||
|
"rag.embedding_dimensions.tooltip": "Если оставить пустым, параметр dimensions не будет передан",
|
||||||
|
"info": {
|
||||||
|
"dimensions_auto_success": "Размерности успешно получены, размерности: {{dimensions}}"
|
||||||
|
},
|
||||||
|
"error": {
|
||||||
|
"embedding_model_required": "Пожалуйста, сначала выберите модель встраивания",
|
||||||
|
"dimensions_auto_failed": "Не удалось получить размерности",
|
||||||
|
"provider_not_found": "Поставщик не найден",
|
||||||
|
"rag_failed": "RAG не удалось"
|
||||||
|
}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"general.auto_check_update.title": "Автоматическое обновление",
|
"general.auto_check_update.title": "Автоматическое обновление",
|
||||||
"general.early_access.title": "Ранний доступ",
|
"general.early_access.title": "Ранний доступ",
|
||||||
|
|||||||
@ -703,6 +703,13 @@
|
|||||||
"success.siyuan.export": "导出到思源笔记成功",
|
"success.siyuan.export": "导出到思源笔记成功",
|
||||||
"warn.yuque.exporting": "正在导出语雀, 请勿重复请求导出!",
|
"warn.yuque.exporting": "正在导出语雀, 请勿重复请求导出!",
|
||||||
"warn.siyuan.exporting": "正在导出到思源笔记,请勿重复请求导出!",
|
"warn.siyuan.exporting": "正在导出到思源笔记,请勿重复请求导出!",
|
||||||
|
"websearch": {
|
||||||
|
"rag": "正在执行 RAG...",
|
||||||
|
"rag_complete": "保留 {{countBefore}} 个结果中的 {{countAfter}} 个...",
|
||||||
|
"rag_failed": "RAG 失败,返回空结果...",
|
||||||
|
"cutoff": "正在截断搜索内容...",
|
||||||
|
"fetch_complete": "已完成 {{count}} 次搜索..."
|
||||||
|
},
|
||||||
"download.success": "下载成功",
|
"download.success": "下载成功",
|
||||||
"download.failed": "下载失败"
|
"download.failed": "下载失败"
|
||||||
},
|
},
|
||||||
@ -776,6 +783,7 @@
|
|||||||
"dimensions": "{{dimensions}} 维",
|
"dimensions": "{{dimensions}} 维",
|
||||||
"edit": "编辑模型",
|
"edit": "编辑模型",
|
||||||
"embedding": "嵌入",
|
"embedding": "嵌入",
|
||||||
|
"embedding_dimensions": "嵌入维度",
|
||||||
"embedding_model": "嵌入模型",
|
"embedding_model": "嵌入模型",
|
||||||
"embedding_model_tooltip": "在设置->模型服务中点击管理按钮添加",
|
"embedding_model_tooltip": "在设置->模型服务中点击管理按钮添加",
|
||||||
"function_calling": "函数调用",
|
"function_calling": "函数调用",
|
||||||
@ -1845,8 +1853,33 @@
|
|||||||
"title": "网络搜索",
|
"title": "网络搜索",
|
||||||
"apikey": "API 密钥",
|
"apikey": "API 密钥",
|
||||||
"free": "免费",
|
"free": "免费",
|
||||||
"content_limit": "内容长度限制",
|
"compression": {
|
||||||
"content_limit_tooltip": "限制搜索结果的内容长度, 超过限制的内容将被截断"
|
"title": "搜索结果压缩",
|
||||||
|
"method": "压缩方法",
|
||||||
|
"method.none": "不压缩",
|
||||||
|
"method.cutoff": "截断",
|
||||||
|
"cutoff.limit": "截断长度",
|
||||||
|
"cutoff.limit.placeholder": "输入长度",
|
||||||
|
"cutoff.limit.tooltip": "限制搜索结果的内容长度, 超过限制的内容将被截断(例如 2000 字符)",
|
||||||
|
"cutoff.unit.char": "字符",
|
||||||
|
"cutoff.unit.token": "Token",
|
||||||
|
"method.rag": "RAG",
|
||||||
|
"rag.document_count": "文档数量",
|
||||||
|
"rag.document_count.default": "默认",
|
||||||
|
"rag.document_count.tooltip": "预期从单个搜索结果中提取的文档数量,实际提取的总数量是这个值乘以搜索结果数量。",
|
||||||
|
"rag.embedding_dimensions.auto_get": "自动获取维度",
|
||||||
|
"rag.embedding_dimensions.placeholder": "不设置维度",
|
||||||
|
"rag.embedding_dimensions.tooltip": "留空则不传递 dimensions 参数",
|
||||||
|
"info": {
|
||||||
|
"dimensions_auto_success": "维度自动获取成功,维度为 {{dimensions}}"
|
||||||
|
},
|
||||||
|
"error": {
|
||||||
|
"embedding_model_required": "请先选择嵌入模型",
|
||||||
|
"dimensions_auto_failed": "维度自动获取失败",
|
||||||
|
"provider_not_found": "未找到服务商",
|
||||||
|
"rag_failed": "RAG 失败"
|
||||||
|
}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"quickPhrase": {
|
"quickPhrase": {
|
||||||
"title": "快捷短语",
|
"title": "快捷短语",
|
||||||
|
|||||||
@ -703,6 +703,13 @@
|
|||||||
"success.siyuan.export": "導出到思源筆記成功",
|
"success.siyuan.export": "導出到思源筆記成功",
|
||||||
"warn.yuque.exporting": "正在導出語雀,請勿重複請求導出!",
|
"warn.yuque.exporting": "正在導出語雀,請勿重複請求導出!",
|
||||||
"warn.siyuan.exporting": "正在導出到思源筆記,請勿重複請求導出!",
|
"warn.siyuan.exporting": "正在導出到思源筆記,請勿重複請求導出!",
|
||||||
|
"websearch": {
|
||||||
|
"rag": "正在執行 RAG...",
|
||||||
|
"rag_complete": "保留 {{countBefore}} 個結果中的 {{countAfter}} 個...",
|
||||||
|
"rag_failed": "RAG 失敗,返回空結果...",
|
||||||
|
"cutoff": "正在截斷搜尋內容...",
|
||||||
|
"fetch_complete": "已完成 {{count}} 次搜尋..."
|
||||||
|
},
|
||||||
"download.success": "下載成功",
|
"download.success": "下載成功",
|
||||||
"download.failed": "下載失敗"
|
"download.failed": "下載失敗"
|
||||||
},
|
},
|
||||||
@ -776,6 +783,7 @@
|
|||||||
"dimensions": "{{dimensions}} 維",
|
"dimensions": "{{dimensions}} 維",
|
||||||
"edit": "編輯模型",
|
"edit": "編輯模型",
|
||||||
"embedding": "嵌入",
|
"embedding": "嵌入",
|
||||||
|
"embedding_dimensions": "嵌入維度",
|
||||||
"embedding_model": "嵌入模型",
|
"embedding_model": "嵌入模型",
|
||||||
"embedding_model_tooltip": "在設定->模型服務中點選管理按鈕新增",
|
"embedding_model_tooltip": "在設定->模型服務中點選管理按鈕新增",
|
||||||
"function_calling": "函數調用",
|
"function_calling": "函數調用",
|
||||||
@ -1829,8 +1837,33 @@
|
|||||||
"overwrite_tooltip": "強制使用搜尋服務商而不是大語言模型進行搜尋",
|
"overwrite_tooltip": "強制使用搜尋服務商而不是大語言模型進行搜尋",
|
||||||
"apikey": "API 金鑰",
|
"apikey": "API 金鑰",
|
||||||
"free": "免費",
|
"free": "免費",
|
||||||
"content_limit": "內容長度限制",
|
"compression": {
|
||||||
"content_limit_tooltip": "限制搜尋結果的內容長度,超過限制的內容將被截斷"
|
"title": "搜尋結果壓縮",
|
||||||
|
"method": "壓縮方法",
|
||||||
|
"method.none": "不壓縮",
|
||||||
|
"method.cutoff": "截斷",
|
||||||
|
"cutoff.limit": "截斷長度",
|
||||||
|
"cutoff.limit.placeholder": "輸入長度",
|
||||||
|
"cutoff.limit.tooltip": "限制搜尋結果的內容長度,超過限制的內容將被截斷(例如 2000 字符)",
|
||||||
|
"cutoff.unit.char": "字符",
|
||||||
|
"cutoff.unit.token": "Token",
|
||||||
|
"method.rag": "RAG",
|
||||||
|
"rag.document_count": "文檔數量",
|
||||||
|
"rag.document_count.default": "預設",
|
||||||
|
"rag.document_count.tooltip": "預期從單個搜尋結果中提取的文檔數量,實際提取的總數量是這個值乘以搜尋結果數量。",
|
||||||
|
"rag.embedding_dimensions.auto_get": "自動獲取維度",
|
||||||
|
"rag.embedding_dimensions.placeholder": "不設置維度",
|
||||||
|
"rag.embedding_dimensions.tooltip": "留空則不傳遞 dimensions 參數",
|
||||||
|
"info": {
|
||||||
|
"dimensions_auto_success": "維度自動獲取成功,維度為 {{dimensions}}"
|
||||||
|
},
|
||||||
|
"error": {
|
||||||
|
"embedding_model_required": "請先選擇嵌入模型",
|
||||||
|
"dimensions_auto_failed": "維度自動獲取失敗",
|
||||||
|
"provider_not_found": "未找到服務商",
|
||||||
|
"rag_failed": "RAG 失敗"
|
||||||
|
}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"general.auto_check_update.title": "自動更新",
|
"general.auto_check_update.title": "自動更新",
|
||||||
"general.early_access.title": "搶先體驗",
|
"general.early_access.title": "搶先體驗",
|
||||||
|
|||||||
@ -5,13 +5,19 @@ import { selectFormattedCitationsByBlockId } from '@renderer/store/messageBlock'
|
|||||||
import { WebSearchSource } from '@renderer/types'
|
import { WebSearchSource } from '@renderer/types'
|
||||||
import { type CitationMessageBlock, MessageBlockStatus } from '@renderer/types/newMessage'
|
import { type CitationMessageBlock, MessageBlockStatus } from '@renderer/types/newMessage'
|
||||||
import React, { useMemo } from 'react'
|
import React, { useMemo } from 'react'
|
||||||
|
import { useTranslation } from 'react-i18next'
|
||||||
import { useSelector } from 'react-redux'
|
import { useSelector } from 'react-redux'
|
||||||
import styled from 'styled-components'
|
import styled from 'styled-components'
|
||||||
|
|
||||||
import CitationsList from '../CitationsList'
|
import CitationsList from '../CitationsList'
|
||||||
|
|
||||||
function CitationBlock({ block }: { block: CitationMessageBlock }) {
|
function CitationBlock({ block }: { block: CitationMessageBlock }) {
|
||||||
|
const { t } = useTranslation()
|
||||||
const formattedCitations = useSelector((state: RootState) => selectFormattedCitationsByBlockId(state, block.id))
|
const formattedCitations = useSelector((state: RootState) => selectFormattedCitationsByBlockId(state, block.id))
|
||||||
|
const { websearch } = useSelector((state: RootState) => state.runtime)
|
||||||
|
const message = useSelector((state: RootState) => state.messages.entities[block.messageId])
|
||||||
|
const userMessageId = message?.askId || block.messageId // 如果没有 askId 则回退到 messageId
|
||||||
|
|
||||||
const hasGeminiBlock = block.response?.source === WebSearchSource.GEMINI
|
const hasGeminiBlock = block.response?.source === WebSearchSource.GEMINI
|
||||||
const hasCitations = useMemo(() => {
|
const hasCitations = useMemo(() => {
|
||||||
return (
|
return (
|
||||||
@ -21,8 +27,32 @@ function CitationBlock({ block }: { block: CitationMessageBlock }) {
|
|||||||
)
|
)
|
||||||
}, [formattedCitations, block.knowledge, hasGeminiBlock])
|
}, [formattedCitations, block.knowledge, hasGeminiBlock])
|
||||||
|
|
||||||
|
// Picks the translated spinner label for the web search tracked under `requestId`.
// When no entry exists in `websearch.activeSearches`, or its phase is not one of the
// phases handled below, the generic "message.searching" text is returned.
const getWebSearchStatusText = (requestId: string) => {
  const status = websearch.activeSearches[requestId] ?? { phase: 'default' }
  const phase = status.phase

  if (phase === 'fetch_complete') {
    // Missing counters are reported as 0.
    return t('message.websearch.fetch_complete', { count: status.countAfter ?? 0 })
  }

  if (phase === 'rag') {
    return t('message.websearch.rag')
  }

  if (phase === 'rag_complete') {
    return t('message.websearch.rag_complete', {
      countBefore: status.countBefore ?? 0,
      countAfter: status.countAfter ?? 0
    })
  }

  if (phase === 'rag_failed') {
    return t('message.websearch.rag_failed')
  }

  if (phase === 'cutoff') {
    return t('message.websearch.cutoff')
  }

  return t('message.searching')
}
|
||||||
|
|
||||||
if (block.status === MessageBlockStatus.PROCESSING) {
|
if (block.status === MessageBlockStatus.PROCESSING) {
|
||||||
return <Spinner text="message.searching" />
|
return <Spinner text={getWebSearchStatusText(userMessageId)} />
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!hasCitations) {
|
if (!hasCitations) {
|
||||||
|
|||||||
@ -41,7 +41,8 @@ const PopupContainer: React.FC<Props> = ({ base, resolve }) => {
|
|||||||
const searchResults = await searchKnowledgeBase(value, base)
|
const searchResults = await searchKnowledgeBase(value, base)
|
||||||
setResults(searchResults)
|
setResults(searchResults)
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Search failed:', error)
|
console.error(`Failed to search knowledge base ${base.name}:`, error)
|
||||||
|
setResults([])
|
||||||
} finally {
|
} finally {
|
||||||
setLoading(false)
|
setLoading(false)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,18 +1,16 @@
|
|||||||
import { useTheme } from '@renderer/context/ThemeProvider'
|
import { useTheme } from '@renderer/context/ThemeProvider'
|
||||||
import { useAppDispatch, useAppSelector } from '@renderer/store'
|
import { useWebSearchSettings } from '@renderer/hooks/useWebSearchProviders'
|
||||||
import { setContentLimit, setMaxResult, setSearchWithTime } from '@renderer/store/websearch'
|
import { useAppDispatch } from '@renderer/store'
|
||||||
import { Input, Slider, Switch, Tooltip } from 'antd'
|
import { setMaxResult, setSearchWithTime } from '@renderer/store/websearch'
|
||||||
|
import { Slider, Switch } from 'antd'
|
||||||
import { t } from 'i18next'
|
import { t } from 'i18next'
|
||||||
import { Info } from 'lucide-react'
|
|
||||||
import { FC } from 'react'
|
import { FC } from 'react'
|
||||||
|
|
||||||
import { SettingDivider, SettingGroup, SettingRow, SettingRowTitle, SettingTitle } from '..'
|
import { SettingDivider, SettingGroup, SettingRow, SettingRowTitle, SettingTitle } from '..'
|
||||||
|
|
||||||
const BasicSettings: FC = () => {
|
const BasicSettings: FC = () => {
|
||||||
const { theme } = useTheme()
|
const { theme } = useTheme()
|
||||||
const searchWithTime = useAppSelector((state) => state.websearch.searchWithTime)
|
const { searchWithTime, maxResults } = useWebSearchSettings()
|
||||||
const maxResults = useAppSelector((state) => state.websearch.maxResults)
|
|
||||||
const contentLimit = useAppSelector((state) => state.websearch.contentLimit)
|
|
||||||
|
|
||||||
const dispatch = useAppDispatch()
|
const dispatch = useAppDispatch()
|
||||||
|
|
||||||
@ -38,28 +36,6 @@ const BasicSettings: FC = () => {
|
|||||||
onChangeComplete={(value) => dispatch(setMaxResult(value))}
|
onChangeComplete={(value) => dispatch(setMaxResult(value))}
|
||||||
/>
|
/>
|
||||||
</SettingRow>
|
</SettingRow>
|
||||||
<SettingDivider style={{ marginTop: 15, marginBottom: 10 }} />
|
|
||||||
<SettingRow>
|
|
||||||
<SettingRowTitle>
|
|
||||||
{t('settings.websearch.content_limit')}
|
|
||||||
<Tooltip title={t('settings.websearch.content_limit_tooltip')} placement="right">
|
|
||||||
<Info size={16} color="var(--color-icon)" style={{ marginLeft: 5, cursor: 'pointer' }} />
|
|
||||||
</Tooltip>
|
|
||||||
</SettingRowTitle>
|
|
||||||
<Input
|
|
||||||
style={{ width: '100px' }}
|
|
||||||
placeholder="2000"
|
|
||||||
value={contentLimit === undefined ? '' : contentLimit}
|
|
||||||
onChange={(e) => {
|
|
||||||
const value = e.target.value
|
|
||||||
if (value === '') {
|
|
||||||
dispatch(setContentLimit(undefined))
|
|
||||||
} else if (!isNaN(Number(value)) && Number(value) > 0) {
|
|
||||||
dispatch(setContentLimit(Number(value)))
|
|
||||||
}
|
|
||||||
}}
|
|
||||||
/>
|
|
||||||
</SettingRow>
|
|
||||||
</SettingGroup>
|
</SettingGroup>
|
||||||
</>
|
</>
|
||||||
)
|
)
|
||||||
|
|||||||
@ -0,0 +1,60 @@
|
|||||||
|
import { useWebSearchSettings } from '@renderer/hooks/useWebSearchProviders'
|
||||||
|
import { SettingRow, SettingRowTitle } from '@renderer/pages/settings'
|
||||||
|
import { Input, Select, Space, Tooltip } from 'antd'
|
||||||
|
import { ChevronDown, Info } from 'lucide-react'
|
||||||
|
import { useTranslation } from 'react-i18next'
|
||||||
|
|
||||||
|
const INPUT_BOX_WIDTH = '200px'
|
||||||
|
|
||||||
|
/**
 * Settings row for the "cutoff" compression method: a numeric limit plus the
 * unit (characters or tokens) that the limit is expressed in.
 */
const CutoffSettings = () => {
  const { t } = useTranslation()
  const { compressionConfig, updateCompressionConfig } = useWebSearchSettings()

  const storedLimit = compressionConfig?.cutoffLimit
  const limitText = storedLimit === undefined ? '' : storedLimit

  // An empty box clears the limit; anything that is not a positive number is ignored.
  const applyLimitText = (raw: string) => {
    if (raw === '') {
      updateCompressionConfig({ cutoffLimit: undefined })
      return
    }
    const parsed = Number(raw)
    if (!isNaN(parsed) && parsed > 0) {
      updateCompressionConfig({ cutoffLimit: parsed })
    }
  }

  const applyUnit = (unit: 'char' | 'token') => {
    updateCompressionConfig({ cutoffUnit: unit })
  }

  const unitOptions = [
    { value: 'char', label: t('settings.websearch.compression.cutoff.unit.char') },
    { value: 'token', label: t('settings.websearch.compression.cutoff.unit.token') }
  ]

  return (
    <SettingRow>
      <SettingRowTitle>
        {t('settings.websearch.compression.cutoff.limit')}
        <Tooltip title={t('settings.websearch.compression.cutoff.limit.tooltip')} placement="right">
          <Info size={16} color="var(--color-icon)" style={{ marginLeft: 5, cursor: 'pointer' }} />
        </Tooltip>
      </SettingRowTitle>
      <Space.Compact style={{ width: INPUT_BOX_WIDTH }}>
        <Input
          style={{ maxWidth: '60%' }}
          placeholder={t('settings.websearch.compression.cutoff.limit.placeholder')}
          value={limitText}
          onChange={(event) => applyLimitText(event.target.value)}
        />
        <Select
          value={compressionConfig?.cutoffUnit || 'char'}
          style={{ minWidth: '40%' }}
          onChange={applyUnit}
          options={unitOptions}
          suffixIcon={<ChevronDown size={16} color="var(--color-border)" />}
        />
      </Space.Compact>
    </SettingRow>
  )
}
|
||||||
|
|
||||||
|
export default CutoffSettings
|
||||||
@ -0,0 +1,207 @@
|
|||||||
|
import AiProvider from '@renderer/aiCore'
|
||||||
|
import { DEFAULT_WEBSEARCH_RAG_DOCUMENT_COUNT } from '@renderer/config/constant'
|
||||||
|
import Logger from '@renderer/config/logger'
|
||||||
|
import { isEmbeddingModel, isRerankModel } from '@renderer/config/models'
|
||||||
|
import { NOT_SUPPORTED_REANK_PROVIDERS } from '@renderer/config/providers'
|
||||||
|
import { useProviders } from '@renderer/hooks/useProvider'
|
||||||
|
import { useWebSearchSettings } from '@renderer/hooks/useWebSearchProviders'
|
||||||
|
import { SettingDivider, SettingRow, SettingRowTitle } from '@renderer/pages/settings'
|
||||||
|
import { getModelUniqId } from '@renderer/services/ModelService'
|
||||||
|
import { Model } from '@renderer/types'
|
||||||
|
import { Button, InputNumber, Select, Slider, Tooltip } from 'antd'
|
||||||
|
import { find, sortBy } from 'lodash'
|
||||||
|
import { ChevronDown, Info, RefreshCw } from 'lucide-react'
|
||||||
|
import { useMemo, useState } from 'react'
|
||||||
|
import { useTranslation } from 'react-i18next'
|
||||||
|
|
||||||
|
const INPUT_BOX_WIDTH = '200px'
|
||||||
|
|
||||||
|
/**
 * Settings rows for the "rag" compression method: embedding model, embedding
 * dimensions (typed in or fetched from the provider), optional rerank model,
 * and the document count slider.
 */
const RagSettings = () => {
  const { t } = useTranslation()
  const { providers } = useProviders()
  const { compressionConfig, updateCompressionConfig } = useWebSearchSettings()
  const [loadingDimensions, setLoadingDimensions] = useState(false)

  const embeddingModel = compressionConfig?.embeddingModel
  const rerankModel = compressionConfig?.rerankModel

  // Flat lists of every embedding / rerank model across all providers.
  const embeddingModels = useMemo(
    () => providers.flatMap((provider) => provider.models).filter((model) => isEmbeddingModel(model)),
    [providers]
  )

  const rerankModels = useMemo(
    () => providers.flatMap((provider) => provider.models).filter((model) => isRerankModel(model)),
    [providers]
  )

  // Grouped <Select> options, one group per provider that has at least one embedding model.
  const embeddingSelectOptions = useMemo(() => {
    const groups = providers
      .filter((provider) => provider.models.length > 0)
      .map((provider) => {
        const options = sortBy(provider.models, 'name')
          .filter((model) => isEmbeddingModel(model))
          .map((model) => ({
            label: model.name,
            value: getModelUniqId(model),
            providerId: provider.id,
            modelId: model.id
          }))
        return {
          label: provider.isSystem ? t(`provider.${provider.id}`) : provider.name,
          title: provider.name,
          options
        }
      })
    return groups.filter((group) => group.options.length > 0)
  }, [providers, t])

  // Same grouping for rerank models, leaving out providers listed in NOT_SUPPORTED_REANK_PROVIDERS.
  const rerankSelectOptions = useMemo(() => {
    const groups = providers
      .filter((provider) => provider.models.length > 0 && !NOT_SUPPORTED_REANK_PROVIDERS.includes(provider.id))
      .map((provider) => {
        const options = sortBy(provider.models, 'name')
          .filter((model) => isRerankModel(model))
          .map((model) => ({
            label: model.name,
            value: getModelUniqId(model)
          }))
        return {
          label: provider.isSystem ? t(`provider.${provider.id}`) : provider.name,
          title: provider.name,
          options
        }
      })
    return groups.filter((group) => group.options.length > 0)
  }, [providers, t])

  // Option values are the JSON produced by getModelUniqId; parse them back to look up the model.
  const handleEmbeddingModelChange = (modelValue: string) => {
    const picked = find(embeddingModels, JSON.parse(modelValue)) as Model
    updateCompressionConfig({ embeddingModel: picked })
  }

  // Clearing the select passes no value, which removes the rerank model.
  const handleRerankModelChange = (modelValue?: string) => {
    let picked: Model | undefined = undefined
    if (modelValue) {
      picked = find(rerankModels, JSON.parse(modelValue)) as Model
    }
    updateCompressionConfig({ rerankModel: picked })
  }

  // null (empty input) and 0 are both stored as "not set".
  const handleEmbeddingDimensionsChange = (value: number | null) => {
    updateCompressionConfig({ embeddingDimensions: value || undefined })
  }

  const handleDocumentCountChange = (value: number) => {
    updateCompressionConfig({ documentCount: value })
  }

  // Ask the embedding model's provider for the vector size and store it in the config.
  const handleAutoGetDimensions = async () => {
    if (!embeddingModel) {
      Logger.log('[RagSettings] handleAutoGetDimensions: no embedding model')
      window.message.error(t('settings.websearch.compression.error.embedding_model_required'))
      return
    }

    const provider = providers.find((candidate) => candidate.id === embeddingModel.provider)
    if (!provider) {
      Logger.log('[RagSettings] handleAutoGetDimensions: provider not found')
      window.message.error(t('settings.websearch.compression.error.provider_not_found'))
      return
    }

    setLoadingDimensions(true)
    try {
      const aiProvider = new AiProvider(provider)
      const dimensions = await aiProvider.getEmbeddingDimensions(embeddingModel)

      updateCompressionConfig({ embeddingDimensions: dimensions })

      window.message.success(t('settings.websearch.compression.info.dimensions_auto_success', { dimensions }))
    } catch (error) {
      Logger.error('[RagSettings] handleAutoGetDimensions: failed to get embedding dimensions', error)
      window.message.error(t('settings.websearch.compression.error.dimensions_auto_failed'))
    } finally {
      setLoadingDimensions(false)
    }
  }

  const embeddingSelectValue = embeddingModel ? getModelUniqId(embeddingModel) : undefined
  const rerankSelectValue = rerankModel ? getModelUniqId(rerankModel) : undefined
  const documentCountValue = compressionConfig?.documentCount || DEFAULT_WEBSEARCH_RAG_DOCUMENT_COUNT

  return (
    <>
      <SettingRow>
        <SettingRowTitle>{t('models.embedding_model')}</SettingRowTitle>
        <Select
          value={embeddingSelectValue}
          style={{ width: INPUT_BOX_WIDTH }}
          options={embeddingSelectOptions}
          placeholder={t('settings.models.empty')}
          onChange={handleEmbeddingModelChange}
          allowClear={false}
          showSearch
          suffixIcon={<ChevronDown size={16} color="var(--color-border)" />}
        />
      </SettingRow>
      <SettingDivider />

      <SettingRow>
        <SettingRowTitle>
          {t('models.embedding_dimensions')}
          <Tooltip title={t('settings.websearch.compression.rag.embedding_dimensions.tooltip')}>
            <Info size={16} color="var(--color-icon)" style={{ marginLeft: 5, cursor: 'pointer' }} />
          </Tooltip>
        </SettingRowTitle>
        <div style={{ display: 'flex', gap: '8px', alignItems: 'center', width: INPUT_BOX_WIDTH }}>
          <InputNumber
            value={compressionConfig?.embeddingDimensions}
            style={{ flex: 1 }}
            placeholder={t('settings.websearch.compression.rag.embedding_dimensions.placeholder')}
            min={0}
            onChange={handleEmbeddingDimensionsChange}
          />
          <Tooltip title={t('settings.websearch.compression.rag.embedding_dimensions.auto_get')}>
            <Button
              icon={<RefreshCw size={16} />}
              loading={loadingDimensions}
              disabled={!embeddingModel}
              onClick={handleAutoGetDimensions}
            />
          </Tooltip>
        </div>
      </SettingRow>
      <SettingDivider />

      <SettingRow>
        <SettingRowTitle>{t('models.rerank_model')}</SettingRowTitle>
        <Select
          value={rerankSelectValue}
          style={{ width: INPUT_BOX_WIDTH }}
          options={rerankSelectOptions}
          placeholder={t('settings.models.empty')}
          onChange={handleRerankModelChange}
          allowClear
          showSearch
          suffixIcon={<ChevronDown size={16} color="var(--color-border)" />}
        />
      </SettingRow>
      <SettingDivider />

      <SettingRow>
        <SettingRowTitle>
          {t('settings.websearch.compression.rag.document_count')}
          <Tooltip title={t('settings.websearch.compression.rag.document_count.tooltip')} placement="right">
            <Info size={16} color="var(--color-icon)" style={{ marginLeft: 5, cursor: 'pointer' }} />
          </Tooltip>
        </SettingRowTitle>
        <div style={{ width: INPUT_BOX_WIDTH }}>
          <Slider
            value={documentCountValue}
            min={1}
            max={10}
            step={1}
            onChange={handleDocumentCountChange}
            marks={{
              1: t('settings.websearch.compression.rag.document_count.default'),
              3: '3',
              10: '10'
            }}
          />
        </div>
      </SettingRow>
    </>
  )
}
|
||||||
|
|
||||||
|
export default RagSettings
|
||||||
@ -0,0 +1,47 @@
|
|||||||
|
import { useWebSearchSettings } from '@renderer/hooks/useWebSearchProviders'
|
||||||
|
import { SettingDivider, SettingGroup, SettingRow, SettingRowTitle, SettingTitle } from '@renderer/pages/settings'
|
||||||
|
import { Select } from 'antd'
|
||||||
|
import { ChevronDown } from 'lucide-react'
|
||||||
|
import { useTranslation } from 'react-i18next'
|
||||||
|
|
||||||
|
import CutoffSettings from './CutoffSettings'
|
||||||
|
import RagSettings from './RagSettings'
|
||||||
|
|
||||||
|
/**
 * Settings group for compressing web search results. Shows the method selector
 * and, below it, the settings of the selected method (cutoff or RAG).
 */
const CompressionSettings = () => {
  const { t } = useTranslation()
  const { compressionConfig, updateCompressionConfig } = useWebSearchSettings()

  const activeMethod = compressionConfig?.method

  const compressionMethodOptions = [
    { value: 'none', label: t('settings.websearch.compression.method.none') },
    { value: 'cutoff', label: t('settings.websearch.compression.method.cutoff') },
    { value: 'rag', label: t('settings.websearch.compression.method.rag') }
  ]

  const selectMethod = (method: 'none' | 'cutoff' | 'rag') => {
    updateCompressionConfig({ method })
  }

  return (
    <SettingGroup>
      <SettingTitle>{t('settings.websearch.compression.title')}</SettingTitle>
      <SettingDivider />

      <SettingRow>
        <SettingRowTitle>{t('settings.websearch.compression.method')}</SettingRowTitle>
        <Select
          value={activeMethod || 'none'}
          style={{ width: '200px' }}
          onChange={selectMethod}
          options={compressionMethodOptions}
          suffixIcon={<ChevronDown size={16} color="var(--color-border)" />}
        />
      </SettingRow>
      <SettingDivider />

      {/* Only the selected method's settings are rendered; 'none' shows nothing. */}
      {activeMethod === 'cutoff' && <CutoffSettings />}
      {activeMethod === 'rag' && <RagSettings />}
    </SettingGroup>
  )
}
|
||||||
|
|
||||||
|
export default CompressionSettings
|
||||||
@ -9,6 +9,7 @@ import { useTranslation } from 'react-i18next'
|
|||||||
import { SettingContainer, SettingDivider, SettingGroup, SettingRow, SettingRowTitle, SettingTitle } from '..'
|
import { SettingContainer, SettingDivider, SettingGroup, SettingRow, SettingRowTitle, SettingTitle } from '..'
|
||||||
import BasicSettings from './BasicSettings'
|
import BasicSettings from './BasicSettings'
|
||||||
import BlacklistSettings from './BlacklistSettings'
|
import BlacklistSettings from './BlacklistSettings'
|
||||||
|
import CompressionSettings from './CompressionSettings'
|
||||||
import WebSearchProviderSetting from './WebSearchProviderSetting'
|
import WebSearchProviderSetting from './WebSearchProviderSetting'
|
||||||
|
|
||||||
const WebSearchSettings: FC = () => {
|
const WebSearchSettings: FC = () => {
|
||||||
@ -56,6 +57,7 @@ const WebSearchSettings: FC = () => {
|
|||||||
</SettingGroup>
|
</SettingGroup>
|
||||||
)}
|
)}
|
||||||
<BasicSettings />
|
<BasicSettings />
|
||||||
|
<CompressionSettings />
|
||||||
<BlacklistSettings />
|
<BlacklistSettings />
|
||||||
</SettingContainer>
|
</SettingContainer>
|
||||||
)
|
)
|
||||||
|
|||||||
@ -26,15 +26,13 @@ export default class BochaProvider extends BaseWebSearchProvider {
|
|||||||
Authorization: `Bearer ${this.apiKey}`
|
Authorization: `Bearer ${this.apiKey}`
|
||||||
}
|
}
|
||||||
|
|
||||||
const contentLimit = websearch.contentLimit
|
|
||||||
|
|
||||||
const params: BochaSearchParams = {
|
const params: BochaSearchParams = {
|
||||||
query,
|
query,
|
||||||
count: websearch.maxResults,
|
count: websearch.maxResults,
|
||||||
exclude: websearch.excludeDomains.join(','),
|
exclude: websearch.excludeDomains.join(','),
|
||||||
freshness: websearch.searchWithTime ? 'oneDay' : 'noLimit',
|
freshness: websearch.searchWithTime ? 'oneDay' : 'noLimit',
|
||||||
summary: false,
|
summary: true,
|
||||||
page: contentLimit ? Math.ceil(contentLimit / websearch.maxResults) : 1
|
page: 1
|
||||||
}
|
}
|
||||||
|
|
||||||
const response = await fetch(`${this.apiHost}/v1/web-search`, {
|
const response = await fetch(`${this.apiHost}/v1/web-search`, {
|
||||||
@ -58,7 +56,8 @@ export default class BochaProvider extends BaseWebSearchProvider {
|
|||||||
query: resp.data.queryContext.originalQuery,
|
query: resp.data.queryContext.originalQuery,
|
||||||
results: resp.data.webPages.value.map((result) => ({
|
results: resp.data.webPages.value.map((result) => ({
|
||||||
title: result.name,
|
title: result.name,
|
||||||
content: result.snippet,
|
// 优先使用 summary(更详细),如果没有则使用 snippet
|
||||||
|
content: result.summary || result.snippet || '',
|
||||||
url: result.url
|
url: result.url
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|||||||
@ -35,14 +35,9 @@ export default class ExaProvider extends BaseWebSearchProvider {
|
|||||||
return {
|
return {
|
||||||
query: response.autopromptString,
|
query: response.autopromptString,
|
||||||
results: response.results.slice(0, websearch.maxResults).map((result) => {
|
results: response.results.slice(0, websearch.maxResults).map((result) => {
|
||||||
let content = result.text || ''
|
|
||||||
if (websearch.contentLimit && content.length > websearch.contentLimit) {
|
|
||||||
content = content.slice(0, websearch.contentLimit) + '...'
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
title: result.title || 'No title',
|
title: result.title || 'No title',
|
||||||
content: content,
|
content: result.text || '',
|
||||||
url: result.url || ''
|
url: result.url || ''
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|||||||
@ -55,11 +55,7 @@ export default class LocalSearchProvider extends BaseWebSearchProvider {
|
|||||||
// Fetch content for each URL concurrently
|
// Fetch content for each URL concurrently
|
||||||
const fetchPromises = validItems.map(async (item) => {
|
const fetchPromises = validItems.map(async (item) => {
|
||||||
// Logger.log(`Fetching content for ${item.url}...`)
|
// Logger.log(`Fetching content for ${item.url}...`)
|
||||||
const result = await fetchWebContent(item.url, 'markdown', this.provider.usingBrowser, httpOptions)
|
return await fetchWebContent(item.url, 'markdown', this.provider.usingBrowser, httpOptions)
|
||||||
if (websearch.contentLimit && result.content.length > websearch.contentLimit) {
|
|
||||||
result.content = result.content.slice(0, websearch.contentLimit) + '...'
|
|
||||||
}
|
|
||||||
return result
|
|
||||||
})
|
})
|
||||||
|
|
||||||
// Wait for all fetches to complete
|
// Wait for all fetches to complete
|
||||||
|
|||||||
@ -122,11 +122,7 @@ export default class SearxngProvider extends BaseWebSearchProvider {
|
|||||||
// Fetch content for each URL concurrently
|
// Fetch content for each URL concurrently
|
||||||
const fetchPromises = validItems.map(async (item) => {
|
const fetchPromises = validItems.map(async (item) => {
|
||||||
// Logger.log(`Fetching content for ${item.url}...`)
|
// Logger.log(`Fetching content for ${item.url}...`)
|
||||||
const result = await fetchWebContent(item.url, 'markdown', this.provider.usingBrowser)
|
return await fetchWebContent(item.url, 'markdown', this.provider.usingBrowser)
|
||||||
if (websearch.contentLimit && result.content.length > websearch.contentLimit) {
|
|
||||||
result.content = result.content.slice(0, websearch.contentLimit) + '...'
|
|
||||||
}
|
|
||||||
return result
|
|
||||||
})
|
})
|
||||||
|
|
||||||
// Wait for all fetches to complete
|
// Wait for all fetches to complete
|
||||||
|
|||||||
@ -31,14 +31,9 @@ export default class TavilyProvider extends BaseWebSearchProvider {
|
|||||||
return {
|
return {
|
||||||
query: result.query,
|
query: result.query,
|
||||||
results: result.results.slice(0, websearch.maxResults).map((result) => {
|
results: result.results.slice(0, websearch.maxResults).map((result) => {
|
||||||
let content = result.content || ''
|
|
||||||
if (websearch.contentLimit && content.length > websearch.contentLimit) {
|
|
||||||
content = content.slice(0, websearch.contentLimit) + '...'
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
title: result.title || 'No title',
|
title: result.title || 'No title',
|
||||||
content: content,
|
content: result.content || '',
|
||||||
url: result.url || ''
|
url: result.url || ''
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|||||||
@ -157,8 +157,13 @@ async function fetchExternalTool(
|
|||||||
try {
|
try {
|
||||||
// Use the consolidated processWebsearch function
|
// Use the consolidated processWebsearch function
|
||||||
WebSearchService.createAbortSignal(lastUserMessage.id)
|
WebSearchService.createAbortSignal(lastUserMessage.id)
|
||||||
|
const webSearchResponse = await WebSearchService.processWebsearch(
|
||||||
|
webSearchProvider!,
|
||||||
|
extractResults,
|
||||||
|
lastUserMessage.id
|
||||||
|
)
|
||||||
return {
|
return {
|
||||||
results: await WebSearchService.processWebsearch(webSearchProvider!, extractResults),
|
results: webSearchResponse,
|
||||||
source: WebSearchSource.WEBSEARCH
|
source: WebSearchSource.WEBSEARCH
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
|||||||
@ -130,7 +130,7 @@ export const searchKnowledgeBase = async (
|
|||||||
)
|
)
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
Logger.error(`Error searching knowledge base ${base.name}:`, error)
|
Logger.error(`Error searching knowledge base ${base.name}:`, error)
|
||||||
return []
|
throw error
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,13 +1,38 @@
|
|||||||
|
import { DEFAULT_WEBSEARCH_RAG_DOCUMENT_COUNT } from '@renderer/config/constant'
|
||||||
import Logger from '@renderer/config/logger'
|
import Logger from '@renderer/config/logger'
|
||||||
|
import i18n from '@renderer/i18n'
|
||||||
import WebSearchEngineProvider from '@renderer/providers/WebSearchProvider'
|
import WebSearchEngineProvider from '@renderer/providers/WebSearchProvider'
|
||||||
import store from '@renderer/store'
|
import store from '@renderer/store'
|
||||||
import { WebSearchState } from '@renderer/store/websearch'
|
import { setWebSearchStatus } from '@renderer/store/runtime'
|
||||||
import { WebSearchProvider, WebSearchProviderResponse } from '@renderer/types'
|
import { CompressionConfig, WebSearchState } from '@renderer/store/websearch'
|
||||||
import { hasObjectKey } from '@renderer/utils'
|
import {
|
||||||
|
KnowledgeBase,
|
||||||
|
KnowledgeItem,
|
||||||
|
KnowledgeReference,
|
||||||
|
WebSearchProvider,
|
||||||
|
WebSearchProviderResponse,
|
||||||
|
WebSearchProviderResult,
|
||||||
|
WebSearchStatus
|
||||||
|
} from '@renderer/types'
|
||||||
|
import { hasObjectKey, uuid } from '@renderer/utils'
|
||||||
import { addAbortController } from '@renderer/utils/abortController'
|
import { addAbortController } from '@renderer/utils/abortController'
|
||||||
|
import { formatErrorMessage } from '@renderer/utils/error'
|
||||||
import { ExtractResults } from '@renderer/utils/extract'
|
import { ExtractResults } from '@renderer/utils/extract'
|
||||||
import { fetchWebContents } from '@renderer/utils/fetch'
|
import { fetchWebContents } from '@renderer/utils/fetch'
|
||||||
|
import { consolidateReferencesByUrl, selectReferences } from '@renderer/utils/websearch'
|
||||||
import dayjs from 'dayjs'
|
import dayjs from 'dayjs'
|
||||||
|
import { LRUCache } from 'lru-cache'
|
||||||
|
|
||||||
|
import { getKnowledgeBaseParams } from './KnowledgeService'
|
||||||
|
import { getKnowledgeSourceUrl, searchKnowledgeBase } from './KnowledgeService'
|
||||||
|
|
||||||
|
/** Bookkeeping that WebSearchService keeps for a single request. */
interface RequestState {
  /** Abort signal for the request; null until one has been assigned. */
  signal: AbortSignal | null
  /** Knowledge base used for RAG compression of this request, once one has been set up. */
  searchBase?: KnowledgeBase
  /** Whether the request has been paused. */
  isPaused: boolean
  /** Creation time of this state, as a `Date.now()` timestamp. */
  createdAt: number
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 提供网络搜索相关功能的服务类
|
* 提供网络搜索相关功能的服务类
|
||||||
*/
|
*/
|
||||||
@ -19,12 +44,47 @@ class WebSearchService {
|
|||||||
|
|
||||||
isPaused = false
|
isPaused = false
|
||||||
|
|
||||||
createAbortSignal(key: string) {
|
// Per-request state, keyed by request ID.
// When an entry is disposed, the knowledge base attached to it (if any) is deleted.
private requestStates = new LRUCache<string, RequestState>({
  max: 5, // at most 5 concurrent requests
  ttl: 1000 * 60 * 2, // entries expire after 2 minutes
  dispose: (requestState: RequestState, requestId: string) => {
    const { searchBase } = requestState
    if (searchBase) {
      // Best-effort cleanup: a failed delete is only logged.
      window.api.knowledgeBase
        .delete(searchBase.id)
        .catch((error) => Logger.warn(`[WebSearchService] Failed to cleanup search base for ${requestId}:`, error))
    }
  }
})
|
||||||
|
|
||||||
|
/**
 * Return the state of a single request, creating and caching a fresh one
 * (no signal, not paused) when none is stored yet.
 * @param requestId Request ID (usually the message ID)
 */
private getRequestState(requestId: string): RequestState {
  const existing = this.requestStates.get(requestId)
  if (existing) {
    return existing
  }

  const created: RequestState = {
    signal: null,
    isPaused: false,
    createdAt: Date.now()
  }
  this.requestStates.set(requestId, created)
  return created
}
|
||||||
|
|
||||||
|
createAbortSignal(requestId: string) {
|
||||||
const controller = new AbortController()
|
const controller = new AbortController()
|
||||||
this.signal = controller.signal
|
this.signal = controller.signal // 保持向后兼容
|
||||||
addAbortController(key, () => {
|
|
||||||
this.isPaused = true
|
const state = this.getRequestState(requestId)
|
||||||
|
state.signal = controller.signal
|
||||||
|
|
||||||
|
addAbortController(requestId, () => {
|
||||||
|
this.isPaused = true // 保持向后兼容
|
||||||
|
state.isPaused = true
|
||||||
this.signal = null
|
this.signal = null
|
||||||
|
this.requestStates.delete(requestId)
|
||||||
controller.abort()
|
controller.abort()
|
||||||
})
|
})
|
||||||
return controller
|
return controller
|
||||||
@ -137,45 +197,338 @@ class WebSearchService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Dispatch the web search status of a request to the store and, when a
 * non-zero `delayMs` is given, wait that many milliseconds before resolving.
 */
private async setWebSearchStatus(requestId: string, status: WebSearchStatus, delayMs?: number) {
  store.dispatch(setWebSearchStatus({ requestId, status }))

  if (!delayMs) {
    return
  }
  await new Promise((resolve) => setTimeout(resolve, delayMs))
}
|
||||||
|
|
||||||
|
/**
 * Make sure the knowledge base used to compress this request's search results
 * exists and matches `config`.
 *
 * A base already attached to the request is reused when `isConfigMatched`
 * accepts it; otherwise it is deleted and a new one is created.
 *
 * @param config Compression settings (embedding model, rerank model, dimensions)
 * @param documentCount Document count stored on a newly created base
 * @param requestId Request ID, used to derive the base id and name
 * @returns The knowledge base to use for this request
 * @throws Error when `config.embeddingModel` is not set
 */
private async ensureSearchBase(
  config: CompressionConfig,
  documentCount: number,
  requestId: string
): Promise<KnowledgeBase> {
  const baseId = `websearch-compression-${requestId}`
  const state = this.getRequestState(requestId)
  const current = state.searchBase

  if (current) {
    // Reuse the existing base when its configuration is unchanged.
    if (this.isConfigMatched(current, config)) {
      return current
    }
    // Otherwise remove the outdated base before building a new one.
    await window.api.knowledgeBase.delete(current.id)
  }

  const embeddingModel = config.embeddingModel
  if (!embeddingModel) {
    throw new Error('Embedding model is required for RAG compression')
  }

  const searchBase: KnowledgeBase = {
    id: baseId,
    name: `WebSearch-RAG-${requestId}`,
    model: embeddingModel,
    rerankModel: config.rerankModel,
    dimensions: config.embeddingDimensions,
    documentCount,
    items: [],
    created_at: Date.now(),
    updated_at: Date.now(),
    version: 1
  }
  state.searchBase = searchBase

  // Write the updated state back to the LRU cache.
  this.requestStates.set(requestId, state)

  // Create the knowledge base itself.
  await window.api.knowledgeBase.create(getKnowledgeBaseParams(searchBase))

  return searchBase
}
|
||||||
|
|
||||||
|
/**
 * Check whether an existing search base was built with the same settings as `config`.
 *
 * A model is identified by its id together with its provider, so both are
 * compared. Matching on id alone would reuse a base built by another provider
 * that happens to expose a model with the same id.
 *
 * @param base Existing knowledge base
 * @param config Current compression settings
 * @returns true when embedding model, rerank model and dimensions all match
 */
private isConfigMatched(base: KnowledgeBase, config: CompressionConfig): boolean {
  const sameEmbeddingModel =
    base.model.id === config.embeddingModel?.id && base.model.provider === config.embeddingModel?.provider

  // With no rerank model on either side, both comparisons are undefined === undefined.
  const sameRerankModel =
    base.rerankModel?.id === config.rerankModel?.id &&
    base.rerankModel?.provider === config.rerankModel?.provider

  return sameEmbeddingModel && sameRerankModel && base.dimensions === config.embeddingDimensions
}
|
||||||
|
|
||||||
|
/**
 * Query the search knowledge base with every question and return the hits as
 * references, ordered by descending score.
 * @param questions Questions to search for
 * @param searchBase Knowledge base to search
 * @returns References with 1-based ids; hits with identical content appear once
 */
private async querySearchBase(questions: string[], searchBase: KnowledgeBase): Promise<KnowledgeReference[]> {
  // 1. Search each question on its own, all in parallel.
  const perQuestionResults = await Promise.all(
    questions.map((question) => searchKnowledgeBase(question, searchBase))
  )

  // 2. Merge all hits and sort by score, highest first.
  const ranked = perQuestionResults.flat().sort((a, b) => b.score - a.score)

  // 3. Deduplicate by content. The list is already sorted, so the first
  //    occurrence kept here is the highest-scoring one.
  const byContent = new Map<string, (typeof ranked)[number]>()
  for (const hit of ranked) {
    if (!byContent.has(hit.pageContent)) {
      byContent.set(hit.pageContent, hit)
    }
  }
  const uniqueHits = [...byContent.values()]

  // 4. Convert to the reference format.
  const references = await Promise.all(
    uniqueHits.map(async (hit, position) => ({
      id: position + 1,
      content: hit.pageContent,
      sourceUrl: await getKnowledgeSourceUrl(hit),
      type: 'url' as const
    }))
  )
  return references
}
|
||||||
|
|
||||||
|
/**
 * Compress search results with RAG:
 * - add every raw result to the request's knowledge base as a note,
 * - retrieve the chunks relevant to the questions,
 * - select references and merge them back into results by source URL.
 *
 * @param questions Questions to retrieve for
 * @param rawResults Raw search results
 * @param config Compression settings
 * @param requestId Request ID
 * @returns Compressed search results
 */
private async compressWithSearchBase(
  questions: string[],
  rawResults: WebSearchProviderResult[],
  config: CompressionConfig,
  requestId: string
): Promise<WebSearchProviderResult[]> {
  // The number of documents to keep scales with the number of raw results.
  const documentsPerResult = config.documentCount ?? DEFAULT_WEBSEARCH_RAG_DOCUMENT_COUNT
  const totalDocumentCount = Math.max(0, rawResults.length) * documentsPerResult

  const searchBase = await this.ensureSearchBase(config, totalDocumentCount, requestId)

  // 1. Empty the knowledge base.
  await window.api.knowledgeBase.reset(getKnowledgeBaseParams(searchBase))

  // 2. Add all raw results in one go and wait until every add has finished.
  await Promise.all(
    rawResults.map(async (rawResult) => {
      const note: KnowledgeItem & { sourceUrl?: string } = {
        id: uuid(),
        type: 'note',
        content: rawResult.content,
        sourceUrl: rawResult.url, // used later to map chunks back to their result
        created_at: Date.now(),
        updated_at: Date.now(),
        processingStatus: 'pending'
      }

      await window.api.knowledgeBase.add({
        base: getKnowledgeBaseParams(searchBase),
        item: note
      })
    })
  )

  // 3. Run the multi-question search to get the compressed candidates.
  const references = await this.querySearchBase(questions, searchBase)

  // 4. Pick references (the original author describes selectReferences as a round-robin strategy).
  const selectedReferences = selectReferences(rawResults, references, totalDocumentCount)

  Logger.log('[WebSearchService] With RAG, the number of search results:', {
    raw: rawResults.length,
    retrieved: references.length,
    selected: selectedReferences.length
  })

  // 5. Group by sourceUrl and merge fragments from the same source.
  return consolidateReferencesByUrl(rawResults, selectedReferences)
}
|
||||||
|
|
||||||
|
/**
 * Compress search results by truncation, measured either in characters or in tokens.
 *
 * The configured `cutoffLimit` is split evenly across all results (each result
 * gets at least 1 unit). Content that gets shortened is suffixed with '...'.
 *
 * @param rawResults raw search results
 * @param config compression configuration
 * @returns the truncated search results; `rawResults` itself when no cutoff limit is set
 */
private async compressWithCutoff(
  rawResults: WebSearchProviderResult[],
  config: CompressionConfig
): Promise<WebSearchProviderResult[]> {
  if (!config.cutoffLimit) {
    Logger.warn('[WebSearchService] Cutoff limit is not set, skipping compression')
    return rawResults
  }

  // Nothing to truncate; also keeps the per-result division below away from zero.
  if (rawResults.length === 0) {
    return []
  }

  const perResultLimit = Math.max(1, Math.floor(config.cutoffLimit / rawResults.length))

  if (config.cutoffUnit === 'token') {
    // tokenx is only needed for token-based truncation, so it is loaded lazily
    // here and a failed import can no longer break character-based truncation.
    const { sliceByTokens } = await import('tokenx')

    return rawResults.map((result) => {
      const slicedContent = sliceByTokens(result.content, 0, perResultLimit)
      return {
        ...result,
        content: slicedContent.length < result.content.length ? slicedContent + '...' : slicedContent
      }
    })
  }

  // Character-based truncation (default behaviour).
  return rawResults.map((result) => ({
    ...result,
    content: result.content.length > perResultLimit ? result.content.slice(0, perResultLimit) + '...' : result.content
  }))
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 处理网络搜索请求的核心方法,处理过程中会设置运行时状态供 UI 使用。
|
||||||
|
*
|
||||||
|
* 该方法执行以下步骤:
|
||||||
|
* - 验证输入参数并处理边界情况
|
||||||
|
* - 处理特殊的summarize请求
|
||||||
|
* - 并行执行多个搜索查询
|
||||||
|
* - 聚合搜索结果并处理失败情况
|
||||||
|
* - 根据配置应用结果压缩(RAG或截断)
|
||||||
|
* - 返回最终的搜索响应
|
||||||
|
*
|
||||||
|
* @param webSearchProvider - 要使用的网络搜索提供商
|
||||||
|
* @param extractResults - 包含搜索问题和链接的提取结果对象
|
||||||
|
* @param requestId - 唯一的请求标识符,用于状态跟踪和资源管理
|
||||||
|
*
|
||||||
|
* @returns 包含搜索结果的响应对象
|
||||||
|
*/
|
||||||
public async processWebsearch(
|
public async processWebsearch(
|
||||||
webSearchProvider: WebSearchProvider,
|
webSearchProvider: WebSearchProvider,
|
||||||
extractResults: ExtractResults
|
extractResults: ExtractResults,
|
||||||
|
requestId: string
|
||||||
): Promise<WebSearchProviderResponse> {
|
): Promise<WebSearchProviderResponse> {
|
||||||
|
// 重置状态
|
||||||
|
await this.setWebSearchStatus(requestId, { phase: 'default' })
|
||||||
|
|
||||||
// 检查 websearch 和 question 是否有效
|
// 检查 websearch 和 question 是否有效
|
||||||
if (!extractResults.websearch?.question || extractResults.websearch.question.length === 0) {
|
if (!extractResults.websearch?.question || extractResults.websearch.question.length === 0) {
|
||||||
Logger.log('[processWebsearch] No valid question found in extractResults.websearch')
|
Logger.log('[processWebsearch] No valid question found in extractResults.websearch')
|
||||||
return { results: [] }
|
return { results: [] }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 使用请求特定的signal,如果没有则回退到全局signal
|
||||||
|
const signal = this.getRequestState(requestId).signal || this.signal
|
||||||
|
|
||||||
const questions = extractResults.websearch.question
|
const questions = extractResults.websearch.question
|
||||||
const links = extractResults.websearch.links
|
const links = extractResults.websearch.links
|
||||||
const firstQuestion = questions[0]
|
|
||||||
if (firstQuestion === 'summarize' && links && links.length > 0) {
|
|
||||||
const contents = await fetchWebContents(links, undefined, undefined, {
|
|
||||||
signal: this.signal
|
|
||||||
})
|
|
||||||
return {
|
|
||||||
query: 'summaries',
|
|
||||||
results: contents
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const searchPromises = questions.map((q) => this.search(webSearchProvider, q, { signal: this.signal }))
|
|
||||||
const searchResults = await Promise.allSettled(searchPromises)
|
|
||||||
const aggregatedResults: any[] = []
|
|
||||||
|
|
||||||
|
// 处理 summarize
|
||||||
|
if (questions[0] === 'summarize' && links && links.length > 0) {
|
||||||
|
const contents = await fetchWebContents(links, undefined, undefined, { signal })
|
||||||
|
return { query: 'summaries', results: contents }
|
||||||
|
}
|
||||||
|
|
||||||
|
const searchPromises = questions.map((q) => this.search(webSearchProvider, q, { signal }))
|
||||||
|
const searchResults = await Promise.allSettled(searchPromises)
|
||||||
|
|
||||||
|
// 统计成功完成的搜索数量
|
||||||
|
const successfulSearchCount = searchResults.filter((result) => result.status === 'fulfilled').length
|
||||||
|
if (successfulSearchCount > 1) {
|
||||||
|
await this.setWebSearchStatus(
|
||||||
|
requestId,
|
||||||
|
{
|
||||||
|
phase: 'fetch_complete',
|
||||||
|
countAfter: successfulSearchCount
|
||||||
|
},
|
||||||
|
1000
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
let finalResults: WebSearchProviderResult[] = []
|
||||||
searchResults.forEach((result) => {
|
searchResults.forEach((result) => {
|
||||||
if (result.status === 'fulfilled') {
|
if (result.status === 'fulfilled') {
|
||||||
if (result.value.results) {
|
if (result.value.results) {
|
||||||
aggregatedResults.push(...result.value.results)
|
finalResults.push(...result.value.results)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (result.status === 'rejected') {
|
if (result.status === 'rejected') {
|
||||||
throw result.reason
|
throw result.reason
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// 如果没有搜索结果,直接返回空结果
|
||||||
|
if (finalResults.length === 0) {
|
||||||
|
await this.setWebSearchStatus(requestId, { phase: 'default' })
|
||||||
return {
|
return {
|
||||||
query: questions.join(' | '),
|
query: questions.join(' | '),
|
||||||
results: aggregatedResults
|
results: []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const { compressionConfig } = this.getWebSearchState()
|
||||||
|
|
||||||
|
// RAG压缩处理
|
||||||
|
if (compressionConfig?.method === 'rag' && requestId) {
|
||||||
|
await this.setWebSearchStatus(requestId, { phase: 'rag' }, 500)
|
||||||
|
|
||||||
|
const originalCount = finalResults.length
|
||||||
|
|
||||||
|
try {
|
||||||
|
finalResults = await this.compressWithSearchBase(questions, finalResults, compressionConfig, requestId)
|
||||||
|
await this.setWebSearchStatus(
|
||||||
|
requestId,
|
||||||
|
{
|
||||||
|
phase: 'rag_complete',
|
||||||
|
countBefore: originalCount,
|
||||||
|
countAfter: finalResults.length
|
||||||
|
},
|
||||||
|
1000
|
||||||
|
)
|
||||||
|
} catch (error) {
|
||||||
|
Logger.warn('[WebSearchService] RAG compression failed, will return empty results:', error)
|
||||||
|
window.message.error({
|
||||||
|
key: 'websearch-rag-failed',
|
||||||
|
duration: 10,
|
||||||
|
content: `${i18n.t('settings.websearch.compression.error.rag_failed')}: ${formatErrorMessage(error)}`
|
||||||
|
})
|
||||||
|
|
||||||
|
finalResults = []
|
||||||
|
await this.setWebSearchStatus(requestId, { phase: 'rag_failed' }, 1000)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 截断压缩处理
|
||||||
|
else if (compressionConfig?.method === 'cutoff' && compressionConfig.cutoffLimit) {
|
||||||
|
await this.setWebSearchStatus(requestId, { phase: 'cutoff' }, 500)
|
||||||
|
finalResults = await this.compressWithCutoff(finalResults, compressionConfig)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 重置状态
|
||||||
|
await this.setWebSearchStatus(requestId, { phase: 'default' })
|
||||||
|
|
||||||
|
return {
|
||||||
|
query: questions.join(' | '),
|
||||||
|
results: finalResults
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -50,7 +50,7 @@ const persistedReducer = persistReducer(
|
|||||||
{
|
{
|
||||||
key: 'cherry-studio',
|
key: 'cherry-studio',
|
||||||
storage,
|
storage,
|
||||||
version: 115,
|
version: 116,
|
||||||
blacklist: ['runtime', 'messages', 'messageBlocks'],
|
blacklist: ['runtime', 'messages', 'messageBlocks'],
|
||||||
migrate
|
migrate
|
||||||
},
|
},
|
||||||
|
|||||||
@ -1631,6 +1631,31 @@ const migrateConfig = {
|
|||||||
if (state.settings) {
|
if (state.settings) {
|
||||||
state.settings.upgradeChannel = UpgradeChannel.LATEST
|
state.settings.upgradeChannel = UpgradeChannel.LATEST
|
||||||
}
|
}
|
||||||
|
return state
|
||||||
|
} catch (error) {
|
||||||
|
return state
|
||||||
|
}
|
||||||
|
},
|
||||||
|
'116': (state: RootState) => {
|
||||||
|
try {
|
||||||
|
if (state.websearch) {
|
||||||
|
// migrate contentLimit to cutoffLimit
|
||||||
|
// @ts-ignore eslint-disable-next-line
|
||||||
|
if (state.websearch.contentLimit) {
|
||||||
|
state.websearch.compressionConfig = {
|
||||||
|
method: 'cutoff',
|
||||||
|
cutoffUnit: 'char',
|
||||||
|
// @ts-ignore eslint-disable-next-line
|
||||||
|
cutoffLimit: state.websearch.contentLimit
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
state.websearch.compressionConfig = { method: 'none', cutoffUnit: 'char' }
|
||||||
|
}
|
||||||
|
|
||||||
|
// @ts-ignore eslint-disable-next-line
|
||||||
|
delete state.websearch.contentLimit
|
||||||
|
}
|
||||||
|
|
||||||
return state
|
return state
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
return state
|
return state
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
import { createSlice, PayloadAction } from '@reduxjs/toolkit'
|
import { createSlice, PayloadAction } from '@reduxjs/toolkit'
|
||||||
import { AppLogo, UserAvatar } from '@renderer/config/env'
|
import { AppLogo, UserAvatar } from '@renderer/config/env'
|
||||||
import type { MinAppType, Topic } from '@renderer/types'
|
import type { MinAppType, Topic, WebSearchStatus } from '@renderer/types'
|
||||||
import type { UpdateInfo } from 'builder-util-runtime'
|
import type { UpdateInfo } from 'builder-util-runtime'
|
||||||
|
|
||||||
export interface ChatState {
|
export interface ChatState {
|
||||||
@ -13,6 +13,10 @@ export interface ChatState {
|
|||||||
newlyRenamedTopics: string[]
|
newlyRenamedTopics: string[]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Web search state kept in the runtime slice. */
export interface WebSearchState {
  /** Status of each web search currently being tracked, keyed by request ID. */
  activeSearches: { [requestId: string]: WebSearchStatus }
}
|
||||||
|
|
||||||
export interface UpdateState {
|
export interface UpdateState {
|
||||||
info: UpdateInfo | null
|
info: UpdateInfo | null
|
||||||
checking: boolean
|
checking: boolean
|
||||||
@ -39,6 +43,7 @@ export interface RuntimeState {
|
|||||||
update: UpdateState
|
update: UpdateState
|
||||||
export: ExportState
|
export: ExportState
|
||||||
chat: ChatState
|
chat: ChatState
|
||||||
|
websearch: WebSearchState
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ExportState {
|
export interface ExportState {
|
||||||
@ -72,6 +77,9 @@ const initialState: RuntimeState = {
|
|||||||
activeTopic: null,
|
activeTopic: null,
|
||||||
renamingTopics: [],
|
renamingTopics: [],
|
||||||
newlyRenamedTopics: []
|
newlyRenamedTopics: []
|
||||||
|
},
|
||||||
|
websearch: {
|
||||||
|
activeSearches: {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -130,6 +138,17 @@ const runtimeSlice = createSlice({
|
|||||||
},
|
},
|
||||||
setNewlyRenamedTopics: (state, action: PayloadAction<string[]>) => {
|
setNewlyRenamedTopics: (state, action: PayloadAction<string[]>) => {
|
||||||
state.chat.newlyRenamedTopics = action.payload
|
state.chat.newlyRenamedTopics = action.payload
|
||||||
|
},
|
||||||
|
// WebSearch related actions
|
||||||
|
setActiveSearches: (state, action: PayloadAction<Record<string, WebSearchStatus>>) => {
|
||||||
|
state.websearch.activeSearches = action.payload
|
||||||
|
},
|
||||||
|
// Record the status of one web search request. The 'default' phase means the
// request is idle/finished, so its entry is removed instead of stored; without
// the early return the entry would be re-added right after being deleted and
// finished requests would pile up in activeSearches.
setWebSearchStatus: (state, action: PayloadAction<{ requestId: string; status: WebSearchStatus }>) => {
  const { requestId, status } = action.payload
  if (status.phase === 'default') {
    delete state.websearch.activeSearches[requestId]
    return
  }
  state.websearch.activeSearches[requestId] = status
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@ -151,7 +170,10 @@ export const {
|
|||||||
setSelectedMessageIds,
|
setSelectedMessageIds,
|
||||||
setActiveTopic,
|
setActiveTopic,
|
||||||
setRenamingTopics,
|
setRenamingTopics,
|
||||||
setNewlyRenamedTopics
|
setNewlyRenamedTopics,
|
||||||
|
// WebSearch related actions
|
||||||
|
setActiveSearches,
|
||||||
|
setWebSearchStatus
|
||||||
} = runtimeSlice.actions
|
} = runtimeSlice.actions
|
||||||
|
|
||||||
export default runtimeSlice.reducer
|
export default runtimeSlice.reducer
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
import { createSlice, PayloadAction } from '@reduxjs/toolkit'
|
import { createSlice, PayloadAction } from '@reduxjs/toolkit'
|
||||||
import type { WebSearchProvider } from '@renderer/types'
|
import type { Model, WebSearchProvider } from '@renderer/types'
|
||||||
export interface SubscribeSource {
|
export interface SubscribeSource {
|
||||||
key: number
|
key: number
|
||||||
url: string
|
url: string
|
||||||
@ -7,6 +7,16 @@ export interface SubscribeSource {
|
|||||||
blacklist?: string[] // 存储从该订阅源获取的黑名单
|
blacklist?: string[] // 存储从该订阅源获取的黑名单
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Settings that control how web search results are compressed. */
export interface CompressionConfig {
  /** Compression strategy: no compression, truncation, or RAG. */
  method: 'none' | 'cutoff' | 'rag'
  /** Truncation budget used by the 'cutoff' method. */
  cutoffLimit?: number
  /** Unit in which `cutoffLimit` is measured. */
  cutoffUnit?: 'char' | 'token'
  embeddingModel?: Model
  /** Embedding dimensions; undefined means they are obtained automatically. */
  embeddingDimensions?: number
  /** Number of documents per search result (an expected value only). */
  documentCount?: number
  rerankModel?: Model
}
|
||||||
|
|
||||||
export interface WebSearchState {
|
export interface WebSearchState {
|
||||||
// 默认搜索提供商的ID
|
// 默认搜索提供商的ID
|
||||||
/** @deprecated 支持在快捷菜单中自选搜索供应商,所以这个不再适用 */
|
/** @deprecated 支持在快捷菜单中自选搜索供应商,所以这个不再适用 */
|
||||||
@ -24,12 +34,13 @@ export interface WebSearchState {
|
|||||||
// 是否覆盖服务商搜索
|
// 是否覆盖服务商搜索
|
||||||
/** @deprecated 支持在快捷菜单中自选搜索供应商,所以这个不再适用 */
|
/** @deprecated 支持在快捷菜单中自选搜索供应商,所以这个不再适用 */
|
||||||
overwrite: boolean
|
overwrite: boolean
|
||||||
contentLimit?: number
|
// 搜索结果压缩
|
||||||
|
compressionConfig?: CompressionConfig
|
||||||
// 具体供应商的配置
|
// 具体供应商的配置
|
||||||
providerConfig: Record<string, any>
|
providerConfig: Record<string, any>
|
||||||
}
|
}
|
||||||
|
|
||||||
const initialState: WebSearchState = {
|
export const initialState: WebSearchState = {
|
||||||
defaultProvider: 'local-bing',
|
defaultProvider: 'local-bing',
|
||||||
providers: [
|
providers: [
|
||||||
{
|
{
|
||||||
@ -78,6 +89,10 @@ const initialState: WebSearchState = {
|
|||||||
excludeDomains: [],
|
excludeDomains: [],
|
||||||
subscribeSources: [],
|
subscribeSources: [],
|
||||||
overwrite: false,
|
overwrite: false,
|
||||||
|
compressionConfig: {
|
||||||
|
method: 'none',
|
||||||
|
cutoffUnit: 'char'
|
||||||
|
},
|
||||||
providerConfig: {}
|
providerConfig: {}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -150,8 +165,14 @@ const websearchSlice = createSlice({
|
|||||||
state.providers.push(action.payload)
|
state.providers.push(action.payload)
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
setContentLimit: (state, action: PayloadAction<number | undefined>) => {
|
setCompressionConfig: (state, action: PayloadAction<CompressionConfig>) => {
|
||||||
state.contentLimit = action.payload
|
state.compressionConfig = action.payload
|
||||||
|
},
|
||||||
|
updateCompressionConfig: (state, action: PayloadAction<Partial<CompressionConfig>>) => {
|
||||||
|
state.compressionConfig = {
|
||||||
|
...state.compressionConfig,
|
||||||
|
...action.payload
|
||||||
|
} as CompressionConfig
|
||||||
},
|
},
|
||||||
setProviderConfig: (state, action: PayloadAction<Record<string, any>>) => {
|
setProviderConfig: (state, action: PayloadAction<Record<string, any>>) => {
|
||||||
state.providerConfig = action.payload
|
state.providerConfig = action.payload
|
||||||
@ -176,7 +197,8 @@ export const {
|
|||||||
setSubscribeSources,
|
setSubscribeSources,
|
||||||
setOverwrite,
|
setOverwrite,
|
||||||
addWebSearchProvider,
|
addWebSearchProvider,
|
||||||
setContentLimit,
|
setCompressionConfig,
|
||||||
|
updateCompressionConfig,
|
||||||
setProviderConfig,
|
setProviderConfig,
|
||||||
updateProviderConfig
|
updateProviderConfig
|
||||||
} = websearchSlice.actions
|
} = websearchSlice.actions
|
||||||
|
|||||||
@ -500,7 +500,6 @@ export type WebSearchProvider = {
|
|||||||
url?: string
|
url?: string
|
||||||
basicAuthUsername?: string
|
basicAuthUsername?: string
|
||||||
basicAuthPassword?: string
|
basicAuthPassword?: string
|
||||||
contentLimit?: number
|
|
||||||
usingBrowser?: boolean
|
usingBrowser?: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -542,6 +541,14 @@ export type WebSearchResponse = {
|
|||||||
source: WebSearchSource
|
source: WebSearchSource
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Phase a web search request can be in while it is processed. */
export type WebSearchPhase =
  | 'default'
  | 'fetch_complete'
  | 'rag'
  | 'rag_complete'
  | 'rag_failed'
  | 'cutoff'

/** Progress information for a single web search request. */
export type WebSearchStatus = {
  /** Current phase of the request. */
  phase: WebSearchPhase
  /** Count reported before the phase ran (optional). */
  countBefore?: number
  /** Count reported after the phase ran (optional). */
  countAfter?: number
}
|
||||||
|
|
||||||
export type KnowledgeReference = {
|
export type KnowledgeReference = {
|
||||||
id: number
|
id: number
|
||||||
content: string
|
content: string
|
||||||
|
|||||||
226
src/renderer/src/utils/__tests__/websearch.test.ts
Normal file
226
src/renderer/src/utils/__tests__/websearch.test.ts
Normal file
@ -0,0 +1,226 @@
|
|||||||
|
import { KnowledgeReference, WebSearchProviderResult } from '@renderer/types'
|
||||||
|
import { describe, expect, it } from 'vitest'
|
||||||
|
|
||||||
|
import { consolidateReferencesByUrl, selectReferences } from '../websearch'
|
||||||
|
|
||||||
|
describe('websearch', () => {
  // Shared fixtures: a raw provider result and a retrieved knowledge reference.
  const makeRawResult = (url: string, title: string): WebSearchProviderResult => {
    return { title, url, content: `Original content for ${title}` }
  }

  const makeReference = (sourceUrl: string, content: string, id: number = 1): KnowledgeReference => {
    return { id, sourceUrl, content, type: 'url' }
  }

  describe('consolidateReferencesByUrl', () => {
    it('should consolidate single reference to matching raw result', () => {
      // Basic case: one reference that matches one raw result
      const raws = [makeRawResult('https://example.com', 'Example Title')]
      const refs = [makeReference('https://example.com', 'Retrieved content')]

      const consolidated = consolidateReferencesByUrl(raws, refs)

      expect(consolidated).toHaveLength(1)
      expect(consolidated[0]).toEqual({
        title: 'Example Title',
        url: 'https://example.com',
        content: 'Retrieved content'
      })
    })

    it('should consolidate multiple references from same source URL', () => {
      // Several chunks are merged into the same URL
      const raws = [makeRawResult('https://example.com', 'Example Title')]
      const refs = [
        makeReference('https://example.com', 'First content', 1),
        makeReference('https://example.com', 'Second content', 2)
      ]

      const consolidated = consolidateReferencesByUrl(raws, refs)

      expect(consolidated).toHaveLength(1)
      expect(consolidated[0]).toEqual({
        title: 'Example Title',
        url: 'https://example.com',
        content: 'First content\n\n---\n\nSecond content'
      })
    })

    it('should consolidate references from multiple source URLs', () => {
      // References coming from different URLs
      const raws = [
        makeRawResult('https://example.com', 'Example Title'),
        makeRawResult('https://test.com', 'Test Title')
      ]
      const refs = [
        makeReference('https://example.com', 'Example content', 1),
        makeReference('https://test.com', 'Test content', 2)
      ]

      const consolidated = consolidateReferencesByUrl(raws, refs)

      expect(consolidated).toHaveLength(2)
      // Result order is not asserted, hence toContainEqual
      expect(consolidated).toContainEqual({
        title: 'Example Title',
        url: 'https://example.com',
        content: 'Example content'
      })
      expect(consolidated).toContainEqual({
        title: 'Test Title',
        url: 'https://test.com',
        content: 'Test content'
      })
    })

    it('should use custom separator for multiple references', () => {
      // Custom separator
      const raws = [makeRawResult('https://example.com', 'Example Title')]
      const refs = [
        makeReference('https://example.com', 'First content', 1),
        makeReference('https://example.com', 'Second content', 2)
      ]

      const consolidated = consolidateReferencesByUrl(raws, refs, ' | ')

      expect(consolidated).toHaveLength(1)
      expect(consolidated[0].content).toBe('First content | Second content')
    })

    it('should ignore references with no matching raw result', () => {
      // A reference without a matching raw result is dropped
      const raws = [makeRawResult('https://example.com', 'Example Title')]
      const refs = [
        makeReference('https://example.com', 'Matching content', 1),
        makeReference('https://nonexistent.com', 'Non-matching content', 2)
      ]

      const consolidated = consolidateReferencesByUrl(raws, refs)

      expect(consolidated).toHaveLength(1)
      expect(consolidated[0]).toEqual({
        title: 'Example Title',
        url: 'https://example.com',
        content: 'Matching content'
      })
    })

    it('should return empty array when no references match raw results', () => {
      // Nothing matches at all
      const raws = [makeRawResult('https://example.com', 'Example Title')]
      const refs = [makeReference('https://nonexistent.com', 'Non-matching content', 1)]

      const consolidated = consolidateReferencesByUrl(raws, refs)

      expect(consolidated).toHaveLength(0)
    })

    it('should handle empty inputs', () => {
      // Boundary condition: empty inputs
      expect(consolidateReferencesByUrl([], [])).toEqual([])

      const raws = [makeRawResult('https://example.com', 'Example Title')]
      expect(consolidateReferencesByUrl(raws, [])).toEqual([])

      const refs = [makeReference('https://example.com', 'Content', 1)]
      expect(consolidateReferencesByUrl([], refs)).toEqual([])
    })

    it('should preserve original result metadata', () => {
      // The metadata of the raw result must be left unchanged
      const raws = [makeRawResult('https://example.com', 'Complex Title with Special Characters & Symbols')]
      const refs = [makeReference('https://example.com', 'New content', 1)]

      const consolidated = consolidateReferencesByUrl(raws, refs)

      expect(consolidated[0].title).toBe('Complex Title with Special Characters & Symbols')
      expect(consolidated[0].url).toBe('https://example.com')
    })
  })

  describe('selectReferences', () => {
    it('should select references using round robin strategy', () => {
      const raws = [
        makeRawResult('https://a.com', 'A'),
        makeRawResult('https://b.com', 'B'),
        makeRawResult('https://c.com', 'C')
      ]

      const refs = [
        makeReference('https://a.com', 'A1', 1),
        makeReference('https://a.com', 'A2', 2),
        makeReference('https://b.com', 'B1', 3),
        makeReference('https://c.com', 'C1', 4),
        makeReference('https://c.com', 'C2', 5)
      ]

      const picked = selectReferences(raws, refs, 4)

      expect(picked).toHaveLength(4)
      // Round robin in rawResults order: A1, B1, C1, A2
      expect(picked[0].content).toBe('A1')
      expect(picked[1].content).toBe('B1')
      expect(picked[2].content).toBe('C1')
      expect(picked[3].content).toBe('A2')
    })

    it('should handle maxRefs larger than available references', () => {
      const raws = [makeRawResult('https://a.com', 'A')]
      const refs = [makeReference('https://a.com', 'A1', 1)]

      const picked = selectReferences(raws, refs, 10)

      expect(picked).toHaveLength(1)
      expect(picked[0].content).toBe('A1')
    })

    it('should return empty array for edge cases', () => {
      const raws = [makeRawResult('https://a.com', 'A')]
      const refs = [makeReference('https://a.com', 'A1', 1)]

      // maxRefs is 0
      expect(selectReferences(raws, refs, 0)).toEqual([])

      // empty references
      expect(selectReferences(raws, [], 5)).toEqual([])

      // no matching URLs
      const nonMatchingRefs = [makeReference('https://different.com', 'Content', 1)]
      expect(selectReferences(raws, nonMatchingRefs, 5)).toEqual([])
    })

    it('should preserve rawResults order in round robin', () => {
      // The order of rawResults drives the round-robin order
      const raws = [
        makeRawResult('https://z.com', 'Z'), // expected to be picked first
        makeRawResult('https://a.com', 'A') // expected to be picked second
      ]

      const refs = [makeReference('https://a.com', 'A1', 1), makeReference('https://z.com', 'Z1', 2)]

      const picked = selectReferences(raws, refs, 2)

      expect(picked).toHaveLength(2)
      expect(picked[0].content).toBe('Z1') // Z is picked first
      expect(picked[1].content).toBe('A1') // A is picked afterwards
    })
  })
})
|
||||||
116
src/renderer/src/utils/websearch.ts
Normal file
116
src/renderer/src/utils/websearch.ts
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
import { KnowledgeReference, WebSearchProviderResult } from '@renderer/types'
|
||||||
|
|
||||||
|
/**
 * Consolidate retrieved knowledge chunks into search results, grouped by source URL.
 *
 * Takes the raw search results plus the chunks retrieved from the knowledge base,
 * groups chunks that share a source URL and joins each group into one result.
 * Chunks whose source URL matches no raw result are dropped.
 *
 * @param rawResults raw search results, supplying the title and URL of each source
 * @param references relevant chunks retrieved from the knowledge base
 * @param separator separator placed between merged chunks, defaults to '\n\n---\n\n'
 * @returns the merged search results, in order of each URL's first matching reference
 */
export function consolidateReferencesByUrl(
  rawResults: WebSearchProviderResult[],
  references: KnowledgeReference[],
  separator: string = '\n\n---\n\n'
): WebSearchProviderResult[] {
  // Index the raw results by URL for constant-time lookup (the last one wins on duplicates).
  const rawByUrl = new Map<string, WebSearchProviderResult>()
  for (const raw of rawResults) {
    rawByUrl.set(raw.url, raw)
  }

  // Collect the chunk contents per source URL; Map keeps first-seen insertion order.
  const groups = new Map<string, { source: WebSearchProviderResult; fragments: string[] }>()
  for (const reference of references) {
    const source = rawByUrl.get(reference.sourceUrl)
    if (!source) {
      continue
    }

    const group = groups.get(reference.sourceUrl)
    if (group === undefined) {
      groups.set(reference.sourceUrl, { source, fragments: [reference.content] })
    } else {
      group.fragments.push(reference.content)
    }
  }

  // Turn every group into one result carrying the joined content.
  return [...groups.values()].map(({ source, fragments }) => ({
    title: source.title,
    url: source.url,
    content: fragments.join(separator)
  }))
}
|
||||||
|
|
||||||
|
/**
 * Select up to a given number of references using a round-robin strategy.
 *
 * Sources are visited in the order of the raw search results, taking one
 * reference per source per round, so every source gets a chance to be selected.
 * References whose source URL matches no raw result are never selected.
 *
 * @param rawResults raw search results, which determine the round-robin order
 * @param references all candidate references
 * @param maxRefs maximum number of references to select
 * @returns the references picked by the round-robin strategy
 */
export function selectReferences(
  rawResults: WebSearchProviderResult[],
  references: KnowledgeReference[],
  maxRefs: number
): KnowledgeReference[] {
  if (references.length === 0 || maxRefs <= 0) {
    return []
  }

  // Position of every URL in rawResults (the last occurrence wins on duplicates).
  const positionByUrl = new Map<string, number>()
  for (const [position, raw] of rawResults.entries()) {
    positionByUrl.set(raw.url, position)
  }

  // Bucket the references by source URL, keeping their incoming order inside each bucket.
  const bucketByUrl = new Map<string, KnowledgeReference[]>()
  for (const reference of references) {
    const bucket = bucketByUrl.get(reference.sourceUrl)
    if (bucket === undefined) {
      bucketByUrl.set(reference.sourceUrl, [reference])
    } else {
      bucket.push(reference)
    }
  }

  // Keep only the buckets whose URL is a known raw result, ordered like rawResults.
  const queues: { position: number; bucket: KnowledgeReference[] }[] = []
  for (const [url, bucket] of bucketByUrl) {
    const position = positionByUrl.get(url)
    if (position !== undefined) {
      queues.push({ position, bucket })
    }
  }
  queues.sort((left, right) => left.position - right.position)

  // Round r takes the r-th reference of every queue that still has one.
  const picked: KnowledgeReference[] = []
  for (let round = 0; picked.length < maxRefs; round++) {
    let tookAny = false

    for (const { bucket } of queues) {
      if (round >= bucket.length) {
        continue
      }
      picked.push(bucket[round])
      tookAny = true
      if (picked.length >= maxRefs) {
        return picked
      }
    }

    // Every queue is exhausted: nothing more to hand out.
    if (!tookAny) {
      break
    }
  }

  return picked
}
|
||||||
10
yarn.lock
10
yarn.lock
@ -5745,7 +5745,7 @@ __metadata:
|
|||||||
styled-components: "npm:^6.1.11"
|
styled-components: "npm:^6.1.11"
|
||||||
tar: "npm:^7.4.3"
|
tar: "npm:^7.4.3"
|
||||||
tiny-pinyin: "npm:^1.3.2"
|
tiny-pinyin: "npm:^1.3.2"
|
||||||
tokenx: "npm:^0.4.1"
|
tokenx: "npm:^1.1.0"
|
||||||
turndown: "npm:7.2.0"
|
turndown: "npm:7.2.0"
|
||||||
typescript: "npm:^5.6.2"
|
typescript: "npm:^5.6.2"
|
||||||
uuid: "npm:^10.0.0"
|
uuid: "npm:^10.0.0"
|
||||||
@ -17588,10 +17588,10 @@ __metadata:
|
|||||||
languageName: node
|
languageName: node
|
||||||
linkType: hard
|
linkType: hard
|
||||||
|
|
||||||
"tokenx@npm:^0.4.1":
|
"tokenx@npm:^1.1.0":
|
||||||
version: 0.4.1
|
version: 1.1.0
|
||||||
resolution: "tokenx@npm:0.4.1"
|
resolution: "tokenx@npm:1.1.0"
|
||||||
checksum: 10c0/377f4e3c31ff9dc57b5b6af0fb1ae821227dee5e1d87b92a3ab1a0ed25454f01185c709d73592002b0d3024de1c904c8f029c46ae1806677816e4659fb8c481e
|
checksum: 10c0/8214bce58b48e130bcf4a27ac1bb5abf486c395310fb0c8f54e31656acacf97da533372afb9e8ac8f7736e6c3f29af86ea9623d4875f1399e66a5203b80609db
|
||||||
languageName: node
|
languageName: node
|
||||||
linkType: hard
|
linkType: hard
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user