feat(WebSearch): support RAG for external websearch, improve feedback (#7446)

* feat(WebSearch, RAG): support RAG for external websearch

* refactor(WebSearch): handle content limit in service

* refactor: update migrate

* refactor: UI, constants, types

* refactor: migrate contentLimit to cutoffLimit

* refactor: update default rag document count

* refactor: add a helper function for merging references

* refactor: reference filtering

* feat: feedback for websearch phases

* feat: support cutoff by token

* refactor: add a warning and fix the bound of cutoff limit

* fix: not pass `dimensions` if it is not set by the user

* refactor: update i18n and error message

* refactor: improve UI

* fix: cutoff unit style
This commit is contained in:
one 2025-06-27 18:04:42 +08:00 committed by GitHub
parent 1fe439bb51
commit d463d6ea2e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
34 changed files with 1424 additions and 115 deletions

View File

@ -213,7 +213,7 @@
"styled-components": "^6.1.11",
"tar": "^7.4.3",
"tiny-pinyin": "^1.3.2",
"tokenx": "^0.4.1",
"tokenx": "^1.1.0",
"typescript": "^5.6.2",
"uuid": "^10.0.0",
"vite": "6.2.6",

View File

@ -0,0 +1,44 @@
import { BaseLoader } from '@cherrystudio/embedjs-interfaces'
import { cleanString } from '@cherrystudio/embedjs-utils'
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'
import md5 from 'md5'
export class NoteLoader extends BaseLoader<{ type: 'NoteLoader' }> {
private readonly text: string
private readonly sourceUrl?: string
constructor({
text,
sourceUrl,
chunkSize,
chunkOverlap
}: {
text: string
sourceUrl?: string
chunkSize?: number
chunkOverlap?: number
}) {
super(`NoteLoader_${md5(text + (sourceUrl || ''))}`, { text, sourceUrl }, chunkSize ?? 2000, chunkOverlap ?? 0)
this.text = text
this.sourceUrl = sourceUrl
}
override async *getUnfilteredChunks() {
const chunker = new RecursiveCharacterTextSplitter({
chunkSize: this.chunkSize,
chunkOverlap: this.chunkOverlap
})
const chunks = await chunker.splitText(cleanString(this.text))
for (const chunk of chunks) {
yield {
pageContent: chunk,
metadata: {
type: 'NoteLoader' as const,
source: this.sourceUrl || 'note'
}
}
}
}
}

View File

@ -16,13 +16,14 @@
import * as fs from 'node:fs'
import path from 'node:path'
import { RAGApplication, RAGApplicationBuilder, TextLoader } from '@cherrystudio/embedjs'
import { RAGApplication, RAGApplicationBuilder } from '@cherrystudio/embedjs'
import type { ExtractChunkData } from '@cherrystudio/embedjs-interfaces'
import { LibSqlDb } from '@cherrystudio/embedjs-libsql'
import { SitemapLoader } from '@cherrystudio/embedjs-loader-sitemap'
import { WebLoader } from '@cherrystudio/embedjs-loader-web'
import Embeddings from '@main/embeddings/Embeddings'
import { addFileLoader } from '@main/loader'
import { NoteLoader } from '@main/loader/noteLoader'
import Reranker from '@main/reranker/Reranker'
import { windowService } from '@main/services/WindowService'
import { getDataPath } from '@main/utils'
@ -143,7 +144,7 @@ class KnowledgeService {
this.getRagApplication(base)
}
public reset = async (_: Electron.IpcMainInvokeEvent, { base }: { base: KnowledgeBaseParams }): Promise<void> => {
public reset = async (_: Electron.IpcMainInvokeEvent, base: KnowledgeBaseParams): Promise<void> => {
const ragApplication = await this.getRagApplication(base)
await ragApplication.reset()
}
@ -333,6 +334,7 @@ class KnowledgeService {
): LoaderTask {
const { base, item, forceReload } = options
const content = item.content as string
const sourceUrl = (item as any).sourceUrl
const encoder = new TextEncoder()
const contentBytes = encoder.encode(content)
@ -342,7 +344,12 @@ class KnowledgeService {
state: LoaderTaskItemState.PENDING,
task: () => {
const loaderReturn = ragApplication.addLoader(
new TextLoader({ text: content, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }),
new NoteLoader({
text: content,
sourceUrl,
chunkSize: base.chunkSize,
chunkOverlap: base.chunkOverlap
}),
forceReload
) as Promise<LoaderReturn>

View File

@ -1,6 +1,5 @@
import { Search } from 'lucide-react'
import { motion } from 'motion/react'
import { useTranslation } from 'react-i18next'
import styled from 'styled-components'
interface Props {
@ -18,7 +17,6 @@ const spinnerVariants = {
}
export default function Spinner({ text }: Props) {
const { t } = useTranslation()
return (
<Searching
variants={spinnerVariants}
@ -31,7 +29,7 @@ export default function Spinner({ text }: Props) {
ease: 'easeInOut'
}}>
<Search size={16} style={{ color: 'unset' }} />
<span>{t(text)}</span>
<span>{text}</span>
</Searching>
)
}

View File

@ -3,6 +3,7 @@ export const DEFAULT_CONTEXTCOUNT = 5
export const DEFAULT_MAX_TOKENS = 4096
export const DEFAULT_KNOWLEDGE_DOCUMENT_COUNT = 6
export const DEFAULT_KNOWLEDGE_THRESHOLD = 0.0
export const DEFAULT_WEBSEARCH_RAG_DOCUMENT_COUNT = 1
export const platform = window.electron?.process?.platform
export const isMac = platform === 'darwin'

View File

@ -1,9 +1,12 @@
import { useAppDispatch, useAppSelector } from '@renderer/store'
import {
addSubscribeSource as _addSubscribeSource,
type CompressionConfig,
removeSubscribeSource as _removeSubscribeSource,
setCompressionConfig,
setDefaultProvider as _setDefaultProvider,
setSubscribeSources as _setSubscribeSources,
updateCompressionConfig,
updateSubscribeBlacklist as _updateSubscribeBlacklist,
updateWebSearchProvider,
updateWebSearchProviders
@ -90,3 +93,14 @@ export const useBlacklist = () => {
setSubscribeSources
}
}
export const useWebSearchSettings = () => {
const state = useAppSelector((state) => state.websearch)
const dispatch = useAppDispatch()
return {
...state,
setCompressionConfig: (config: CompressionConfig) => dispatch(setCompressionConfig(config)),
updateCompressionConfig: (config: Partial<CompressionConfig>) => dispatch(updateCompressionConfig(config))
}
}

View File

@ -703,6 +703,13 @@
"success.siyuan.export": "Successfully exported to Siyuan Note",
"warn.yuque.exporting": "Exporting to Yuque, please do not request export repeatedly!",
"warn.siyuan.exporting": "Exporting to Siyuan Note, please do not request export repeatedly!",
"websearch": {
"rag": "Executing RAG...",
"rag_complete": "Keeping {{countAfter}} out of {{countBefore}} results...",
"rag_failed": "RAG failed, returning empty results...",
"cutoff": "Truncating search content...",
"fetch_complete": "Completed {{count}} searches..."
},
"download.success": "Download successfully",
"download.failed": "Download failed"
},
@ -776,6 +783,7 @@
"dimensions": "Dimensions {{dimensions}}",
"edit": "Edit Model",
"embedding": "Embedding",
"embedding_dimensions": "Embedding Dimensions",
"embedding_model": "Embedding Model",
"embedding_model_tooltip": "Add in Settings->Model Provider->Manage",
"function_calling": "Function Calling",
@ -1845,8 +1853,33 @@
"overwrite_tooltip": "Force use search service instead of LLM",
"apikey": "API key",
"free": "Free",
"content_limit": "Content length limit",
"content_limit_tooltip": "Limit the content length of the search results; content that exceeds the limit will be truncated."
"compression": {
"title": "Search Result Compression",
"method": "Compression Method",
"method.none": "None",
"method.cutoff": "Cutoff",
"cutoff.limit": "Cutoff Limit",
"cutoff.limit.placeholder": "Enter length",
"cutoff.limit.tooltip": "Limit the content length of search results, content exceeding the limit will be truncated (e.g., 2000 characters)",
"cutoff.unit.char": "Char",
"cutoff.unit.token": "Token",
"method.rag": "RAG",
"rag.document_count": "Document Count",
"rag.document_count.default": "Default",
"rag.document_count.tooltip": "Expected number of documents to extract from each search result, the actual total number of extracted documents is this value multiplied by the number of search results.",
"rag.embedding_dimensions.auto_get": "Auto Get Dimensions",
"rag.embedding_dimensions.placeholder": "Leave empty",
"rag.embedding_dimensions.tooltip": "If left blank, the dimensions parameter will not be passed",
"info": {
"dimensions_auto_success": "Dimensions auto-obtained successfully, dimensions: {{dimensions}}"
},
"error": {
"embedding_model_required": "Please select an embedding model first",
"dimensions_auto_failed": "Failed to auto-obtain dimensions",
"provider_not_found": "Provider not found",
"rag_failed": "RAG failed"
}
}
},
"quickPhrase": {
"title": "Quick Phrases",

View File

@ -702,6 +702,13 @@
"warn.yuque.exporting": "語雀にエクスポート中です。重複してエクスポートしないでください!",
"warn.siyuan.exporting": "思源ノートにエクスポート中です。重複してエクスポートしないでください!",
"error.yuque.no_config": "語雀のAPIアドレスまたはトークンが設定されていません",
"websearch": {
"rag": "RAGを実行中...",
"rag_complete": "{{countBefore}}個の結果から{{countAfter}}個を保持...",
"rag_failed": "RAGが失敗しました。空の結果を返します...",
"cutoff": "検索内容を切り詰めています...",
"fetch_complete": "{{count}}回の検索を完了しました..."
},
"download.success": "ダウンロードに成功しました",
"download.failed": "ダウンロードに失敗しました",
"error.fetchTopicName": "トピック名の取得に失敗しました"
@ -776,6 +783,7 @@
"dimensions": "{{dimensions}} 次元",
"edit": "モデルを編集",
"embedding": "埋め込み",
"embedding_dimensions": "埋め込み次元",
"embedding_model": "埋め込み模型",
"embedding_model_tooltip": "設定->モデルサービス->管理で追加",
"function_calling": "関数呼び出し",
@ -1826,8 +1834,33 @@
"overwrite_tooltip": "大規模言語モデルではなく、サービス検索を使用する",
"apikey": "API キー",
"free": "無料",
"content_limit": "内容の長さ制限",
"content_limit_tooltip": "検索結果の内容長を制限し、制限を超える内容は切り捨てられます。"
"compression": {
"title": "検索結果の圧縮",
"method": "圧縮方法",
"method.none": "圧縮しない",
"method.cutoff": "切り捨て",
"cutoff.limit": "切り捨て長",
"cutoff.limit.placeholder": "長さを入力",
"cutoff.limit.tooltip": "検索結果の内容長を制限し、制限を超える内容は切り捨てられます2000文字",
"cutoff.unit.char": "文字",
"cutoff.unit.token": "トークン",
"method.rag": "RAG",
"rag.document_count": "文書数",
"rag.document_count.default": "デフォルト",
"rag.document_count.tooltip": "単一の検索結果から抽出する文書数。実際に抽出される文書数は、この値に検索結果数を乗じたものです。",
"rag.embedding_dimensions.auto_get": "次元を自動取得",
"rag.embedding_dimensions.placeholder": "次元を設定しない",
"rag.embedding_dimensions.tooltip": "空の場合、dimensions パラメーターは渡されません",
"info": {
"dimensions_auto_success": "次元が自動取得されました。次元: {{dimensions}}"
},
"error": {
"embedding_model_required": "まず埋め込みモデルを選択してください",
"dimensions_auto_failed": "次元の自動取得に失敗しました",
"provider_not_found": "プロバイダーが見つかりません",
"rag_failed": "RAG に失敗しました"
}
}
},
"general.auto_check_update.title": "自動更新",
"general.early_access.title": "早期アクセス",

View File

@ -702,6 +702,13 @@
"success.siyuan.export": "Успешный экспорт в Siyuan",
"warn.yuque.exporting": "Экспортируется в Yuque, пожалуйста, не отправляйте повторные запросы!",
"warn.siyuan.exporting": "Экспортируется в Siyuan, пожалуйста, не отправляйте повторные запросы!",
"websearch": {
"rag": "Выполнение RAG...",
"rag_complete": "Сохранено {{countAfter}} из {{countBefore}} результатов...",
"rag_failed": "RAG не удалось, возвращается пустой результат...",
"cutoff": "Обрезка содержимого поиска...",
"fetch_complete": "Завершено {{count}} поисков..."
},
"download.success": "Скачано успешно",
"download.failed": "Скачивание не удалось",
"error.fetchTopicName": "Не удалось назвать топик"
@ -776,6 +783,7 @@
"dimensions": "{{dimensions}} мер",
"edit": "Редактировать модель",
"embedding": "Встраиваемые",
"embedding_dimensions": "Встраиваемые размерности",
"embedding_model": "Встраиваемые модели",
"embedding_model_tooltip": "Добавьте в настройки->модель сервиса->управление",
"function_calling": "Вызов функции",
@ -1826,8 +1834,33 @@
"overwrite_tooltip": "Использовать провайдера поиска вместо LLM",
"apikey": "API ключ",
"free": "Бесплатно",
"content_limit": "Ограничение длины текста",
"content_limit_tooltip": "Ограничьте длину содержимого результатов поиска, контент, превышающий ограничение, будет обрезан."
"compression": {
"title": "Сжатие результатов поиска",
"method": "Метод сжатия",
"method.none": "Не сжимать",
"method.cutoff": "Обрезка",
"cutoff.limit": "Лимит обрезки",
"cutoff.limit.placeholder": "Введите длину",
"cutoff.limit.tooltip": "Ограничьте длину содержимого результатов поиска, контент, превышающий ограничение, будет обрезан (например, 2000 символов)",
"cutoff.unit.char": "Символы",
"cutoff.unit.token": "Токены",
"method.rag": "RAG",
"rag.document_count": "Количество документов",
"rag.document_count.default": "По умолчанию",
"rag.document_count.tooltip": "Ожидаемое количество документов, которые будут извлечены из каждого результата поиска. Фактическое количество извлеченных документов равно этому значению, умноженному на количество результатов поиска.",
"rag.embedding_dimensions.auto_get": "Автоматически получить размерности",
"rag.embedding_dimensions.placeholder": "Не устанавливать размерности",
"rag.embedding_dimensions.tooltip": "Если оставить пустым, параметр dimensions не будет передан",
"info": {
"dimensions_auto_success": "Размерности успешно получены, размерности: {{dimensions}}"
},
"error": {
"embedding_model_required": "Пожалуйста, сначала выберите модель встраивания",
"dimensions_auto_failed": "Не удалось получить размерности",
"provider_not_found": "Поставщик не найден",
"rag_failed": "RAG не удалось"
}
}
},
"general.auto_check_update.title": "Автоматическое обновление",
"general.early_access.title": "Ранний доступ",

View File

@ -703,6 +703,13 @@
"success.siyuan.export": "导出到思源笔记成功",
"warn.yuque.exporting": "正在导出语雀, 请勿重复请求导出!",
"warn.siyuan.exporting": "正在导出到思源笔记,请勿重复请求导出!",
"websearch": {
"rag": "正在执行 RAG...",
"rag_complete": "保留 {{countBefore}} 个结果中的 {{countAfter}} 个...",
"rag_failed": "RAG 失败,返回空结果...",
"cutoff": "正在截断搜索内容...",
"fetch_complete": "已完成 {{count}} 次搜索..."
},
"download.success": "下载成功",
"download.failed": "下载失败"
},
@ -776,6 +783,7 @@
"dimensions": "{{dimensions}} 维",
"edit": "编辑模型",
"embedding": "嵌入",
"embedding_dimensions": "嵌入维度",
"embedding_model": "嵌入模型",
"embedding_model_tooltip": "在设置->模型服务中点击管理按钮添加",
"function_calling": "函数调用",
@ -1845,8 +1853,33 @@
"title": "网络搜索",
"apikey": "API 密钥",
"free": "免费",
"content_limit": "内容长度限制",
"content_limit_tooltip": "限制搜索结果的内容长度, 超过限制的内容将被截断"
"compression": {
"title": "搜索结果压缩",
"method": "压缩方法",
"method.none": "不压缩",
"method.cutoff": "截断",
"cutoff.limit": "截断长度",
"cutoff.limit.placeholder": "输入长度",
"cutoff.limit.tooltip": "限制搜索结果的内容长度, 超过限制的内容将被截断(例如 2000 字符)",
"cutoff.unit.char": "字符",
"cutoff.unit.token": "Token",
"method.rag": "RAG",
"rag.document_count": "文档数量",
"rag.document_count.default": "默认",
"rag.document_count.tooltip": "预期从单个搜索结果中提取的文档数量,实际提取的总数量是这个值乘以搜索结果数量。",
"rag.embedding_dimensions.auto_get": "自动获取维度",
"rag.embedding_dimensions.placeholder": "不设置维度",
"rag.embedding_dimensions.tooltip": "留空则不传递 dimensions 参数",
"info": {
"dimensions_auto_success": "维度自动获取成功,维度为 {{dimensions}}"
},
"error": {
"embedding_model_required": "请先选择嵌入模型",
"dimensions_auto_failed": "维度自动获取失败",
"provider_not_found": "未找到服务商",
"rag_failed": "RAG 失败"
}
}
},
"quickPhrase": {
"title": "快捷短语",

View File

@ -703,6 +703,13 @@
"success.siyuan.export": "導出到思源筆記成功",
"warn.yuque.exporting": "正在導出語雀,請勿重複請求導出!",
"warn.siyuan.exporting": "正在導出到思源筆記,請勿重複請求導出!",
"websearch": {
"rag": "正在執行 RAG...",
"rag_complete": "保留 {{countBefore}} 個結果中的 {{countAfter}} 個...",
"rag_failed": "RAG 失敗,返回空結果...",
"cutoff": "正在截斷搜尋內容...",
"fetch_complete": "已完成 {{count}} 次搜尋..."
},
"download.success": "下載成功",
"download.failed": "下載失敗"
},
@ -776,6 +783,7 @@
"dimensions": "{{dimensions}} 維",
"edit": "編輯模型",
"embedding": "嵌入",
"embedding_dimensions": "嵌入維度",
"embedding_model": "嵌入模型",
"embedding_model_tooltip": "在設定->模型服務中點選管理按鈕新增",
"function_calling": "函數調用",
@ -1829,8 +1837,33 @@
"overwrite_tooltip": "強制使用搜尋服務商而不是大語言模型進行搜尋",
"apikey": "API 金鑰",
"free": "免費",
"content_limit": "內容長度限制",
"content_limit_tooltip": "限制搜尋結果的內容長度,超過限制的內容將被截斷"
"compression": {
"title": "搜尋結果壓縮",
"method": "壓縮方法",
"method.none": "不壓縮",
"method.cutoff": "截斷",
"cutoff.limit": "截斷長度",
"cutoff.limit.placeholder": "輸入長度",
"cutoff.limit.tooltip": "限制搜尋結果的內容長度,超過限制的內容將被截斷(例如 2000 字符)",
"cutoff.unit.char": "字符",
"cutoff.unit.token": "Token",
"method.rag": "RAG",
"rag.document_count": "文檔數量",
"rag.document_count.default": "預設",
"rag.document_count.tooltip": "預期從單個搜尋結果中提取的文檔數量,實際提取的總數量是這個值乘以搜尋結果數量。",
"rag.embedding_dimensions.auto_get": "自動獲取維度",
"rag.embedding_dimensions.placeholder": "不設置維度",
"rag.embedding_dimensions.tooltip": "留空則不傳遞 dimensions 參數",
"info": {
"dimensions_auto_success": "維度自動獲取成功,維度為 {{dimensions}}"
},
"error": {
"embedding_model_required": "請先選擇嵌入模型",
"dimensions_auto_failed": "維度自動獲取失敗",
"provider_not_found": "未找到服務商",
"rag_failed": "RAG 失敗"
}
}
},
"general.auto_check_update.title": "自動更新",
"general.early_access.title": "搶先體驗",

View File

@ -5,13 +5,19 @@ import { selectFormattedCitationsByBlockId } from '@renderer/store/messageBlock'
import { WebSearchSource } from '@renderer/types'
import { type CitationMessageBlock, MessageBlockStatus } from '@renderer/types/newMessage'
import React, { useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import { useSelector } from 'react-redux'
import styled from 'styled-components'
import CitationsList from '../CitationsList'
function CitationBlock({ block }: { block: CitationMessageBlock }) {
const { t } = useTranslation()
const formattedCitations = useSelector((state: RootState) => selectFormattedCitationsByBlockId(state, block.id))
const { websearch } = useSelector((state: RootState) => state.runtime)
const message = useSelector((state: RootState) => state.messages.entities[block.messageId])
const userMessageId = message?.askId || block.messageId // 如果没有 askId 则回退到 messageId
const hasGeminiBlock = block.response?.source === WebSearchSource.GEMINI
const hasCitations = useMemo(() => {
return (
@ -21,8 +27,32 @@ function CitationBlock({ block }: { block: CitationMessageBlock }) {
)
}, [formattedCitations, block.knowledge, hasGeminiBlock])
const getWebSearchStatusText = (requestId: string) => {
const status = websearch.activeSearches[requestId] ?? { phase: 'default' }
switch (status.phase) {
case 'fetch_complete':
return t('message.websearch.fetch_complete', {
count: status.countAfter ?? 0
})
case 'rag':
return t('message.websearch.rag')
case 'rag_complete':
return t('message.websearch.rag_complete', {
countBefore: status.countBefore ?? 0,
countAfter: status.countAfter ?? 0
})
case 'rag_failed':
return t('message.websearch.rag_failed')
case 'cutoff':
return t('message.websearch.cutoff')
default:
return t('message.searching')
}
}
if (block.status === MessageBlockStatus.PROCESSING) {
return <Spinner text="message.searching" />
return <Spinner text={getWebSearchStatusText(userMessageId)} />
}
if (!hasCitations) {

View File

@ -41,7 +41,8 @@ const PopupContainer: React.FC<Props> = ({ base, resolve }) => {
const searchResults = await searchKnowledgeBase(value, base)
setResults(searchResults)
} catch (error) {
console.error('Search failed:', error)
console.error(`Failed to search knowledge base ${base.name}:`, error)
setResults([])
} finally {
setLoading(false)
}

View File

@ -1,18 +1,16 @@
import { useTheme } from '@renderer/context/ThemeProvider'
import { useAppDispatch, useAppSelector } from '@renderer/store'
import { setContentLimit, setMaxResult, setSearchWithTime } from '@renderer/store/websearch'
import { Input, Slider, Switch, Tooltip } from 'antd'
import { useWebSearchSettings } from '@renderer/hooks/useWebSearchProviders'
import { useAppDispatch } from '@renderer/store'
import { setMaxResult, setSearchWithTime } from '@renderer/store/websearch'
import { Slider, Switch } from 'antd'
import { t } from 'i18next'
import { Info } from 'lucide-react'
import { FC } from 'react'
import { SettingDivider, SettingGroup, SettingRow, SettingRowTitle, SettingTitle } from '..'
const BasicSettings: FC = () => {
const { theme } = useTheme()
const searchWithTime = useAppSelector((state) => state.websearch.searchWithTime)
const maxResults = useAppSelector((state) => state.websearch.maxResults)
const contentLimit = useAppSelector((state) => state.websearch.contentLimit)
const { searchWithTime, maxResults } = useWebSearchSettings()
const dispatch = useAppDispatch()
@ -38,28 +36,6 @@ const BasicSettings: FC = () => {
onChangeComplete={(value) => dispatch(setMaxResult(value))}
/>
</SettingRow>
<SettingDivider style={{ marginTop: 15, marginBottom: 10 }} />
<SettingRow>
<SettingRowTitle>
{t('settings.websearch.content_limit')}
<Tooltip title={t('settings.websearch.content_limit_tooltip')} placement="right">
<Info size={16} color="var(--color-icon)" style={{ marginLeft: 5, cursor: 'pointer' }} />
</Tooltip>
</SettingRowTitle>
<Input
style={{ width: '100px' }}
placeholder="2000"
value={contentLimit === undefined ? '' : contentLimit}
onChange={(e) => {
const value = e.target.value
if (value === '') {
dispatch(setContentLimit(undefined))
} else if (!isNaN(Number(value)) && Number(value) > 0) {
dispatch(setContentLimit(Number(value)))
}
}}
/>
</SettingRow>
</SettingGroup>
</>
)

View File

@ -0,0 +1,60 @@
import { useWebSearchSettings } from '@renderer/hooks/useWebSearchProviders'
import { SettingRow, SettingRowTitle } from '@renderer/pages/settings'
import { Input, Select, Space, Tooltip } from 'antd'
import { ChevronDown, Info } from 'lucide-react'
import { useTranslation } from 'react-i18next'
const INPUT_BOX_WIDTH = '200px'
const CutoffSettings = () => {
const { t } = useTranslation()
const { compressionConfig, updateCompressionConfig } = useWebSearchSettings()
const handleCutoffLimitChange = (value: number | null) => {
updateCompressionConfig({ cutoffLimit: value || undefined })
}
const handleCutoffUnitChange = (unit: 'char' | 'token') => {
updateCompressionConfig({ cutoffUnit: unit })
}
const unitOptions = [
{ value: 'char', label: t('settings.websearch.compression.cutoff.unit.char') },
{ value: 'token', label: t('settings.websearch.compression.cutoff.unit.token') }
]
return (
<SettingRow>
<SettingRowTitle>
{t('settings.websearch.compression.cutoff.limit')}
<Tooltip title={t('settings.websearch.compression.cutoff.limit.tooltip')} placement="right">
<Info size={16} color="var(--color-icon)" style={{ marginLeft: 5, cursor: 'pointer' }} />
</Tooltip>
</SettingRowTitle>
<Space.Compact style={{ width: INPUT_BOX_WIDTH }}>
<Input
style={{ maxWidth: '60%' }}
placeholder={t('settings.websearch.compression.cutoff.limit.placeholder')}
value={compressionConfig?.cutoffLimit === undefined ? '' : compressionConfig.cutoffLimit}
onChange={(e) => {
const value = e.target.value
if (value === '') {
handleCutoffLimitChange(null)
} else if (!isNaN(Number(value)) && Number(value) > 0) {
handleCutoffLimitChange(Number(value))
}
}}
/>
<Select
value={compressionConfig?.cutoffUnit || 'char'}
style={{ minWidth: '40%' }}
onChange={handleCutoffUnitChange}
options={unitOptions}
suffixIcon={<ChevronDown size={16} color="var(--color-border)" />}
/>
</Space.Compact>
</SettingRow>
)
}
export default CutoffSettings

View File

@ -0,0 +1,207 @@
import AiProvider from '@renderer/aiCore'
import { DEFAULT_WEBSEARCH_RAG_DOCUMENT_COUNT } from '@renderer/config/constant'
import Logger from '@renderer/config/logger'
import { isEmbeddingModel, isRerankModel } from '@renderer/config/models'
import { NOT_SUPPORTED_REANK_PROVIDERS } from '@renderer/config/providers'
import { useProviders } from '@renderer/hooks/useProvider'
import { useWebSearchSettings } from '@renderer/hooks/useWebSearchProviders'
import { SettingDivider, SettingRow, SettingRowTitle } from '@renderer/pages/settings'
import { getModelUniqId } from '@renderer/services/ModelService'
import { Model } from '@renderer/types'
import { Button, InputNumber, Select, Slider, Tooltip } from 'antd'
import { find, sortBy } from 'lodash'
import { ChevronDown, Info, RefreshCw } from 'lucide-react'
import { useMemo, useState } from 'react'
import { useTranslation } from 'react-i18next'
const INPUT_BOX_WIDTH = '200px'
const RagSettings = () => {
const { t } = useTranslation()
const { providers } = useProviders()
const { compressionConfig, updateCompressionConfig } = useWebSearchSettings()
const [loadingDimensions, setLoadingDimensions] = useState(false)
const embeddingModels = useMemo(() => {
return providers
.map((p) => p.models)
.flat()
.filter((model) => isEmbeddingModel(model))
}, [providers])
const rerankModels = useMemo(() => {
return providers
.map((p) => p.models)
.flat()
.filter((model) => isRerankModel(model))
}, [providers])
const embeddingSelectOptions = useMemo(() => {
return providers
.filter((p) => p.models.length > 0)
.map((p) => ({
label: p.isSystem ? t(`provider.${p.id}`) : p.name,
title: p.name,
options: sortBy(p.models, 'name')
.filter((model) => isEmbeddingModel(model))
.map((m) => ({
label: m.name,
value: getModelUniqId(m),
providerId: p.id,
modelId: m.id
}))
}))
.filter((group) => group.options.length > 0)
}, [providers, t])
const rerankSelectOptions = useMemo(() => {
return providers
.filter((p) => p.models.length > 0)
.filter((p) => !NOT_SUPPORTED_REANK_PROVIDERS.includes(p.id))
.map((p) => ({
label: p.isSystem ? t(`provider.${p.id}`) : p.name,
title: p.name,
options: sortBy(p.models, 'name')
.filter((model) => isRerankModel(model))
.map((m) => ({
label: m.name,
value: getModelUniqId(m)
}))
}))
.filter((group) => group.options.length > 0)
}, [providers, t])
const handleEmbeddingModelChange = (modelValue: string) => {
const selectedModel = find(embeddingModels, JSON.parse(modelValue)) as Model
updateCompressionConfig({ embeddingModel: selectedModel })
}
const handleRerankModelChange = (modelValue?: string) => {
const selectedModel = modelValue ? (find(rerankModels, JSON.parse(modelValue)) as Model) : undefined
updateCompressionConfig({ rerankModel: selectedModel })
}
const handleEmbeddingDimensionsChange = (value: number | null) => {
updateCompressionConfig({ embeddingDimensions: value || undefined })
}
const handleDocumentCountChange = (value: number) => {
updateCompressionConfig({ documentCount: value })
}
const handleAutoGetDimensions = async () => {
if (!compressionConfig?.embeddingModel) {
Logger.log('[RagSettings] handleAutoGetDimensions: no embedding model')
window.message.error(t('settings.websearch.compression.error.embedding_model_required'))
return
}
const provider = providers.find((p) => p.id === compressionConfig.embeddingModel?.provider)
if (!provider) {
Logger.log('[RagSettings] handleAutoGetDimensions: provider not found')
window.message.error(t('settings.websearch.compression.error.provider_not_found'))
return
}
setLoadingDimensions(true)
try {
const aiProvider = new AiProvider(provider)
const dimensions = await aiProvider.getEmbeddingDimensions(compressionConfig.embeddingModel)
updateCompressionConfig({ embeddingDimensions: dimensions })
window.message.success(t('settings.websearch.compression.info.dimensions_auto_success', { dimensions }))
} catch (error) {
Logger.error('[RagSettings] handleAutoGetDimensions: failed to get embedding dimensions', error)
window.message.error(t('settings.websearch.compression.error.dimensions_auto_failed'))
} finally {
setLoadingDimensions(false)
}
}
return (
<>
<SettingRow>
<SettingRowTitle>{t('models.embedding_model')}</SettingRowTitle>
<Select
value={compressionConfig?.embeddingModel ? getModelUniqId(compressionConfig.embeddingModel) : undefined}
style={{ width: INPUT_BOX_WIDTH }}
options={embeddingSelectOptions}
placeholder={t('settings.models.empty')}
onChange={handleEmbeddingModelChange}
allowClear={false}
showSearch
suffixIcon={<ChevronDown size={16} color="var(--color-border)" />}
/>
</SettingRow>
<SettingDivider />
<SettingRow>
<SettingRowTitle>
{t('models.embedding_dimensions')}
<Tooltip title={t('settings.websearch.compression.rag.embedding_dimensions.tooltip')}>
<Info size={16} color="var(--color-icon)" style={{ marginLeft: 5, cursor: 'pointer' }} />
</Tooltip>
</SettingRowTitle>
<div style={{ display: 'flex', gap: '8px', alignItems: 'center', width: INPUT_BOX_WIDTH }}>
<InputNumber
value={compressionConfig?.embeddingDimensions}
style={{ flex: 1 }}
placeholder={t('settings.websearch.compression.rag.embedding_dimensions.placeholder')}
min={0}
onChange={handleEmbeddingDimensionsChange}
/>
<Tooltip title={t('settings.websearch.compression.rag.embedding_dimensions.auto_get')}>
<Button
icon={<RefreshCw size={16} />}
loading={loadingDimensions}
disabled={!compressionConfig?.embeddingModel}
onClick={handleAutoGetDimensions}
/>
</Tooltip>
</div>
</SettingRow>
<SettingDivider />
<SettingRow>
<SettingRowTitle>{t('models.rerank_model')}</SettingRowTitle>
<Select
value={compressionConfig?.rerankModel ? getModelUniqId(compressionConfig.rerankModel) : undefined}
style={{ width: INPUT_BOX_WIDTH }}
options={rerankSelectOptions}
placeholder={t('settings.models.empty')}
onChange={handleRerankModelChange}
allowClear
showSearch
suffixIcon={<ChevronDown size={16} color="var(--color-border)" />}
/>
</SettingRow>
<SettingDivider />
<SettingRow>
<SettingRowTitle>
{t('settings.websearch.compression.rag.document_count')}
<Tooltip title={t('settings.websearch.compression.rag.document_count.tooltip')} placement="right">
<Info size={16} color="var(--color-icon)" style={{ marginLeft: 5, cursor: 'pointer' }} />
</Tooltip>
</SettingRowTitle>
<div style={{ width: INPUT_BOX_WIDTH }}>
<Slider
value={compressionConfig?.documentCount || DEFAULT_WEBSEARCH_RAG_DOCUMENT_COUNT}
min={1}
max={10}
step={1}
onChange={handleDocumentCountChange}
marks={{
1: t('settings.websearch.compression.rag.document_count.default'),
3: '3',
10: '10'
}}
/>
</div>
</SettingRow>
</>
)
}
export default RagSettings

View File

@ -0,0 +1,47 @@
import { useWebSearchSettings } from '@renderer/hooks/useWebSearchProviders'
import { SettingDivider, SettingGroup, SettingRow, SettingRowTitle, SettingTitle } from '@renderer/pages/settings'
import { Select } from 'antd'
import { ChevronDown } from 'lucide-react'
import { useTranslation } from 'react-i18next'
import CutoffSettings from './CutoffSettings'
import RagSettings from './RagSettings'
const CompressionSettings = () => {
const { t } = useTranslation()
const { compressionConfig, updateCompressionConfig } = useWebSearchSettings()
const compressionMethodOptions = [
{ value: 'none', label: t('settings.websearch.compression.method.none') },
{ value: 'cutoff', label: t('settings.websearch.compression.method.cutoff') },
{ value: 'rag', label: t('settings.websearch.compression.method.rag') }
]
const handleCompressionMethodChange = (method: 'none' | 'cutoff' | 'rag') => {
updateCompressionConfig({ method })
}
return (
<SettingGroup>
<SettingTitle>{t('settings.websearch.compression.title')}</SettingTitle>
<SettingDivider />
<SettingRow>
<SettingRowTitle>{t('settings.websearch.compression.method')}</SettingRowTitle>
<Select
value={compressionConfig?.method || 'none'}
style={{ width: '200px' }}
onChange={handleCompressionMethodChange}
options={compressionMethodOptions}
suffixIcon={<ChevronDown size={16} color="var(--color-border)" />}
/>
</SettingRow>
<SettingDivider />
{compressionConfig?.method === 'cutoff' && <CutoffSettings />}
{compressionConfig?.method === 'rag' && <RagSettings />}
</SettingGroup>
)
}
export default CompressionSettings

View File

@ -9,6 +9,7 @@ import { useTranslation } from 'react-i18next'
import { SettingContainer, SettingDivider, SettingGroup, SettingRow, SettingRowTitle, SettingTitle } from '..'
import BasicSettings from './BasicSettings'
import BlacklistSettings from './BlacklistSettings'
import CompressionSettings from './CompressionSettings'
import WebSearchProviderSetting from './WebSearchProviderSetting'
const WebSearchSettings: FC = () => {
@ -56,6 +57,7 @@ const WebSearchSettings: FC = () => {
</SettingGroup>
)}
<BasicSettings />
<CompressionSettings />
<BlacklistSettings />
</SettingContainer>
)

View File

@ -26,15 +26,13 @@ export default class BochaProvider extends BaseWebSearchProvider {
Authorization: `Bearer ${this.apiKey}`
}
const contentLimit = websearch.contentLimit
const params: BochaSearchParams = {
query,
count: websearch.maxResults,
exclude: websearch.excludeDomains.join(','),
freshness: websearch.searchWithTime ? 'oneDay' : 'noLimit',
summary: false,
page: contentLimit ? Math.ceil(contentLimit / websearch.maxResults) : 1
summary: true,
page: 1
}
const response = await fetch(`${this.apiHost}/v1/web-search`, {
@ -58,7 +56,8 @@ export default class BochaProvider extends BaseWebSearchProvider {
query: resp.data.queryContext.originalQuery,
results: resp.data.webPages.value.map((result) => ({
title: result.name,
content: result.snippet,
// 优先使用 summary更详细如果没有则使用 snippet
content: result.summary || result.snippet || '',
url: result.url
}))
}

View File

@ -35,14 +35,9 @@ export default class ExaProvider extends BaseWebSearchProvider {
return {
query: response.autopromptString,
results: response.results.slice(0, websearch.maxResults).map((result) => {
let content = result.text || ''
if (websearch.contentLimit && content.length > websearch.contentLimit) {
content = content.slice(0, websearch.contentLimit) + '...'
}
return {
title: result.title || 'No title',
content: content,
content: result.text || '',
url: result.url || ''
}
})

View File

@ -55,11 +55,7 @@ export default class LocalSearchProvider extends BaseWebSearchProvider {
// Fetch content for each URL concurrently
const fetchPromises = validItems.map(async (item) => {
// Logger.log(`Fetching content for ${item.url}...`)
const result = await fetchWebContent(item.url, 'markdown', this.provider.usingBrowser, httpOptions)
if (websearch.contentLimit && result.content.length > websearch.contentLimit) {
result.content = result.content.slice(0, websearch.contentLimit) + '...'
}
return result
return await fetchWebContent(item.url, 'markdown', this.provider.usingBrowser, httpOptions)
})
// Wait for all fetches to complete

View File

@ -122,11 +122,7 @@ export default class SearxngProvider extends BaseWebSearchProvider {
// Fetch content for each URL concurrently
const fetchPromises = validItems.map(async (item) => {
// Logger.log(`Fetching content for ${item.url}...`)
const result = await fetchWebContent(item.url, 'markdown', this.provider.usingBrowser)
if (websearch.contentLimit && result.content.length > websearch.contentLimit) {
result.content = result.content.slice(0, websearch.contentLimit) + '...'
}
return result
return await fetchWebContent(item.url, 'markdown', this.provider.usingBrowser)
})
// Wait for all fetches to complete

View File

@ -31,14 +31,9 @@ export default class TavilyProvider extends BaseWebSearchProvider {
return {
query: result.query,
results: result.results.slice(0, websearch.maxResults).map((result) => {
let content = result.content || ''
if (websearch.contentLimit && content.length > websearch.contentLimit) {
content = content.slice(0, websearch.contentLimit) + '...'
}
return {
title: result.title || 'No title',
content: content,
content: result.content || '',
url: result.url || ''
}
})

View File

@ -157,8 +157,13 @@ async function fetchExternalTool(
try {
// Use the consolidated processWebsearch function
WebSearchService.createAbortSignal(lastUserMessage.id)
const webSearchResponse = await WebSearchService.processWebsearch(
webSearchProvider!,
extractResults,
lastUserMessage.id
)
return {
results: await WebSearchService.processWebsearch(webSearchProvider!, extractResults),
results: webSearchResponse,
source: WebSearchSource.WEBSEARCH
}
} catch (error) {

View File

@ -130,7 +130,7 @@ export const searchKnowledgeBase = async (
)
} catch (error) {
Logger.error(`Error searching knowledge base ${base.name}:`, error)
return []
throw error
}
}

View File

@ -1,13 +1,38 @@
import { DEFAULT_WEBSEARCH_RAG_DOCUMENT_COUNT } from '@renderer/config/constant'
import Logger from '@renderer/config/logger'
import i18n from '@renderer/i18n'
import WebSearchEngineProvider from '@renderer/providers/WebSearchProvider'
import store from '@renderer/store'
import { WebSearchState } from '@renderer/store/websearch'
import { WebSearchProvider, WebSearchProviderResponse } from '@renderer/types'
import { hasObjectKey } from '@renderer/utils'
import { setWebSearchStatus } from '@renderer/store/runtime'
import { CompressionConfig, WebSearchState } from '@renderer/store/websearch'
import {
KnowledgeBase,
KnowledgeItem,
KnowledgeReference,
WebSearchProvider,
WebSearchProviderResponse,
WebSearchProviderResult,
WebSearchStatus
} from '@renderer/types'
import { hasObjectKey, uuid } from '@renderer/utils'
import { addAbortController } from '@renderer/utils/abortController'
import { formatErrorMessage } from '@renderer/utils/error'
import { ExtractResults } from '@renderer/utils/extract'
import { fetchWebContents } from '@renderer/utils/fetch'
import { consolidateReferencesByUrl, selectReferences } from '@renderer/utils/websearch'
import dayjs from 'dayjs'
import { LRUCache } from 'lru-cache'
import { getKnowledgeBaseParams } from './KnowledgeService'
import { getKnowledgeSourceUrl, searchKnowledgeBase } from './KnowledgeService'
interface RequestState {
signal: AbortSignal | null
searchBase?: KnowledgeBase
isPaused: boolean
createdAt: number
}
/**
*
*/
@ -19,12 +44,47 @@ class WebSearchService {
isPaused = false
createAbortSignal(key: string) {
// 管理不同请求的状态
private requestStates = new LRUCache<string, RequestState>({
max: 5, // 最多5个并发请求
ttl: 1000 * 60 * 2, // 2分钟过期
dispose: (requestState: RequestState, requestId: string) => {
if (!requestState.searchBase) return
window.api.knowledgeBase
.delete(requestState.searchBase.id)
.catch((error) => Logger.warn(`[WebSearchService] Failed to cleanup search base for ${requestId}:`, error))
}
})
/**
*
* @param requestId ID ID
*/
private getRequestState(requestId: string): RequestState {
let state = this.requestStates.get(requestId)
if (!state) {
state = {
signal: null,
isPaused: false,
createdAt: Date.now()
}
this.requestStates.set(requestId, state)
}
return state
}
createAbortSignal(requestId: string) {
const controller = new AbortController()
this.signal = controller.signal
addAbortController(key, () => {
this.isPaused = true
this.signal = controller.signal // 保持向后兼容
const state = this.getRequestState(requestId)
state.signal = controller.signal
addAbortController(requestId, () => {
this.isPaused = true // 保持向后兼容
state.isPaused = true
this.signal = null
this.requestStates.delete(requestId)
controller.abort()
})
return controller
@ -137,45 +197,338 @@ class WebSearchService {
}
}
/**
*
*/
private async setWebSearchStatus(requestId: string, status: WebSearchStatus, delayMs?: number) {
store.dispatch(setWebSearchStatus({ requestId, status }))
if (delayMs) {
await new Promise((resolve) => setTimeout(resolve, delayMs))
}
}
/**
*
*/
private async ensureSearchBase(
config: CompressionConfig,
documentCount: number,
requestId: string
): Promise<KnowledgeBase> {
const baseId = `websearch-compression-${requestId}`
const state = this.getRequestState(requestId)
// 如果已存在且配置未变,直接复用
if (state.searchBase && this.isConfigMatched(state.searchBase, config)) {
return state.searchBase
}
// 清理旧的知识库
if (state.searchBase) {
await window.api.knowledgeBase.delete(state.searchBase.id)
}
if (!config.embeddingModel) {
throw new Error('Embedding model is required for RAG compression')
}
// 创建新的知识库
state.searchBase = {
id: baseId,
name: `WebSearch-RAG-${requestId}`,
model: config.embeddingModel,
rerankModel: config.rerankModel,
dimensions: config.embeddingDimensions,
documentCount,
items: [],
created_at: Date.now(),
updated_at: Date.now(),
version: 1
}
// 更新LRU cache
this.requestStates.set(requestId, state)
// 创建知识库
const baseParams = getKnowledgeBaseParams(state.searchBase)
await window.api.knowledgeBase.create(baseParams)
return state.searchBase
}
/**
*
*/
private isConfigMatched(base: KnowledgeBase, config: CompressionConfig): boolean {
return (
base.model.id === config.embeddingModel?.id &&
base.rerankModel?.id === config.rerankModel?.id &&
base.dimensions === config.embeddingDimensions
)
}
/**
*
* @param questions
* @param searchBase
* @returns
*/
private async querySearchBase(questions: string[], searchBase: KnowledgeBase): Promise<KnowledgeReference[]> {
// 1. 单独搜索每个问题
const searchPromises = questions.map((question) => searchKnowledgeBase(question, searchBase))
const allResults = await Promise.all(searchPromises)
// 2. 合并所有结果并按分数排序
const flatResults = allResults.flat().sort((a, b) => b.score - a.score)
// 3. 去重,保留最高分的重复内容
const seen = new Set<string>()
const uniqueResults = flatResults.filter((item) => {
if (seen.has(item.pageContent)) {
return false
}
seen.add(item.pageContent)
return true
})
// 4. 转换为引用格式
return await Promise.all(
uniqueResults.map(async (result, index) => ({
id: index + 1,
content: result.pageContent,
sourceUrl: await getKnowledgeSourceUrl(result),
type: 'url' as const
}))
)
}
/**
* 使RAG压缩搜索结果
* -
* - retrieve
* - sourceUrl
*
* @param questions
* @param rawResults
* @param config
* @param requestId ID
* @returns
*/
private async compressWithSearchBase(
questions: string[],
rawResults: WebSearchProviderResult[],
config: CompressionConfig,
requestId: string
): Promise<WebSearchProviderResult[]> {
// 根据搜索次数计算所需的文档数量
const totalDocumentCount =
Math.max(0, rawResults.length) * (config.documentCount ?? DEFAULT_WEBSEARCH_RAG_DOCUMENT_COUNT)
const searchBase = await this.ensureSearchBase(config, totalDocumentCount, requestId)
// 1. 清空知识库
await window.api.knowledgeBase.reset(getKnowledgeBaseParams(searchBase))
// 2. 一次性添加所有搜索结果到知识库
const addPromises = rawResults.map(async (result) => {
const item: KnowledgeItem & { sourceUrl?: string } = {
id: uuid(),
type: 'note',
content: result.content,
sourceUrl: result.url, // 设置 sourceUrl 用于映射
created_at: Date.now(),
updated_at: Date.now(),
processingStatus: 'pending'
}
await window.api.knowledgeBase.add({
base: getKnowledgeBaseParams(searchBase),
item
})
})
// 等待所有结果添加完成
await Promise.all(addPromises)
// 3. 对知识库执行多问题搜索获取压缩结果
const references = await this.querySearchBase(questions, searchBase)
// 4. 使用 Round Robin 策略选择引用
const selectedReferences = selectReferences(rawResults, references, totalDocumentCount)
Logger.log('[WebSearchService] With RAG, the number of search results:', {
raw: rawResults.length,
retrieved: references.length,
selected: selectedReferences.length
})
// 5. 按 sourceUrl 分组并合并同源片段
return consolidateReferencesByUrl(rawResults, selectedReferences)
}
/**
* 使 char token
*
* @param rawResults
* @param config
* @returns
*/
private async compressWithCutoff(
rawResults: WebSearchProviderResult[],
config: CompressionConfig
): Promise<WebSearchProviderResult[]> {
if (!config.cutoffLimit) {
Logger.warn('[WebSearchService] Cutoff limit is not set, skipping compression')
return rawResults
}
const perResultLimit = Math.max(1, Math.floor(config.cutoffLimit / rawResults.length))
// 动态导入 tokenx
const { sliceByTokens } = await import('tokenx')
return rawResults.map((result) => {
if (config.cutoffUnit === 'token') {
// 使用 token 截断
const slicedContent = sliceByTokens(result.content, 0, perResultLimit)
return {
...result,
content: slicedContent.length < result.content.length ? slicedContent + '...' : slicedContent
}
} else {
// 使用字符截断(默认行为)
return {
...result,
content:
result.content.length > perResultLimit ? result.content.slice(0, perResultLimit) + '...' : result.content
}
}
})
}
/**
* UI 使
*
*
* -
* - summarize请求
* -
* -
* - RAG或截断
* -
*
* @param webSearchProvider - 使
* @param extractResults -
* @param requestId -
*
* @returns
*/
public async processWebsearch(
webSearchProvider: WebSearchProvider,
extractResults: ExtractResults
extractResults: ExtractResults,
requestId: string
): Promise<WebSearchProviderResponse> {
// 重置状态
await this.setWebSearchStatus(requestId, { phase: 'default' })
// 检查 websearch 和 question 是否有效
if (!extractResults.websearch?.question || extractResults.websearch.question.length === 0) {
Logger.log('[processWebsearch] No valid question found in extractResults.websearch')
return { results: [] }
}
// 使用请求特定的signal如果没有则回退到全局signal
const signal = this.getRequestState(requestId).signal || this.signal
const questions = extractResults.websearch.question
const links = extractResults.websearch.links
const firstQuestion = questions[0]
if (firstQuestion === 'summarize' && links && links.length > 0) {
const contents = await fetchWebContents(links, undefined, undefined, {
signal: this.signal
})
return {
query: 'summaries',
results: contents
}
}
const searchPromises = questions.map((q) => this.search(webSearchProvider, q, { signal: this.signal }))
const searchResults = await Promise.allSettled(searchPromises)
const aggregatedResults: any[] = []
// 处理 summarize
if (questions[0] === 'summarize' && links && links.length > 0) {
const contents = await fetchWebContents(links, undefined, undefined, { signal })
return { query: 'summaries', results: contents }
}
const searchPromises = questions.map((q) => this.search(webSearchProvider, q, { signal }))
const searchResults = await Promise.allSettled(searchPromises)
// 统计成功完成的搜索数量
const successfulSearchCount = searchResults.filter((result) => result.status === 'fulfilled').length
if (successfulSearchCount > 1) {
await this.setWebSearchStatus(
requestId,
{
phase: 'fetch_complete',
countAfter: successfulSearchCount
},
1000
)
}
let finalResults: WebSearchProviderResult[] = []
searchResults.forEach((result) => {
if (result.status === 'fulfilled') {
if (result.value.results) {
aggregatedResults.push(...result.value.results)
finalResults.push(...result.value.results)
}
}
if (result.status === 'rejected') {
throw result.reason
}
})
// 如果没有搜索结果,直接返回空结果
if (finalResults.length === 0) {
await this.setWebSearchStatus(requestId, { phase: 'default' })
return {
query: questions.join(' | '),
results: []
}
}
const { compressionConfig } = this.getWebSearchState()
// RAG压缩处理
if (compressionConfig?.method === 'rag' && requestId) {
await this.setWebSearchStatus(requestId, { phase: 'rag' }, 500)
const originalCount = finalResults.length
try {
finalResults = await this.compressWithSearchBase(questions, finalResults, compressionConfig, requestId)
await this.setWebSearchStatus(
requestId,
{
phase: 'rag_complete',
countBefore: originalCount,
countAfter: finalResults.length
},
1000
)
} catch (error) {
Logger.warn('[WebSearchService] RAG compression failed, will return empty results:', error)
window.message.error({
key: 'websearch-rag-failed',
duration: 10,
content: `${i18n.t('settings.websearch.compression.error.rag_failed')}: ${formatErrorMessage(error)}`
})
finalResults = []
await this.setWebSearchStatus(requestId, { phase: 'rag_failed' }, 1000)
}
}
// 截断压缩处理
else if (compressionConfig?.method === 'cutoff' && compressionConfig.cutoffLimit) {
await this.setWebSearchStatus(requestId, { phase: 'cutoff' }, 500)
finalResults = await this.compressWithCutoff(finalResults, compressionConfig)
}
// 重置状态
await this.setWebSearchStatus(requestId, { phase: 'default' })
return {
query: questions.join(' | '),
results: aggregatedResults
results: finalResults
}
}
}

View File

@ -50,7 +50,7 @@ const persistedReducer = persistReducer(
{
key: 'cherry-studio',
storage,
version: 115,
version: 116,
blacklist: ['runtime', 'messages', 'messageBlocks'],
migrate
},

View File

@ -1631,6 +1631,31 @@ const migrateConfig = {
if (state.settings) {
state.settings.upgradeChannel = UpgradeChannel.LATEST
}
return state
} catch (error) {
return state
}
},
'116': (state: RootState) => {
try {
if (state.websearch) {
// migrate contentLimit to cutoffLimit
// @ts-ignore eslint-disable-next-line
if (state.websearch.contentLimit) {
state.websearch.compressionConfig = {
method: 'cutoff',
cutoffUnit: 'char',
// @ts-ignore eslint-disable-next-line
cutoffLimit: state.websearch.contentLimit
}
} else {
state.websearch.compressionConfig = { method: 'none', cutoffUnit: 'char' }
}
// @ts-ignore eslint-disable-next-line
delete state.websearch.contentLimit
}
return state
} catch (error) {
return state

View File

@ -1,6 +1,6 @@
import { createSlice, PayloadAction } from '@reduxjs/toolkit'
import { AppLogo, UserAvatar } from '@renderer/config/env'
import type { MinAppType, Topic } from '@renderer/types'
import type { MinAppType, Topic, WebSearchStatus } from '@renderer/types'
import type { UpdateInfo } from 'builder-util-runtime'
export interface ChatState {
@ -13,6 +13,10 @@ export interface ChatState {
newlyRenamedTopics: string[]
}
export interface WebSearchState {
activeSearches: Record<string, WebSearchStatus>
}
export interface UpdateState {
info: UpdateInfo | null
checking: boolean
@ -39,6 +43,7 @@ export interface RuntimeState {
update: UpdateState
export: ExportState
chat: ChatState
websearch: WebSearchState
}
export interface ExportState {
@ -72,6 +77,9 @@ const initialState: RuntimeState = {
activeTopic: null,
renamingTopics: [],
newlyRenamedTopics: []
},
websearch: {
activeSearches: {}
}
}
@ -130,6 +138,17 @@ const runtimeSlice = createSlice({
},
setNewlyRenamedTopics: (state, action: PayloadAction<string[]>) => {
state.chat.newlyRenamedTopics = action.payload
},
// WebSearch related actions
setActiveSearches: (state, action: PayloadAction<Record<string, WebSearchStatus>>) => {
state.websearch.activeSearches = action.payload
},
setWebSearchStatus: (state, action: PayloadAction<{ requestId: string; status: WebSearchStatus }>) => {
const { requestId, status } = action.payload
if (status.phase === 'default') {
delete state.websearch.activeSearches[requestId]
}
state.websearch.activeSearches[requestId] = status
}
}
})
@ -151,7 +170,10 @@ export const {
setSelectedMessageIds,
setActiveTopic,
setRenamingTopics,
setNewlyRenamedTopics
setNewlyRenamedTopics,
// WebSearch related actions
setActiveSearches,
setWebSearchStatus
} = runtimeSlice.actions
export default runtimeSlice.reducer

View File

@ -1,5 +1,5 @@
import { createSlice, PayloadAction } from '@reduxjs/toolkit'
import type { WebSearchProvider } from '@renderer/types'
import type { Model, WebSearchProvider } from '@renderer/types'
export interface SubscribeSource {
key: number
url: string
@ -7,6 +7,16 @@ export interface SubscribeSource {
blacklist?: string[] // 存储从该订阅源获取的黑名单
}
export interface CompressionConfig {
method: 'none' | 'cutoff' | 'rag'
cutoffLimit?: number
cutoffUnit?: 'char' | 'token'
embeddingModel?: Model
embeddingDimensions?: number // undefined表示自动获取
documentCount?: number // 每个搜索结果的文档数量(只是预期值)
rerankModel?: Model
}
export interface WebSearchState {
// 默认搜索提供商的ID
/** @deprecated 支持在快捷菜单中自选搜索供应商,所以这个不再适用 */
@ -24,12 +34,13 @@ export interface WebSearchState {
// 是否覆盖服务商搜索
/** @deprecated 支持在快捷菜单中自选搜索供应商,所以这个不再适用 */
overwrite: boolean
contentLimit?: number
// 搜索结果压缩
compressionConfig?: CompressionConfig
// 具体供应商的配置
providerConfig: Record<string, any>
}
const initialState: WebSearchState = {
export const initialState: WebSearchState = {
defaultProvider: 'local-bing',
providers: [
{
@ -78,6 +89,10 @@ const initialState: WebSearchState = {
excludeDomains: [],
subscribeSources: [],
overwrite: false,
compressionConfig: {
method: 'none',
cutoffUnit: 'char'
},
providerConfig: {}
}
@ -150,8 +165,14 @@ const websearchSlice = createSlice({
state.providers.push(action.payload)
}
},
setContentLimit: (state, action: PayloadAction<number | undefined>) => {
state.contentLimit = action.payload
setCompressionConfig: (state, action: PayloadAction<CompressionConfig>) => {
state.compressionConfig = action.payload
},
updateCompressionConfig: (state, action: PayloadAction<Partial<CompressionConfig>>) => {
state.compressionConfig = {
...state.compressionConfig,
...action.payload
} as CompressionConfig
},
setProviderConfig: (state, action: PayloadAction<Record<string, any>>) => {
state.providerConfig = action.payload
@ -176,7 +197,8 @@ export const {
setSubscribeSources,
setOverwrite,
addWebSearchProvider,
setContentLimit,
setCompressionConfig,
updateCompressionConfig,
setProviderConfig,
updateProviderConfig
} = websearchSlice.actions

View File

@ -500,7 +500,6 @@ export type WebSearchProvider = {
url?: string
basicAuthUsername?: string
basicAuthPassword?: string
contentLimit?: number
usingBrowser?: boolean
}
@ -542,6 +541,14 @@ export type WebSearchResponse = {
source: WebSearchSource
}
export type WebSearchPhase = 'default' | 'fetch_complete' | 'rag' | 'rag_complete' | 'rag_failed' | 'cutoff'
export type WebSearchStatus = {
phase: WebSearchPhase
countBefore?: number
countAfter?: number
}
export type KnowledgeReference = {
id: number
content: string

View File

@ -0,0 +1,226 @@
import { KnowledgeReference, WebSearchProviderResult } from '@renderer/types'
import { describe, expect, it } from 'vitest'
import { consolidateReferencesByUrl, selectReferences } from '../websearch'
describe('websearch', () => {
describe('consolidateReferencesByUrl', () => {
const createMockRawResult = (url: string, title: string): WebSearchProviderResult => ({
title,
url,
content: `Original content for ${title}`
})
const createMockReference = (sourceUrl: string, content: string, id: number = 1): KnowledgeReference => ({
id,
sourceUrl,
content,
type: 'url'
})
it('should consolidate single reference to matching raw result', () => {
// 基本功能:单个引用与原始结果匹配
const rawResults = [createMockRawResult('https://example.com', 'Example Title')]
const references = [createMockReference('https://example.com', 'Retrieved content')]
const result = consolidateReferencesByUrl(rawResults, references)
expect(result).toHaveLength(1)
expect(result[0]).toEqual({
title: 'Example Title',
url: 'https://example.com',
content: 'Retrieved content'
})
})
it('should consolidate multiple references from same source URL', () => {
// 多个片段合并到同一个URL
const rawResults = [createMockRawResult('https://example.com', 'Example Title')]
const references = [
createMockReference('https://example.com', 'First content', 1),
createMockReference('https://example.com', 'Second content', 2)
]
const result = consolidateReferencesByUrl(rawResults, references)
expect(result).toHaveLength(1)
expect(result[0]).toEqual({
title: 'Example Title',
url: 'https://example.com',
content: 'First content\n\n---\n\nSecond content'
})
})
it('should consolidate references from multiple source URLs', () => {
// 多个不同URL的引用
const rawResults = [
createMockRawResult('https://example.com', 'Example Title'),
createMockRawResult('https://test.com', 'Test Title')
]
const references = [
createMockReference('https://example.com', 'Example content', 1),
createMockReference('https://test.com', 'Test content', 2)
]
const result = consolidateReferencesByUrl(rawResults, references)
expect(result).toHaveLength(2)
// 结果顺序可能不确定,使用 toContainEqual
expect(result).toContainEqual({
title: 'Example Title',
url: 'https://example.com',
content: 'Example content'
})
expect(result).toContainEqual({
title: 'Test Title',
url: 'https://test.com',
content: 'Test content'
})
})
it('should use custom separator for multiple references', () => {
// 自定义分隔符
const rawResults = [createMockRawResult('https://example.com', 'Example Title')]
const references = [
createMockReference('https://example.com', 'First content', 1),
createMockReference('https://example.com', 'Second content', 2)
]
const result = consolidateReferencesByUrl(rawResults, references, ' | ')
expect(result).toHaveLength(1)
expect(result[0].content).toBe('First content | Second content')
})
it('should ignore references with no matching raw result', () => {
// 无匹配的引用
const rawResults = [createMockRawResult('https://example.com', 'Example Title')]
const references = [
createMockReference('https://example.com', 'Matching content', 1),
createMockReference('https://nonexistent.com', 'Non-matching content', 2)
]
const result = consolidateReferencesByUrl(rawResults, references)
expect(result).toHaveLength(1)
expect(result[0]).toEqual({
title: 'Example Title',
url: 'https://example.com',
content: 'Matching content'
})
})
it('should return empty array when no references match raw results', () => {
// 完全无匹配的情况
const rawResults = [createMockRawResult('https://example.com', 'Example Title')]
const references = [createMockReference('https://nonexistent.com', 'Non-matching content', 1)]
const result = consolidateReferencesByUrl(rawResults, references)
expect(result).toHaveLength(0)
})
it('should handle empty inputs', () => {
// 边界条件:空输入
expect(consolidateReferencesByUrl([], [])).toEqual([])
const rawResults = [createMockRawResult('https://example.com', 'Example Title')]
expect(consolidateReferencesByUrl(rawResults, [])).toEqual([])
const references = [createMockReference('https://example.com', 'Content', 1)]
expect(consolidateReferencesByUrl([], references)).toEqual([])
})
it('should preserve original result metadata', () => {
// 验证原始结果的元数据保持不变
const rawResults = [createMockRawResult('https://example.com', 'Complex Title with Special Characters & Symbols')]
const references = [createMockReference('https://example.com', 'New content', 1)]
const result = consolidateReferencesByUrl(rawResults, references)
expect(result[0].title).toBe('Complex Title with Special Characters & Symbols')
expect(result[0].url).toBe('https://example.com')
})
})
describe('selectReferences', () => {
const createMockRawResult = (url: string, title: string): WebSearchProviderResult => ({
title,
url,
content: `Original content for ${title}`
})
const createMockReference = (sourceUrl: string, content: string, id: number = 1): KnowledgeReference => ({
id,
sourceUrl,
content,
type: 'url'
})
it('should select references using round robin strategy', () => {
const rawResults = [
createMockRawResult('https://a.com', 'A'),
createMockRawResult('https://b.com', 'B'),
createMockRawResult('https://c.com', 'C')
]
const references = [
createMockReference('https://a.com', 'A1', 1),
createMockReference('https://a.com', 'A2', 2),
createMockReference('https://b.com', 'B1', 3),
createMockReference('https://c.com', 'C1', 4),
createMockReference('https://c.com', 'C2', 5)
]
const result = selectReferences(rawResults, references, 4)
expect(result).toHaveLength(4)
// 按照 rawResults 顺序轮询A1, B1, C1, A2
expect(result[0].content).toBe('A1')
expect(result[1].content).toBe('B1')
expect(result[2].content).toBe('C1')
expect(result[3].content).toBe('A2')
})
it('should handle maxRefs larger than available references', () => {
const rawResults = [createMockRawResult('https://a.com', 'A')]
const references = [createMockReference('https://a.com', 'A1', 1)]
const result = selectReferences(rawResults, references, 10)
expect(result).toHaveLength(1)
expect(result[0].content).toBe('A1')
})
it('should return empty array for edge cases', () => {
const rawResults = [createMockRawResult('https://a.com', 'A')]
const references = [createMockReference('https://a.com', 'A1', 1)]
// maxRefs is 0
expect(selectReferences(rawResults, references, 0)).toEqual([])
// empty references
expect(selectReferences(rawResults, [], 5)).toEqual([])
// no matching URLs
const nonMatchingRefs = [createMockReference('https://different.com', 'Content', 1)]
expect(selectReferences(rawResults, nonMatchingRefs, 5)).toEqual([])
})
it('should preserve rawResults order in round robin', () => {
// rawResults 的顺序应该影响轮询顺序
const rawResults = [
createMockRawResult('https://z.com', 'Z'), // 应该第一个被选择
createMockRawResult('https://a.com', 'A') // 应该第二个被选择
]
const references = [createMockReference('https://a.com', 'A1', 1), createMockReference('https://z.com', 'Z1', 2)]
const result = selectReferences(rawResults, references, 2)
expect(result).toHaveLength(2)
expect(result[0].content).toBe('Z1') // Z 先被选择
expect(result[1].content).toBe('A1') // A 后被选择
})
})
})

View File

@ -0,0 +1,116 @@
import { KnowledgeReference, WebSearchProviderResult } from '@renderer/types'
/**
* URL整合为搜索结果
*
*
* URL分组并合并为最终的搜索结果
*
* @param rawResults URL信息
* @param references
* @param separator 使 '\n\n---\n\n'
* @returns
*/
export function consolidateReferencesByUrl(
rawResults: WebSearchProviderResult[],
references: KnowledgeReference[],
separator: string = '\n\n---\n\n'
): WebSearchProviderResult[] {
// 创建URL到原始结果的映射用于快速查找
const urlToOriginalResult = new Map(rawResults.map((result) => [result.url, result]))
// 使用 reduce 进行分组和内容收集
const sourceGroups = references.reduce((groups, reference) => {
const originalResult = urlToOriginalResult.get(reference.sourceUrl)
if (!originalResult) return groups
const existing = groups.get(reference.sourceUrl)
if (existing) {
// 如果已存在该URL的分组直接添加内容
existing.contents.push(reference.content)
} else {
// 创建新的分组
groups.set(reference.sourceUrl, {
originalResult,
contents: [reference.content]
})
}
return groups
}, new Map<string, { originalResult: WebSearchProviderResult; contents: string[] }>())
// 转换为最终结果
return Array.from(sourceGroups.values(), (group) => ({
title: group.originalResult.title,
url: group.originalResult.url,
content: group.contents.join(separator)
}))
}
/**
* 使 Round Robin
*
*
* @param rawResults
* @param references
* @param maxRefs
* @returns Round Robin
*/
export function selectReferences(
rawResults: WebSearchProviderResult[],
references: KnowledgeReference[],
maxRefs: number
): KnowledgeReference[] {
if (maxRefs <= 0 || references.length === 0) {
return []
}
// 建立URL到索引的映射用于确定轮询顺序
const urlToIndex = new Map<string, number>()
rawResults.forEach((result, index) => {
urlToIndex.set(result.url, index)
})
// 按sourceUrl分组references每组内按原顺序保持已按分数排序
const groupsByUrl = new Map<string, KnowledgeReference[]>()
references.forEach((ref) => {
if (!groupsByUrl.has(ref.sourceUrl)) {
groupsByUrl.set(ref.sourceUrl, [])
}
groupsByUrl.get(ref.sourceUrl)!.push(ref)
})
// 获取有效的URL列表按rawResults顺序排序
const availableUrls = Array.from(groupsByUrl.keys())
.filter((url) => urlToIndex.has(url))
.sort((a, b) => urlToIndex.get(a)! - urlToIndex.get(b)!)
if (availableUrls.length === 0) {
return []
}
// Round Robin 选择
const selected: KnowledgeReference[] = []
let roundIndex = 0
while (selected.length < maxRefs && availableUrls.length > 0) {
const currentUrl = availableUrls[roundIndex]
const group = groupsByUrl.get(currentUrl)!
if (group.length > 0) {
selected.push(group.shift()!)
}
// 如果当前组为空从可用URL列表中移除
if (group.length === 0) {
availableUrls.splice(roundIndex, 1)
// 调整索引避免跳过下一个URL
if (roundIndex >= availableUrls.length) {
roundIndex = 0
}
} else {
roundIndex = (roundIndex + 1) % availableUrls.length
}
}
return selected
}

View File

@ -5745,7 +5745,7 @@ __metadata:
styled-components: "npm:^6.1.11"
tar: "npm:^7.4.3"
tiny-pinyin: "npm:^1.3.2"
tokenx: "npm:^0.4.1"
tokenx: "npm:^1.1.0"
turndown: "npm:7.2.0"
typescript: "npm:^5.6.2"
uuid: "npm:^10.0.0"
@ -17588,10 +17588,10 @@ __metadata:
languageName: node
linkType: hard
"tokenx@npm:^0.4.1":
version: 0.4.1
resolution: "tokenx@npm:0.4.1"
checksum: 10c0/377f4e3c31ff9dc57b5b6af0fb1ae821227dee5e1d87b92a3ab1a0ed25454f01185c709d73592002b0d3024de1c904c8f029c46ae1806677816e4659fb8c481e
"tokenx@npm:^1.1.0":
version: 1.1.0
resolution: "tokenx@npm:1.1.0"
checksum: 10c0/8214bce58b48e130bcf4a27ac1bb5abf486c395310fb0c8f54e31656acacf97da533372afb9e8ac8f7736e6c3f29af86ea9623d4875f1399e66a5203b80609db
languageName: node
linkType: hard