升级了下筛选

2025-12-25 11:20:07 +08:00 · 2025-04-24 01:34:35 +08:00 · 2025-04-24 01:34:35 +08:00 · 05d110e4af
commit 05d110e4af
parent d33e16fa81
4 changed files with 438 additions and 126 deletions
--- a/src/renderer/src/components/WebSearchInitializer.tsx
+++ b/src/renderer/src/components/WebSearchInitializer.tsx
@ -22,9 +22,9 @@ const WebSearchInitializer = () => {
      const deepSearchProvider: WebSearchProvider = {
        id: 'deep-search',
        name: 'DeepSearch',
-        description: '多引擎深度搜索',
        usingBrowser: true,
-        contentLimit: 10000
+        contentLimit: 10000,
+        description: '多引擎深度搜索'
      }
      dispatch(addWebSearchProvider(deepSearchProvider))
    }
--- a/src/renderer/src/providers/WebSearchProvider/DeepSearchProvider.ts
+++ b/src/renderer/src/providers/WebSearchProvider/DeepSearchProvider.ts
@ -25,6 +25,41 @@ export default class DeepSearchProvider extends BaseWebSearchProvider {
    }
  ]

+  // 定义URL过滤规则
+  private urlFilters = {
+    // 排除的域名
+    excludedDomains: [
+      'login', 'signin', 'signup', 'register', 'account',
+      'download', 'shop', 'store', 'buy', 'cart', 'checkout',
+      'ads', 'advertisement', 'sponsor', 'tracking',
+      'facebook.com', 'twitter.com', 'instagram.com', 'pinterest.com',
+      'youtube.com/channel', 'youtube.com/user', 'youtube.com/c/',
+      'tiktok.com', 'douyin.com', 'weibo.com', 'zhihu.com/question',
+      'baike.baidu.com', 'wiki.com', 'wikipedia.org/wiki/Help:',
+      'wikipedia.org/wiki/Wikipedia:', 'wikipedia.org/wiki/Template:',
+      'wikipedia.org/wiki/File:', 'wikipedia.org/wiki/Category:',
+      'amazon.com/s', 'amazon.cn/s', 'taobao.com/search', 'jd.com/search',
+      'tmall.com/search', 'ebay.com/sch', 'aliexpress.com/wholesale'
+    ],
+    // 优先的域名（相关性更高）
+    priorityDomains: [
+      'github.com/augment', 'augmentcode.com', 'augment.dev',
+      'github.com', 'stackoverflow.com', 'dev.to', 'medium.com',
+      'docs.github.com', 'npmjs.com', 'pypi.org', 'microsoft.com/en-us/learn',
+      'developer.mozilla.org', 'w3schools.com', 'reactjs.org', 'vuejs.org',
+      'angular.io', 'tensorflow.org', 'pytorch.org', 'kubernetes.io',
+      'docker.com', 'aws.amazon.com/documentation', 'cloud.google.com/docs',
+      'azure.microsoft.com/en-us/documentation'
+    ],
+    // 排除的文件类型
+    excludedFileTypes: [
+      '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg', '.webp',
+      '.mp3', '.mp4', '.avi', '.mov', '.wmv', '.flv', '.wav', '.ogg',
+      '.zip', '.rar', '.7z', '.tar', '.gz', '.exe', '.dmg', '.apk',
+      '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx'
+    ]
+  }
+
  // 分析模型配置
  private analyzeConfig = {
    enabled: true, // 是否启用预分析
@ -198,56 +233,246 @@ export default class DeepSearchProvider extends BaseWebSearchProvider {
    const batchSize = this.analyzeConfig.batchSize
    const analyzedResults: AnalyzedResult[] = [...results] // 复制原始结果

-    // 简单的分析逻辑：提取前几句作为摘要
+    // 预处理查询，提取重要关键词
+    const queryWords = query.toLowerCase().split(/\s+/)
+
+    // 过滤掉常见的停用词
+    const stopWords = new Set([
+      'a', 'an', 'the', 'and', 'or', 'but', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
+      'in', 'on', 'at', 'to', 'for', 'with', 'by', 'about', 'against', 'between', 'into', 'through',
+      'during', 'before', 'after', 'above', 'below', 'from', 'up', 'down', 'of', 'off', 'over', 'under',
+      'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any',
+      'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own',
+      'same', 'so', 'than', 'too', 'very', 'can', 'will', 'just', 'should', 'now'
+    ])
+
+    // 提取重要关键词，并为每个词分配权重
+    const keywordWeights = new Map<string, number>()
+    queryWords.forEach((word, index) => {
+      if (word.length > 2 && !stopWords.has(word)) {
+        // 根据词的位置分配权重，前面的词权重更高
+        const positionWeight = 1 - (index / queryWords.length) * 0.5 // 位置权重范围：0.5-1.0
+        // 根据词的长度分配权重，更长的词权重更高
+        const lengthWeight = Math.min(1, word.length / 10) // 长度权重最高为1
+        // 组合权重
+        const weight = positionWeight * 0.7 + lengthWeight * 0.3
+        keywordWeights.set(word, weight)
+      }
+    })
+
+    // 如果没有提取到关键词，使用原始查询词
+    if (keywordWeights.size === 0) {
+      queryWords.forEach(word => {
+        if (word.length > 2) {
+          keywordWeights.set(word, 1.0)
+        }
+      })
+    }
+
+    // 分析每个结果
    for (let i = 0; i < results.length; i++) {
      const result = results[i]
      if (result.content === noContent) continue

      try {
-        // 提取摘要（简单实现，取前300个字符）
+        // 提取摘要（改进实现，尝试找到包含关键词的最相关段落）
        const maxLength = this.analyzeConfig.maxSummaryLength
-        let summary = result.content.replace(/\n+/g, ' ').replace(/\s+/g, ' ').trim()
+        const content = result.content.replace(/\n+/g, ' ').replace(/\s+/g, ' ').trim()

-        if (summary.length > maxLength) {
-          // 截取到最后一个完整的句子
-          summary = summary.substring(0, maxLength)
+        // 将内容分成句子
+        const sentences = content.split(/(?<=[.!?])\s+/)
+
+        // 为每个句子评分
+        const sentenceScores = sentences.map(sentence => {
+          const sentenceLower = sentence.toLowerCase()
+          let score = 0
+
+          // 根据句子中包含的关键词计算分数
+          for (const [keyword, weight] of keywordWeights.entries()) {
+            if (sentenceLower.includes(keyword)) {
+              score += weight
+
+              // 如果关键词在句子开头，给予额外加分
+              if (sentenceLower.indexOf(keyword) < 20) {
+                score += weight * 0.5
+              }
+            }
+          }
+
+          return { sentence, score }
+        })
+
+        // 按分数排序句子
+        sentenceScores.sort((a, b) => b.score - a.score)
+
+        // 选择最相关的句子作为摘要
+        let summary = ''
+        let currentLength = 0
+
+        // 首先添加得分最高的句子
+        if (sentenceScores.length > 0 && sentenceScores[0].score > 0) {
+          summary = sentenceScores[0].sentence
+          currentLength = summary.length
+        }
+
+        // 如果还有空间，添加更多相关句子
+        for (let j = 1; j < sentenceScores.length && currentLength < maxLength; j++) {
+          if (sentenceScores[j].score > 0) {
+            const nextSentence = sentenceScores[j].sentence
+            if (currentLength + nextSentence.length + 1 <= maxLength) {
+              summary += ' ' + nextSentence
+              currentLength += nextSentence.length + 1
+            }
+          }
+        }
+
+        // 如果没有找到相关句子，回退到简单摘要提取
+        if (summary.length === 0) {
+          summary = content.substring(0, maxLength)
          const lastPeriod = summary.lastIndexOf('.')
          if (lastPeriod > maxLength * 0.7) {
-            // 至少要有总长度的70%
            summary = summary.substring(0, lastPeriod + 1)
          }
          summary += '...'
+        } else if (summary.length < content.length) {
+          summary += '...'
        }

-        // 提取关键词（简单实现，基于查询词拆分）
-        const keywords = query
-          .split(/\s+/)
-          .filter((word) => word.length > 2 && result.content.toLowerCase().includes(word.toLowerCase()))
+        // 提取关键词（改进实现）
+        const contentLower = result.content.toLowerCase()
+        const keywordScores = new Map<string, number>()

-        // 计算相关性评分（简单实现，基于关键词出现频率）
-        let relevanceScore = 0
-        if (keywords.length > 0) {
-          const contentLower = result.content.toLowerCase()
-          for (const word of keywords) {
-            const wordLower = word.toLowerCase()
-            // 计算关键词出现的次数
-            let count = 0
-            let pos = contentLower.indexOf(wordLower)
-            while (pos !== -1) {
-              count++
-              pos = contentLower.indexOf(wordLower, pos + 1)
-            }
-            relevanceScore += count
+        // 从内容中提取潜在关键词
+        const contentWords = contentLower.split(/\W+/).filter(word =>
+          word.length > 3 && !stopWords.has(word)
+        )
+
+        // 计算词频
+        const wordFrequency = new Map<string, number>()
+        contentWords.forEach(word => {
+          wordFrequency.set(word, (wordFrequency.get(word) || 0) + 1)
+        })
+
+        // 为每个词评分
+        for (const [word, freq] of wordFrequency.entries()) {
+          // 基础分数是词频
+          let score = freq
+
+          // 如果是查询关键词，增加分数
+          if (keywordWeights.has(word)) {
+            score += freq * keywordWeights.get(word)! * 3
          }
-          // 标准化评分，范围为0-1
-          relevanceScore = Math.min(1, relevanceScore / (contentLower.length / 100))
+
+          // 如果在标题中出现，增加分数
+          if (result.title.toLowerCase().includes(word)) {
+            score += 5
+          }
+
+          keywordScores.set(word, score)
        }

+        // 添加查询关键词（如果内容中包含）
+        for (const [keyword, weight] of keywordWeights.entries()) {
+          if (contentLower.includes(keyword) && !keywordScores.has(keyword)) {
+            keywordScores.set(keyword, weight * 3)
+          }
+        }
+
+        // 选择得分最高的关键词
+        const sortedKeywords = Array.from(keywordScores.entries())
+          .sort((a, b) => b[1] - a[1])
+          .slice(0, 5)
+          .map(entry => entry[0])
+
+        // 计算相关性评分（改进实现）
+        let relevanceScore = 0
+
+        // 1. 基于关键词匹配度的评分
+        let keywordMatchScore = 0
+        let keywordCount = 0
+
+        for (const [keyword, weight] of keywordWeights.entries()) {
+          // 计算关键词出现的次数
+          let count = 0
+          let pos = contentLower.indexOf(keyword)
+          while (pos !== -1) {
+            count++
+            pos = contentLower.indexOf(keyword, pos + 1)
+          }
+
+          if (count > 0) {
+            keywordCount++
+            // 权重 * 出现次数 * 归一化因子
+            keywordMatchScore += weight * Math.min(10, count) / 10
+          }
+        }
+
+        // 归一化关键词匹配分数
+        if (keywordWeights.size > 0) {
+          keywordMatchScore = keywordMatchScore / keywordWeights.size
+        }
+
+        // 2. 基于标题相关性的评分
+        let titleScore = 0
+        const titleLower = result.title.toLowerCase()
+
+        for (const [keyword, weight] of keywordWeights.entries()) {
+          if (titleLower.includes(keyword)) {
+            titleScore += weight
+          }
+        }
+
+        // 归一化标题分数
+        if (keywordWeights.size > 0) {
+          titleScore = titleScore / keywordWeights.size
+        }
+
+        // 3. 基于内容长度的评分（适中长度最佳）
+        const contentLength = result.content.length
+        const lengthScore = Math.min(1, contentLength / 2000) * (contentLength < 50000 ? 1 : 0.5)
+
+        // 4. 基于URL的评分（官方网站、知名网站加分）
+        let urlScore = 0
+        const url = result.url.toLowerCase()
+
+        // 首先检查是否有预先计算的优先级分数
+        if (result.meta && result.meta.priorityScore) {
+          // 使用预先计算的分数
+          urlScore = result.meta.priorityScore
+        } else {
+          // 如果没有预先计算的分数，使用基于域名的评分
+          // 检查是否是官方网站或知名网站
+          if (url.includes('github.com/augment') ||
+              url.includes('augmentcode.com') ||
+              url.includes('augment.dev')) {
+            urlScore = 1.0  // 官方网站最高分
+          } else if (url.includes('github.com') ||
+                    url.includes('stackoverflow.com') ||
+                    url.includes('medium.com') ||
+                    url.includes('dev.to')) {
+            urlScore = 0.8  // 知名技术网站高分
+          } else if (!url.includes('login') &&
+                    !url.includes('signup') &&
+                    !url.includes('register')) {
+            urlScore = 0.5  // 普通网站中等分
+          }
+        }
+
+        // 组合所有评分因素，调整权重以提高URL质量的重要性
+        relevanceScore =
+          keywordMatchScore * 0.4 + // 关键词匹配度占40%
+          titleScore * 0.3 +        // 标题相关性占30%
+          lengthScore * 0.05 +      // 内容长度占5%
+          urlScore * 0.25           // URL质量占25%，增加了权重
+
+        // 确保分数在0-1范围内
+        relevanceScore = Math.min(1, Math.max(0, relevanceScore))
+
        // 更新分析结果
        analyzedResults[i] = {
          ...analyzedResults[i],
          summary,
-          keywords,
+          keywords: sortedKeywords,
          relevanceScore
        }

@ -267,16 +492,22 @@ export default class DeepSearchProvider extends BaseWebSearchProvider {
      return scoreB - scoreA
    })

-    console.log(`[DeepSearch] 完成分析 ${results.length} 个结果`)
-    return analyzedResults
+    // 过滤掉明显不相关的结果，提高阈值以只保留更相关的结果
+    const filteredResults = analyzedResults.filter(result => {
+      const score = (result as AnalyzedResult).relevanceScore || 0
+      return score > 0.2 // 提高阈值到 0.2，只保留相关性分数较高的结果
+    })
+
+    console.log(`[DeepSearch] 完成分析 ${results.length} 个结果，过滤后剩余 ${filteredResults.length} 个结果`)
+    return filteredResults
  }

  /**
   * 解析搜索结果页面中的URL
   * 默认实现，子类可以覆盖此方法以适应不同的搜索引擎
   */
-  protected parseValidUrls(htmlContent: string): Array<{ title: string; url: string }> {
-    const results: Array<{ title: string; url: string }> = []
+  protected parseValidUrls(htmlContent: string): Array<{ title: string; url: string; meta?: Record<string, any> }> {
+    const results: Array<{ title: string; url: string; meta?: Record<string, any> }> = []

    try {
      // 通用解析逻辑，查找所有链接
@ -288,7 +519,11 @@ export default class DeepSearchProvider extends BaseWebSearchProvider {
        ...doc.querySelectorAll('#content_left .result h3 a'),
        ...doc.querySelectorAll('#content_left .c-container h3 a'),
        ...doc.querySelectorAll('#content_left .c-container a.c-title'),
-        ...doc.querySelectorAll('#content_left a[data-click]')
+        ...doc.querySelectorAll('#content_left a[data-click]'),
+        // 添加更多选择器来适应百度的变化
+        ...doc.querySelectorAll('#content_left .result-op a[href*="http"]'),
+        ...doc.querySelectorAll('#content_left .c-container .t a'),
+        ...doc.querySelectorAll('#content_left .c-container .c-title-text')
      ]

      // 尝试解析Bing搜索结果 - 使用多个选择器来获取更多结果
@ -296,7 +531,11 @@ export default class DeepSearchProvider extends BaseWebSearchProvider {
        ...doc.querySelectorAll('.b_algo h2 a'),
        ...doc.querySelectorAll('.b_algo a.tilk'),
        ...doc.querySelectorAll('.b_algo a.b_title'),
-        ...doc.querySelectorAll('.b_results a.b_restorLink')
+        ...doc.querySelectorAll('.b_results a.b_restorLink'),
+        // 添加更多选择器来适应Bing的变化
+        ...doc.querySelectorAll('.b_algo .b_caption a'),
+        ...doc.querySelectorAll('.b_algo .b_attribution cite'),
+        ...doc.querySelectorAll('.b_algo .b_deep a')
      ]

      // 尝试解析DuckDuckGo搜索结果 - 使用多个选择器来获取更多结果
@ -368,19 +607,17 @@ export default class DeepSearchProvider extends BaseWebSearchProvider {
            const url = (link as HTMLAnchorElement).href
            const title = link.textContent || url

-            // 过滤掉搜索引擎内部链接和重复链接
-            if (
-              url &&
-              (url.startsWith('http') || url.startsWith('https')) &&
-              !url.includes('google.com/search') &&
-              !url.includes('bing.com/search') &&
-              !url.includes('baidu.com/s?') &&
-              !uniqueUrls.has(url)
-            ) {
+            // 使用过滤方法检查URL
+            if (url && !this.shouldFilterUrl(url) && !uniqueUrls.has(url)) {
+              // 计算URL优先级分数
+              const priorityScore = this.getUrlPriorityScore(url)
+
              uniqueUrls.add(url)
              results.push({
                title: title.trim() || url,
-                url: url
+                url: url,
+                // 将优先级分数作为元数据保存
+                meta: { priorityScore }
              })
            }
          } catch (error) {
@ -399,19 +636,17 @@ export default class DeepSearchProvider extends BaseWebSearchProvider {
            const url = (link as HTMLAnchorElement).href
            const title = link.textContent || url

-            // 过滤掉搜索引擎内部链接和重复链接
-            if (
-              url &&
-              (url.startsWith('http') || url.startsWith('https')) &&
-              !url.includes('google.com/search') &&
-              !url.includes('bing.com/search') &&
-              !url.includes('baidu.com/s?') &&
-              !uniqueUrls.has(url)
-            ) {
+            // 使用过滤方法检查URL
+            if (url && !this.shouldFilterUrl(url) && !uniqueUrls.has(url)) {
+              // 计算URL优先级分数
+              const priorityScore = this.getUrlPriorityScore(url)
+
              uniqueUrls.add(url)
              results.push({
                title: title.trim() || url,
-                url: url
+                url: url,
+                // 将优先级分数作为元数据保存
+                meta: { priorityScore }
              })
            }
          } catch (error) {
@ -430,21 +665,17 @@ export default class DeepSearchProvider extends BaseWebSearchProvider {
            const url = (link as HTMLAnchorElement).href
            const title = link.textContent || url

-            // 过滤掉搜索引擎内部链接和重复链接
-            if (
-              url &&
-              (url.startsWith('http') || url.startsWith('https')) &&
-              !url.includes('google.com/search') &&
-              !url.includes('bing.com/search') &&
-              !url.includes('baidu.com/s?') &&
-              !url.includes('sogou.com/web') &&
-              !url.includes('duckduckgo.com/?q=') &&
-              !uniqueUrls.has(url)
-            ) {
+            // 使用过滤方法检查URL
+            if (url && !this.shouldFilterUrl(url) && !uniqueUrls.has(url)) {
+              // 计算URL优先级分数
+              const priorityScore = this.getUrlPriorityScore(url)
+
              uniqueUrls.add(url)
              results.push({
                title: title.trim() || url,
-                url: url
+                url: url,
+                // 将优先级分数作为元数据保存
+                meta: { priorityScore }
              })
            }
          } catch (error) {
@ -465,21 +696,25 @@ export default class DeepSearchProvider extends BaseWebSearchProvider {
              const url = (link as HTMLAnchorElement).href
              const title = link.textContent || url

-              // 更宽松的过滤条件
+              // 使用过滤方法检查URL，但使用更宽松的条件
              if (
                url &&
                (url.startsWith('http') || url.startsWith('https')) &&
-                !url.includes('sogou.com/web') &&
+                !url.includes('sogou.com/web') && // 仍然过滤掉搜索引擎内部链接
                !url.includes('javascript:') &&
                !url.includes('mailto:') &&
                !url.includes('tel:') &&
                !uniqueUrls.has(url) &&
                title.trim().length > 0
              ) {
+                // 计算URL优先级分数
+                const priorityScore = this.getUrlPriorityScore(url)
+
                uniqueUrls.add(url)
                results.push({
                  title: title.trim() || url,
-                  url: url
+                  url: url,
+                  meta: { priorityScore }
                })
              }
            } catch (error) {
@ -501,22 +736,17 @@ export default class DeepSearchProvider extends BaseWebSearchProvider {
            const url = (link as HTMLAnchorElement).href
            const title = link.textContent || url

-            // 过滤掉搜索引擎内部链接和重复链接
-            if (
-              url &&
-              (url.startsWith('http') || url.startsWith('https')) &&
-              !url.includes('google.com/search') &&
-              !url.includes('bing.com/search') &&
-              !url.includes('baidu.com/s?') &&
-              !url.includes('sogou.com/web') &&
-              !url.includes('duckduckgo.com/?q=') &&
-              !url.includes('searx.tiekoetter.com/search') &&
-              !uniqueUrls.has(url)
-            ) {
+            // 使用过滤方法检查URL
+            if (url && !this.shouldFilterUrl(url) && !uniqueUrls.has(url)) {
+              // 计算URL优先级分数
+              const priorityScore = this.getUrlPriorityScore(url)
+
              uniqueUrls.add(url)
              results.push({
                title: title.trim() || url,
-                url: url
+                url: url,
+                // 将优先级分数作为元数据保存
+                meta: { priorityScore }
              })
            }
          } catch (error) {
@ -547,10 +777,14 @@ export default class DeepSearchProvider extends BaseWebSearchProvider {
                !uniqueUrls.has(url) &&
                title.trim().length > 0
              ) {
+                // 计算URL优先级分数
+                const priorityScore = this.getUrlPriorityScore(url)
+
                uniqueUrls.add(url)
                results.push({
                  title: title.trim() || url,
-                  url: url
+                  url: url,
+                  meta: { priorityScore }
                })
              }
            } catch (error) {
@ -600,10 +834,14 @@ export default class DeepSearchProvider extends BaseWebSearchProvider {
                !uniqueUrls.has(url) &&
                title.trim().length > 0
              ) {
+                // 计算URL优先级分数
+                const priorityScore = this.getUrlPriorityScore(url)
+
                uniqueUrls.add(url)
                results.push({
                  title: title.trim() || url,
-                  url: url
+                  url: url,
+                  meta: { priorityScore }
                })
              }
            } catch (error) {
@ -617,20 +855,17 @@ export default class DeepSearchProvider extends BaseWebSearchProvider {
              const url = (link as HTMLAnchorElement).href
              const title = link.textContent || url

-              // 过滤掉搜索引擎内部链接和重复链接
-              if (
-                url &&
-                (url.startsWith('http') || url.startsWith('https')) &&
-                !url.includes('google.com/search') &&
-                !url.includes('bing.com/search') &&
-                !url.includes('baidu.com/s?') &&
-                !url.includes('duckduckgo.com/?q=') &&
-                !uniqueUrls.has(url)
-              ) {
+              // 使用过滤方法检查URL
+              if (url && !this.shouldFilterUrl(url) && !uniqueUrls.has(url)) {
+                // 计算URL优先级分数
+                const priorityScore = this.getUrlPriorityScore(url)
+
                uniqueUrls.add(url)
                results.push({
                  title: title.trim() || url,
-                  url: url
+                  url: url,
+                  // 将优先级分数作为元数据保存
+                  meta: { priorityScore }
                })
              }
            } catch (error) {
@ -661,10 +896,14 @@ export default class DeepSearchProvider extends BaseWebSearchProvider {
              !url.includes('searx.tiekoetter.com/search') &&
              !uniqueUrls.has(url)
            ) {
+              // 计算URL优先级分数
+              const priorityScore = this.getUrlPriorityScore(url)
+
              uniqueUrls.add(url)
              results.push({
                title: url,
-                url: url
+                url: url,
+                meta: { priorityScore }
              })
            }
          }
@ -684,31 +923,17 @@ export default class DeepSearchProvider extends BaseWebSearchProvider {
            const url = (link as HTMLAnchorElement).href
            const title = link.textContent || url

-            // 过滤掉无效链接和搜索引擎内部链接
-            if (
-              url &&
-              (url.startsWith('http') || url.startsWith('https')) &&
-              !url.includes('google.com/search') &&
-              !url.includes('bing.com/search') &&
-              !url.includes('baidu.com/s?') &&
-              !url.includes('duckduckgo.com/?q=') &&
-              !url.includes('sogou.com/web') &&
-              !url.includes('searx.tiekoetter.com/search') &&
-              !uniqueUrls.has(url) &&
-              // 过滤掉常见的无用链接
-              !url.includes('javascript:') &&
-              !url.includes('mailto:') &&
-              !url.includes('tel:') &&
-              !url.includes('login') &&
-              !url.includes('register') &&
-              !url.includes('signup') &&
-              !url.includes('signin') &&
-              title.trim().length > 0
-            ) {
+            // 使用过滤方法检查URL
+            if (url && !this.shouldFilterUrl(url) && !uniqueUrls.has(url) && title.trim().length > 0) {
+              // 计算URL优先级分数
+              const priorityScore = this.getUrlPriorityScore(url)
+
              uniqueUrls.add(url)
              results.push({
                title: title.trim(),
-                url: url
+                url: url,
+                // 将优先级分数作为元数据保存
+                meta: { priorityScore }
              })
            }
          } catch (error) {
@ -725,6 +950,81 @@ export default class DeepSearchProvider extends BaseWebSearchProvider {
    return results
  }

+  /**
+   * 检查URL是否应该被过滤掉
+   * @param url 要检查的URL
+   * @returns 如果应该过滤掉返回true，否则返回false
+   */
+  private shouldFilterUrl(url: string): boolean {
+    if (!url || !url.startsWith('http')) {
+      return true
+    }
+
+    const urlLower = url.toLowerCase()
+
+    // 检查是否是搜索引擎内部链接
+    if (
+      urlLower.includes('google.com/search') ||
+      urlLower.includes('bing.com/search') ||
+      urlLower.includes('baidu.com/s?') ||
+      urlLower.includes('sogou.com/web') ||
+      urlLower.includes('duckduckgo.com/?q=') ||
+      urlLower.includes('searx.tiekoetter.com/search')
+    ) {
+      return true
+    }
+
+    // 检查是否包含排除的域名
+    for (const domain of this.urlFilters.excludedDomains) {
+      if (urlLower.includes(domain)) {
+        return true
+      }
+    }
+
+    // 检查是否是排除的文件类型
+    for (const fileType of this.urlFilters.excludedFileTypes) {
+      if (urlLower.endsWith(fileType)) {
+        return true
+      }
+    }
+
+    // 检查是否是常见的无用链接
+    if (
+      urlLower.includes('javascript:') ||
+      urlLower.includes('mailto:') ||
+      urlLower.includes('tel:') ||
+      urlLower.includes('about:') ||
+      urlLower.includes('chrome:') ||
+      urlLower.includes('file:')
+    ) {
+      return true
+    }
+
+    return false
+  }
+
+  /**
+   * 计算URL的优先级分数
+   * @param url 要计算的URL
+   * @returns 优先级分数，范围从0到1，越高越优先
+   */
+  private getUrlPriorityScore(url: string): number {
+    if (!url) return 0
+
+    const urlLower = url.toLowerCase()
+
+    // 检查是否是优先域名
+    for (let i = 0; i < this.urlFilters.priorityDomains.length; i++) {
+      const domain = this.urlFilters.priorityDomains[i]
+      if (urlLower.includes(domain)) {
+        // 根据域名在数组中的位置计算分数，前面的域名分数更高
+        return 1 - (i / this.urlFilters.priorityDomains.length) * 0.5
+      }
+    }
+
+    return 0
+  }
+
  /**
   * 深度抓取内容
   * 不仅抓取搜索结果页面，还会抓取页面中的链接
@ -848,8 +1148,13 @@ export default class DeepSearchProvider extends BaseWebSearchProvider {

    while ((match = markdownLinkRegex.exec(markdown)) !== null) {
      const url = match[2]
-      if (url && (url.startsWith('http') || url.startsWith('https'))) {
-        urls.add(url)
+      if (url && (url.startsWith('http') || url.startsWith('https')) && !this.shouldFilterUrl(url)) {
+        // 计算URL优先级分数
+        const priorityScore = this.getUrlPriorityScore(url)
+        // 优先级较高的URL更有可能被添加
+        if (priorityScore > 0.3 || Math.random() < 0.7) {
+          urls.add(url)
+        }
      }
    }

@ -857,8 +1162,13 @@ export default class DeepSearchProvider extends BaseWebSearchProvider {
    const urlRegex = /(https?:\/\/[^\s]+)/g
    while ((match = urlRegex.exec(markdown)) !== null) {
      const url = match[1]
-      if (url) {
-        urls.add(url)
+      if (url && !this.shouldFilterUrl(url)) {
+        // 计算URL优先级分数
+        const priorityScore = this.getUrlPriorityScore(url)
+        // 优先级较高的URL更有可能被添加
+        if (priorityScore > 0.3 || Math.random() < 0.5) {
+          urls.add(url)
+        }
      }
    }

--- a/src/renderer/src/store/websearch.ts
+++ b/src/renderer/src/store/websearch.ts
@ -46,9 +46,9 @@ const initialState: WebSearchState = {
    {
      id: 'deep-search',
      name: 'DeepSearch',
-      description: '多引擎深度搜索',
      usingBrowser: true,
-      contentLimit: 10000
+      contentLimit: 10000,
+      description: '多引擎深度搜索'
    },
    {
      id: 'local-google',
--- a/src/renderer/src/types/index.ts
+++ b/src/renderer/src/types/index.ts
@ -351,6 +351,7 @@ export type WebSearchProvider = {
  basicAuthPassword?: string
  contentLimit?: number
  usingBrowser?: boolean
+  description?: string
 }

 export type WebSearchResponse = {
@ -363,6 +364,7 @@ export type WebSearchResult = {
  content: string
  url: string
  source?: string
+  meta?: Record<string, any> // 添加meta字段用于存储元数据
 }

 export type KnowledgeReference = {