From b83fbc0acec44da14ab87385aa1779840470a17f Mon Sep 17 00:00:00 2001 From: Le Bao <77217928+TacKana@users.noreply.github.com> Date: Wed, 7 Jan 2026 17:23:17 +0800 Subject: [PATCH] fix(SearchService): Fix inability to retrieve search results from Bing, Baidu, and Google This commit fixes a bug where search results could not be retrieved from Bing, Baidu, and Google. The root cause of this issue was a discrepancy in page content when the Electron window was hidden versus when it was visible. Additionally, the previous use of `did-finish-load` caused page jitter within the window, leading to sporadic failures in fetching search content. To resolve this, I've enabled offscreen rendering, ensuring consistent page content regardless of window visibility. Furthermore, I've switched to using the `ready-to-show` event to ensure the complete page DOM is available before attempting to retrieve content, thereby eliminating the search bug. * feat(fetch): add request throttling (already present in the original, keeping it) Co-authored-by: suyao --- src/main/services/SearchService.ts | 10 +++-- .../src/utils/__tests__/fetch.test.ts | 41 +++++++++++++++++++ src/renderer/src/utils/fetch.ts | 39 ++++++++++++++++-- 3 files changed, 84 insertions(+), 6 deletions(-) diff --git a/src/main/services/SearchService.ts b/src/main/services/SearchService.ts index 6c69f80889..03a154dcd4 100644 --- a/src/main/services/SearchService.ts +++ b/src/main/services/SearchService.ts @@ -22,7 +22,8 @@ export class SearchService { webPreferences: { nodeIntegration: true, contextIsolation: false, - devTools: is.dev + devTools: is.dev, + offscreen: true // Enable offscreen rendering } }) @@ -68,7 +69,8 @@ export class SearchService { // Wait for the page to fully load before getting the content await new Promise<void>((resolve) => { const loadTimeout = setTimeout(() => resolve(), 10000) // 10 second timeout - window.webContents.once('did-finish-load', () => { + window.once('ready-to-show', () => { + 
// Run once the page has finished loading; the previous .webContents.once('did-finish-load') handler caused page jitter clearTimeout(loadTimeout) // Small delay to ensure JavaScript has executed setTimeout(resolve, 500) @@ -76,7 +78,9 @@ export class SearchService { }) // Get the page content after ensuring it's fully loaded - return await window.webContents.executeJavaScript('document.documentElement.outerHTML') + const executeJavaScript = await window.webContents.executeJavaScript('document.documentElement.outerHTML') + // logger.info(executeJavaScript) + return executeJavaScript } } diff --git a/src/renderer/src/utils/__tests__/fetch.test.ts b/src/renderer/src/utils/__tests__/fetch.test.ts index 6b36cb41f8..38f51fcb42 100644 --- a/src/renderer/src/utils/__tests__/fetch.test.ts +++ b/src/renderer/src/utils/__tests__/fetch.test.ts @@ -181,6 +181,47 @@ describe('fetch', () => { consoleSpy.mockRestore() }) + + it('should throttle requests to the same domain', async () => { + const fetchCallTimes: number[] = [] + vi.mocked(global.fetch).mockImplementation(async () => { + fetchCallTimes.push(Date.now()) + return createMockResponse() + }) + + // 3 URLs from the same domain + const urls = ['https://zhihu.com/a', 'https://zhihu.com/b', 'https://zhihu.com/c'] + await fetchWebContents(urls) + + expect(fetchCallTimes).toHaveLength(3) + // Verify that requests are spaced out (at least 400ms apart due to 500ms interval) + if (fetchCallTimes.length >= 2) { + const timeDiff1 = fetchCallTimes[1] - fetchCallTimes[0] + expect(timeDiff1).toBeGreaterThanOrEqual(400) + } + if (fetchCallTimes.length >= 3) { + const timeDiff2 = fetchCallTimes[2] - fetchCallTimes[1] + expect(timeDiff2).toBeGreaterThanOrEqual(400) + } + }) + + it('should allow parallel requests to different domains', async () => { + const fetchCallTimes: Map<string, number> = new Map() + vi.mocked(global.fetch).mockImplementation(async (url) => { + fetchCallTimes.set(url as string, Date.now()) + return createMockResponse() + }) + + // URLs from different domains + const urls = 
['https://zhihu.com/a', 'https://douban.com/b', 'https://github.com/c'] + await fetchWebContents(urls) + + expect(fetchCallTimes.size).toBe(3) + // Different domains should start nearly simultaneously (within 100ms) + const times = Array.from(fetchCallTimes.values()) + const maxDiff = Math.max(...times) - Math.min(...times) + expect(maxDiff).toBeLessThan(100) + }) }) describe('fetchRedirectUrl', () => { diff --git a/src/renderer/src/utils/fetch.ts b/src/renderer/src/utils/fetch.ts index 52c91c0896..c9da595cd9 100644 --- a/src/renderer/src/utils/fetch.ts +++ b/src/renderer/src/utils/fetch.ts @@ -4,6 +4,7 @@ import { nanoid } from '@reduxjs/toolkit' import type { WebSearchProviderResult } from '@renderer/types' import { createAbortPromise } from '@renderer/utils/abortController' import { isAbortError } from '@renderer/utils/error' +import PQueue from 'p-queue' import TurndownService from 'turndown' const logger = loggerService.withContext('Utils:fetch') @@ -13,6 +14,33 @@ export const noContent = 'No content found' type ResponseFormat = 'markdown' | 'html' | 'text' +// Domain queue management for throttling requests to the same domain +const domainQueues = new Map() +const DOMAIN_CONCURRENCY = 1 +const DOMAIN_INTERVAL = 500 // ms between requests to the same domain + +function getDomainQueue(domain: string): PQueue { + if (!domainQueues.has(domain)) { + domainQueues.set( + domain, + new PQueue({ + concurrency: DOMAIN_CONCURRENCY, + interval: DOMAIN_INTERVAL, + intervalCap: 1 + }) + ) + } + return domainQueues.get(domain)! 
+} + +function getDomain(url: string): string { + try { + return new URL(url).hostname + } catch { + return 'unknown' + } +} + /** * Validates if the string is a properly formatted URL */ @@ -31,10 +59,15 @@ export async function fetchWebContents( usingBrowser: boolean = false, httpOptions: RequestInit = {} ): Promise<WebSearchProviderResult[]> { - // parallel using fetchWebContent - const results = await Promise.allSettled(urls.map((url) => fetchWebContent(url, format, usingBrowser, httpOptions))) + const results = await Promise.allSettled( + urls.map((url) => { + const domain = getDomain(url) + const queue = getDomainQueue(domain) + return queue.add(() => fetchWebContent(url, format, usingBrowser, httpOptions), { throwOnTimeout: true }) + }) + ) return results.map((result, index) => { - if (result.status === 'fulfilled') { + if (result.status === 'fulfilled' && result.value) { return result.value } else { return {