fix(SearchService): Fix inability to retrieve search results from Bing, Baidu, and Google

This commit fixes a bug where search results could not be retrieved from Bing, Baidu, and Google.
The root cause of this issue was a discrepancy in page content when the Electron window was hidden versus when it was visible. Additionally, the previous use of `did-finish-load` caused page jitter within the window, leading to sporadic failures in fetching search content.
To resolve this, I've enabled offscreen rendering, ensuring consistent page content regardless of window visibility. Furthermore, I've switched to using the `ready-to-show` event to ensure the complete page DOM is available before attempting to retrieve content, thereby eliminating the search bug.
* feat(fetch): add per-domain request throttling when fetching web content (one request per 500 ms to the same domain, via p-queue)
Co-authored-by: suyao <sy20010504@gmail.com>
This commit is contained in:
Le Bao 2026-01-07 17:23:17 +08:00 committed by GitHub
parent 040f4daa98
commit b83fbc0ace
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 84 additions and 6 deletions

View File

@ -22,7 +22,8 @@ export class SearchService {
webPreferences: {
nodeIntegration: true,
contextIsolation: false,
devTools: is.dev
devTools: is.dev,
offscreen: true // Enable offscreen rendering
}
})
@ -68,7 +69,8 @@ export class SearchService {
// Wait for the page to fully load before getting the content
await new Promise<void>((resolve) => {
const loadTimeout = setTimeout(() => resolve(), 10000) // 10 second timeout
window.webContents.once('did-finish-load', () => {
window.once('ready-to-show', () => {
// Run after the page has finished loading; the previous .webContents.once('did-finish-load') hook caused page jitter
clearTimeout(loadTimeout)
// Small delay to ensure JavaScript has executed
setTimeout(resolve, 500)
@ -76,7 +78,9 @@ export class SearchService {
})
// Get the page content after ensuring it's fully loaded
return await window.webContents.executeJavaScript('document.documentElement.outerHTML')
const executeJavaScript = await window.webContents.executeJavaScript('document.documentElement.outerHTML')
// logger.info(executeJavaScript)
return executeJavaScript
}
}

View File

@ -181,6 +181,47 @@ describe('fetch', () => {
consoleSpy.mockRestore()
})
it('should throttle requests to the same domain', async () => {
  // Record the wall-clock time at which each mocked fetch call is made.
  const callTimestamps: number[] = []
  vi.mocked(global.fetch).mockImplementation(async () => {
    callTimestamps.push(Date.now())
    return createMockResponse()
  })

  // Three URLs that share one domain.
  const sameDomainUrls = ['https://zhihu.com/a', 'https://zhihu.com/b', 'https://zhihu.com/c']
  await fetchWebContents(sameDomainUrls)

  // Exactly three calls are required; the gap checks below rely on that.
  expect(callTimestamps).toHaveLength(3)

  // Consecutive requests must be spaced out: at least 400ms apart, which leaves
  // some slack below the 500ms interval.
  const [firstCall, secondCall, thirdCall] = callTimestamps
  expect(secondCall - firstCall).toBeGreaterThanOrEqual(400)
  expect(thirdCall - secondCall).toBeGreaterThanOrEqual(400)
})
it('should allow parallel requests to different domains', async () => {
  // Start time of the mocked fetch call, keyed by the requested URL.
  const fetchCallTimes: Map<string, number> = new Map()
  vi.mocked(global.fetch).mockImplementation(async (url) => {
    fetchCallTimes.set(url as string, Date.now())
    return createMockResponse()
  })

  // URLs from different domains. The hostnames are dedicated to this test on purpose:
  // the per-domain queues are module-level state shared between tests, so reusing a
  // domain that another test has just throttled can delay that one request by up to
  // the throttle interval and break the "nearly simultaneous" assertion below.
  const urls = ['https://parallel-a.test/a', 'https://parallel-b.test/b', 'https://parallel-c.test/c']
  await fetchWebContents(urls)

  expect(fetchCallTimes.size).toBe(3)

  // Different domains should start nearly simultaneously (within 100ms)
  const times = Array.from(fetchCallTimes.values())
  const maxDiff = Math.max(...times) - Math.min(...times)
  expect(maxDiff).toBeLessThan(100)
})
})
describe('fetchRedirectUrl', () => {

View File

@ -4,6 +4,7 @@ import { nanoid } from '@reduxjs/toolkit'
import type { WebSearchProviderResult } from '@renderer/types'
import { createAbortPromise } from '@renderer/utils/abortController'
import { isAbortError } from '@renderer/utils/error'
import PQueue from 'p-queue'
import TurndownService from 'turndown'
const logger = loggerService.withContext('Utils:fetch')
@ -13,6 +14,33 @@ export const noContent = 'No content found'
type ResponseFormat = 'markdown' | 'html' | 'text'
// Domain queue management for throttling requests to the same domain
// Maps a domain key to the queue that paces requests for it; entries are created
// lazily by getDomainQueue.
const domainQueues = new Map<string, PQueue>()
// Passed as the `concurrency` option of every per-domain queue.
const DOMAIN_CONCURRENCY = 1
const DOMAIN_INTERVAL = 500 // ms between requests to the same domain
/**
 * Returns the throttling queue for `domain`, creating and caching it on first use.
 *
 * A new queue is configured with concurrency DOMAIN_CONCURRENCY and allows one
 * task per DOMAIN_INTERVAL milliseconds (intervalCap: 1).
 *
 * @param domain - Key identifying the queue.
 * @returns The queue stored in `domainQueues` for that key.
 */
function getDomainQueue(domain: string): PQueue {
  const cachedQueue = domainQueues.get(domain)
  if (cachedQueue !== undefined) {
    return cachedQueue
  }

  // First request for this domain: build its queue and remember it.
  const newQueue = new PQueue({
    concurrency: DOMAIN_CONCURRENCY,
    interval: DOMAIN_INTERVAL,
    intervalCap: 1
  })
  domainQueues.set(domain, newQueue)
  return newQueue
}
/**
 * Extracts the hostname of `url`.
 *
 * @param url - The URL string to inspect.
 * @returns The parsed hostname, or the literal 'unknown' when `url` cannot be
 *   parsed by the URL constructor.
 */
function getDomain(url: string): string {
  // Start from the fallback and overwrite it only if parsing succeeds.
  let hostname = 'unknown'
  try {
    hostname = new URL(url).hostname
  } catch {
    // Unparseable input: keep the fallback value.
  }
  return hostname
}
/**
* Validates if the string is a properly formatted URL
*/
@ -31,10 +59,15 @@ export async function fetchWebContents(
usingBrowser: boolean = false,
httpOptions: RequestInit = {}
): Promise<WebSearchProviderResult[]> {
// parallel using fetchWebContent
const results = await Promise.allSettled(urls.map((url) => fetchWebContent(url, format, usingBrowser, httpOptions)))
const results = await Promise.allSettled(
urls.map((url) => {
const domain = getDomain(url)
const queue = getDomainQueue(domain)
return queue.add(() => fetchWebContent(url, format, usingBrowser, httpOptions), { throwOnTimeout: true })
})
)
return results.map((result, index) => {
if (result.status === 'fulfilled') {
if (result.status === 'fulfilled' && result.value) {
return result.value
} else {
return {