mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2026-01-10 07:19:02 +08:00
Revert "fix(SearchService): Fix inability to retrieve search results from Bing, Baidu, and Google"
This reverts commit b83fbc0ace.
This commit is contained in:
parent
3ec6e1167f
commit
7766438853
@ -22,8 +22,7 @@ export class SearchService {
|
||||
webPreferences: {
|
||||
nodeIntegration: true,
|
||||
contextIsolation: false,
|
||||
devTools: is.dev,
|
||||
offscreen: true // 启用离屏渲染
|
||||
devTools: is.dev
|
||||
}
|
||||
})
|
||||
|
||||
@ -69,8 +68,7 @@ export class SearchService {
|
||||
// Wait for the page to fully load before getting the content
|
||||
await new Promise<void>((resolve) => {
|
||||
const loadTimeout = setTimeout(() => resolve(), 10000) // 10 second timeout
|
||||
window.once('ready-to-show', () => {
|
||||
//让网页加载完成后执行,原来的.webContents.once('did-finish-load'会导致网页抖动
|
||||
window.webContents.once('did-finish-load', () => {
|
||||
clearTimeout(loadTimeout)
|
||||
// Small delay to ensure JavaScript has executed
|
||||
setTimeout(resolve, 500)
|
||||
@ -78,9 +76,7 @@ export class SearchService {
|
||||
})
|
||||
|
||||
// Get the page content after ensuring it's fully loaded
|
||||
const executeJavaScript = await window.webContents.executeJavaScript('document.documentElement.outerHTML')
|
||||
// logger.info(executeJavaScript)
|
||||
return executeJavaScript
|
||||
return await window.webContents.executeJavaScript('document.documentElement.outerHTML')
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -181,47 +181,6 @@ describe('fetch', () => {
|
||||
|
||||
consoleSpy.mockRestore()
|
||||
})
|
||||
|
||||
it('should throttle requests to the same domain', async () => {
|
||||
const fetchCallTimes: number[] = []
|
||||
vi.mocked(global.fetch).mockImplementation(async () => {
|
||||
fetchCallTimes.push(Date.now())
|
||||
return createMockResponse()
|
||||
})
|
||||
|
||||
// 3 URLs from the same domain
|
||||
const urls = ['https://zhihu.com/a', 'https://zhihu.com/b', 'https://zhihu.com/c']
|
||||
await fetchWebContents(urls)
|
||||
|
||||
expect(fetchCallTimes).toHaveLength(3)
|
||||
// Verify that requests are spaced out (at least 400ms apart due to 500ms interval)
|
||||
if (fetchCallTimes.length >= 2) {
|
||||
const timeDiff1 = fetchCallTimes[1] - fetchCallTimes[0]
|
||||
expect(timeDiff1).toBeGreaterThanOrEqual(400)
|
||||
}
|
||||
if (fetchCallTimes.length >= 3) {
|
||||
const timeDiff2 = fetchCallTimes[2] - fetchCallTimes[1]
|
||||
expect(timeDiff2).toBeGreaterThanOrEqual(400)
|
||||
}
|
||||
})
|
||||
|
||||
it('should allow parallel requests to different domains', async () => {
|
||||
const fetchCallTimes: Map<string, number> = new Map()
|
||||
vi.mocked(global.fetch).mockImplementation(async (url) => {
|
||||
fetchCallTimes.set(url as string, Date.now())
|
||||
return createMockResponse()
|
||||
})
|
||||
|
||||
// URLs from different domains
|
||||
const urls = ['https://zhihu.com/a', 'https://douban.com/b', 'https://github.com/c']
|
||||
await fetchWebContents(urls)
|
||||
|
||||
expect(fetchCallTimes.size).toBe(3)
|
||||
// Different domains should start nearly simultaneously (within 100ms)
|
||||
const times = Array.from(fetchCallTimes.values())
|
||||
const maxDiff = Math.max(...times) - Math.min(...times)
|
||||
expect(maxDiff).toBeLessThan(100)
|
||||
})
|
||||
})
|
||||
|
||||
describe('fetchRedirectUrl', () => {
|
||||
|
||||
@ -4,7 +4,6 @@ import { nanoid } from '@reduxjs/toolkit'
|
||||
import type { WebSearchProviderResult } from '@renderer/types'
|
||||
import { createAbortPromise } from '@renderer/utils/abortController'
|
||||
import { isAbortError } from '@renderer/utils/error'
|
||||
import PQueue from 'p-queue'
|
||||
import TurndownService from 'turndown'
|
||||
|
||||
const logger = loggerService.withContext('Utils:fetch')
|
||||
@ -14,33 +13,6 @@ export const noContent = 'No content found'
|
||||
|
||||
type ResponseFormat = 'markdown' | 'html' | 'text'
|
||||
|
||||
// Domain queue management for throttling requests to the same domain
|
||||
const domainQueues = new Map<string, PQueue>()
|
||||
const DOMAIN_CONCURRENCY = 1
|
||||
const DOMAIN_INTERVAL = 500 // ms between requests to the same domain
|
||||
|
||||
function getDomainQueue(domain: string): PQueue {
|
||||
if (!domainQueues.has(domain)) {
|
||||
domainQueues.set(
|
||||
domain,
|
||||
new PQueue({
|
||||
concurrency: DOMAIN_CONCURRENCY,
|
||||
interval: DOMAIN_INTERVAL,
|
||||
intervalCap: 1
|
||||
})
|
||||
)
|
||||
}
|
||||
return domainQueues.get(domain)!
|
||||
}
|
||||
|
||||
function getDomain(url: string): string {
|
||||
try {
|
||||
return new URL(url).hostname
|
||||
} catch {
|
||||
return 'unknown'
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates if the string is a properly formatted URL
|
||||
*/
|
||||
@ -59,15 +31,10 @@ export async function fetchWebContents(
|
||||
usingBrowser: boolean = false,
|
||||
httpOptions: RequestInit = {}
|
||||
): Promise<WebSearchProviderResult[]> {
|
||||
const results = await Promise.allSettled(
|
||||
urls.map((url) => {
|
||||
const domain = getDomain(url)
|
||||
const queue = getDomainQueue(domain)
|
||||
return queue.add(() => fetchWebContent(url, format, usingBrowser, httpOptions), { throwOnTimeout: true })
|
||||
})
|
||||
)
|
||||
// parallel using fetchWebContent
|
||||
const results = await Promise.allSettled(urls.map((url) => fetchWebContent(url, format, usingBrowser, httpOptions)))
|
||||
return results.map((result, index) => {
|
||||
if (result.status === 'fulfilled' && result.value) {
|
||||
if (result.status === 'fulfilled') {
|
||||
return result.value
|
||||
} else {
|
||||
return {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user