feat: add Perplexity SDK integration (#10137)

* feat: add Perplexity SDK integration

* feat: enhance AiSdkToChunkAdapter with web search capabilities

- Added support for web search in AiSdkToChunkAdapter, allowing for dynamic link conversion based on provider type.
- Updated constructor to accept provider type and web search enablement flag.
- Improved link handling logic to buffer and process incomplete links.
- Enhanced message block handling in the store to accommodate new message structure.
- Updated middleware configuration to include web search option.

* fix

* fix

* chore: remove unused code

* fix: ci

* chore: log
This commit is contained in:
SuYao 2025-09-13 00:06:18 +08:00 committed by GitHub
parent f5d8974d04
commit 80afb3a86e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 143 additions and 460 deletions

View File

@ -97,6 +97,7 @@
"@ai-sdk/amazon-bedrock": "^3.0.0",
"@ai-sdk/google-vertex": "^3.0.25",
"@ai-sdk/mistral": "^2.0.0",
"@ai-sdk/perplexity": "^2.0.8",
"@ant-design/v5-patch-for-react-19": "^1.0.3",
"@anthropic-ai/sdk": "^0.41.0",
"@anthropic-ai/vertex-sdk": "patch:@anthropic-ai/vertex-sdk@npm%3A0.11.4#~/.yarn/patches/@anthropic-ai-vertex-sdk-npm-0.11.4-c19cb41edb.patch",

View File

@ -4,8 +4,9 @@
*/
import { loggerService } from '@logger'
import { MCPTool, WebSearchResults, WebSearchSource } from '@renderer/types'
import { AISDKWebSearchResult, MCPTool, WebSearchResults, WebSearchSource } from '@renderer/types'
import { Chunk, ChunkType } from '@renderer/types/chunk'
import { convertLinks, flushLinkConverterBuffer } from '@renderer/utils/linkConverter'
import type { TextStreamPart, ToolSet } from 'ai'
import { ToolCallChunkHandler } from './handleToolCallChunk'
@ -29,13 +30,18 @@ export interface CherryStudioChunk {
export class AiSdkToChunkAdapter {
toolCallHandler: ToolCallChunkHandler
private accumulate: boolean | undefined
private isFirstChunk = true
private enableWebSearch: boolean = false
constructor(
private onChunk: (chunk: Chunk) => void,
mcpTools: MCPTool[] = [],
accumulate?: boolean
accumulate?: boolean,
enableWebSearch?: boolean
) {
this.toolCallHandler = new ToolCallChunkHandler(onChunk, mcpTools)
this.accumulate = accumulate
this.enableWebSearch = enableWebSearch || false
}
/**
@ -65,11 +71,24 @@ export class AiSdkToChunkAdapter {
webSearchResults: [],
reasoningId: ''
}
// Reset link converter state at the start of stream
this.isFirstChunk = true
try {
while (true) {
const { done, value } = await reader.read()
if (done) {
// Flush any remaining content from link converter buffer if web search is enabled
if (this.enableWebSearch) {
const remainingText = flushLinkConverterBuffer()
if (remainingText) {
this.onChunk({
type: ChunkType.TEXT_DELTA,
text: remainingText
})
}
}
break
}
@ -87,7 +106,7 @@ export class AiSdkToChunkAdapter {
*/
private convertAndEmitChunk(
chunk: TextStreamPart<any>,
final: { text: string; reasoningContent: string; webSearchResults: any[]; reasoningId: string }
final: { text: string; reasoningContent: string; webSearchResults: AISDKWebSearchResult[]; reasoningId: string }
) {
logger.silly(`AI SDK chunk type: ${chunk.type}`, chunk)
switch (chunk.type) {
@ -97,17 +116,44 @@ export class AiSdkToChunkAdapter {
type: ChunkType.TEXT_START
})
break
case 'text-delta':
if (this.accumulate) {
final.text += chunk.text || ''
case 'text-delta': {
const processedText = chunk.text || ''
let finalText: string
// Only apply link conversion if web search is enabled
if (this.enableWebSearch) {
const result = convertLinks(processedText, this.isFirstChunk)
if (this.isFirstChunk) {
this.isFirstChunk = false
}
// Handle buffered content
if (result.hasBufferedContent) {
finalText = result.text
} else {
finalText = result.text || processedText
}
} else {
final.text = chunk.text || ''
// Without web search, just use the original text
finalText = processedText
}
if (this.accumulate) {
final.text += finalText
} else {
final.text = finalText
}
// Only emit chunk if there's text to send
if (finalText) {
this.onChunk({
type: ChunkType.TEXT_DELTA,
text: this.accumulate ? final.text : finalText
})
}
this.onChunk({
type: ChunkType.TEXT_DELTA,
text: final.text || ''
})
break
}
case 'text-end':
this.onChunk({
type: ChunkType.TEXT_COMPLETE,
@ -200,7 +246,7 @@ export class AiSdkToChunkAdapter {
[WebSearchSource.ANTHROPIC]: WebSearchSource.ANTHROPIC,
[WebSearchSource.OPENROUTER]: WebSearchSource.OPENROUTER,
[WebSearchSource.GEMINI]: WebSearchSource.GEMINI,
[WebSearchSource.PERPLEXITY]: WebSearchSource.PERPLEXITY,
// [WebSearchSource.PERPLEXITY]: WebSearchSource.PERPLEXITY,
[WebSearchSource.QWEN]: WebSearchSource.QWEN,
[WebSearchSource.HUNYUAN]: WebSearchSource.HUNYUAN,
[WebSearchSource.ZHIPU]: WebSearchSource.ZHIPU,
@ -268,18 +314,9 @@ export class AiSdkToChunkAdapter {
// === 源和文件相关事件 ===
case 'source':
if (chunk.sourceType === 'url') {
// if (final.webSearchResults.length === 0) {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const { sourceType: _, ...rest } = chunk
final.webSearchResults.push(rest)
// }
// this.onChunk({
// type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
// llm_web_search: {
// source: WebSearchSource.AISDK,
// results: final.webSearchResults
// }
// })
}
break
case 'file':

View File

@ -284,7 +284,7 @@ export default class ModernAiProvider {
// 创建带有中间件的执行器
if (config.onChunk) {
const accumulate = this.model!.supported_text_delta !== false // true and undefined
const adapter = new AiSdkToChunkAdapter(config.onChunk, config.mcpTools, accumulate)
const adapter = new AiSdkToChunkAdapter(config.onChunk, config.mcpTools, accumulate, config.enableWebSearch)
const streamResult = await executor.streamText({
...params,

View File

@ -1,6 +1,6 @@
import { loggerService } from '@logger'
import { ChunkType } from '@renderer/types/chunk'
import { flushLinkConverterBuffer, smartLinkConverter } from '@renderer/utils/linkConverter'
import { convertLinks, flushLinkConverterBuffer } from '@renderer/utils/linkConverter'
import { CompletionsParams, CompletionsResult, GenericChunk } from '../schemas'
import { CompletionsContext, CompletionsMiddleware } from '../types'
@ -28,8 +28,6 @@ export const WebSearchMiddleware: CompletionsMiddleware =
}
// 调用下游中间件
const result = await next(ctx, params)
const model = params.assistant?.model!
let isFirstChunk = true
// 响应后处理记录Web搜索事件
@ -42,15 +40,9 @@ export const WebSearchMiddleware: CompletionsMiddleware =
new TransformStream<GenericChunk, GenericChunk>({
transform(chunk: GenericChunk, controller) {
if (chunk.type === ChunkType.TEXT_DELTA) {
const providerType = model.provider || 'openai'
// 使用当前可用的Web搜索结果进行链接转换
const text = chunk.text
const result = smartLinkConverter(
text,
providerType,
isFirstChunk,
ctx._internal.webSearchState!.results
)
const result = convertLinks(text, isFirstChunk)
if (isFirstChunk) {
isFirstChunk = false
}

View File

@ -20,6 +20,7 @@ export interface AiSdkMiddlewareConfig {
isSupportedToolUse: boolean
// image generation endpoint
isImageGenerationEndpoint: boolean
// 是否开启内置搜索
enableWebSearch: boolean
enableGenerateImage: boolean
enableUrlContext: boolean

View File

@ -39,6 +39,14 @@ export const NEW_PROVIDER_CONFIGS: ProviderConfig[] = [
creatorFunctionName: 'createAmazonBedrock',
supportsImageGeneration: true,
aliases: ['aws-bedrock']
},
{
id: 'perplexity',
name: 'Perplexity',
import: () => import('@ai-sdk/perplexity'),
creatorFunctionName: 'createPerplexity',
supportsImageGeneration: false,
aliases: ['perplexity']
}
] as const

View File

@ -1,16 +1,19 @@
import { WebSearchResultBlock } from '@anthropic-ai/sdk/resources'
import type { GroundingMetadata } from '@google/genai'
import { createEntityAdapter, createSelector, createSlice, type PayloadAction } from '@reduxjs/toolkit'
import { Citation, WebSearchProviderResponse, WebSearchSource } from '@renderer/types'
import { AISDKWebSearchResult, Citation, WebSearchProviderResponse, WebSearchSource } from '@renderer/types'
import type { CitationMessageBlock, MessageBlock } from '@renderer/types/newMessage'
import { MessageBlockType } from '@renderer/types/newMessage'
import type OpenAI from 'openai'
import type { RootState } from './index' // 确认 RootState 从 store/index.ts 导出
// Create a simplified type for the entity adapter to avoid circular type issues
type MessageBlockEntity = MessageBlock
// 1. 创建实体适配器 (Entity Adapter)
// 我们使用块的 `id` 作为唯一标识符。
const messageBlocksAdapter = createEntityAdapter<MessageBlock>()
const messageBlocksAdapter = createEntityAdapter<MessageBlockEntity>()
// 2. 使用适配器定义初始状态 (Initial State)
// 如果需要,可以在规范化实体的旁边添加其他状态属性。
@ -20,6 +23,7 @@ const initialState = messageBlocksAdapter.getInitialState({
})
// 3. 创建 Slice
// @ts-ignore ignore
export const messageBlocksSlice = createSlice({
name: 'messageBlocks',
initialState,
@ -76,8 +80,13 @@ export const messageBlocksSelectors = messageBlocksAdapter.getSelectors<RootStat
// --- Selector Integration --- START
// Selector to get the raw block entity by ID
const selectBlockEntityById = (state: RootState, blockId: string | undefined) =>
blockId ? messageBlocksSelectors.selectById(state, blockId) : undefined // Use adapter selector
const selectBlockEntityById = (state: RootState, blockId: string | undefined): MessageBlock | undefined => {
const entity = blockId ? messageBlocksSelectors.selectById(state, blockId) : undefined
if (!entity) return undefined
// Convert back to full MessageBlock type
return entity
}
// --- Centralized Citation Formatting Logic ---
export const formatCitationsFromBlock = (block: CitationMessageBlock | undefined): Citation[] => {
@ -173,13 +182,16 @@ export const formatCitationsFromBlock = (block: CitationMessageBlock | undefined
case WebSearchSource.GROK:
case WebSearchSource.OPENROUTER:
formattedCitations =
(block.response.results as any[])?.map((url, index) => {
(block.response.results as AISDKWebSearchResult[])?.map((result, index) => {
const url = result.url
try {
const hostname = new URL(url).hostname
const hostname = new URL(result.url).hostname
const content = result.providerMetadata && result.providerMetadata['openrouter']?.content
return {
number: index + 1,
url,
hostname,
title: result.title || hostname,
content: content as string,
showFavicon: true,
type: 'websearch'
}
@ -218,10 +230,12 @@ export const formatCitationsFromBlock = (block: CitationMessageBlock | undefined
break
case WebSearchSource.AISDK:
formattedCitations =
(block.response.results as any[])?.map((result, index) => ({
(block.response.results && (block.response.results as AISDKWebSearchResult[]))?.map((result, index) => ({
number: index + 1,
url: result.url,
title: result.title,
title: result.title || new URL(result.url).hostname,
showFavicon: true,
type: 'websearch',
providerMetadata: result?.providerMetadata
})) || []
break

View File

@ -19,6 +19,7 @@ import {
resetAssistantMessage
} from '@renderer/utils/messageUtils/create'
import { getTopicQueue, waitForTopicQueue } from '@renderer/utils/queue'
import { defaultAppHeaders } from '@shared/utils'
import { t } from 'i18next'
import { isEmpty, throttle } from 'lodash'
import { LRUCache } from 'lru-cache'
@ -369,7 +370,8 @@ const fetchAndProcessAssistantResponseImpl = async (
topicId,
options: {
signal: abortController.signal,
timeout: 30000
timeout: 30000,
headers: defaultAppHeaders()
}
},
streamProcessorCallbacks
@ -1073,7 +1075,7 @@ export const cloneMessagesToNewTopicThunk =
const oldBlock = state.messageBlocks.entities[oldBlockId]
if (oldBlock) {
const newBlockId = uuid()
const newBlock: MessageBlock = {
const newBlock = {
...oldBlock,
id: newBlockId,
messageId: newMsgId // Link block to the NEW message ID

View File

@ -1,3 +1,4 @@
import type { LanguageModelV2Source } from '@ai-sdk/provider'
import type { WebSearchResultBlock } from '@anthropic-ai/sdk/resources'
import type { GenerateImagesConfig, GroundingMetadata, PersonGeneration } from '@google/genai'
import type OpenAI from 'openai'
@ -726,12 +727,15 @@ export type WebSearchProviderResponse = {
results: WebSearchProviderResult[]
}
export type AISDKWebSearchResult = Omit<Extract<LanguageModelV2Source, { sourceType: 'url' }>, 'sourceType'>
export type WebSearchResults =
| WebSearchProviderResponse
| GroundingMetadata
| OpenAI.Chat.Completions.ChatCompletionMessage.Annotation.URLCitation[]
| OpenAI.Responses.ResponseOutputText.URLCitation[]
| WebSearchResultBlock[]
| AISDKWebSearchResult[]
| any[]
export enum WebSearchSource {

View File

@ -3,91 +3,12 @@ import { describe, expect, it } from 'vitest'
import {
cleanLinkCommas,
completeLinks,
completionPerplexityLinks,
convertLinks,
convertLinksToHunyuan,
convertLinksToOpenRouter,
convertLinksToZhipu,
extractUrlsFromMarkdown,
flushLinkConverterBuffer
} from '../linkConverter'
describe('linkConverter', () => {
// Tests for the Zhipu [ref_N] -> superscript-link converter.
describe('convertLinksToZhipu', () => {
  it('should correctly convert complete [ref_N] format', () => {
    const input = '这里有一个参考文献 [ref_1] 和另一个 [ref_2]'
    const result = convertLinksToZhipu(input, true)
    expect(result).toBe('这里有一个参考文献 [<sup>1</sup>]() 和另一个 [<sup>2</sup>]()')
  })

  it('should handle chunked input and preserve incomplete link patterns', () => {
    // First chunk ends with an incomplete [ref_N] marker
    const chunk1 = '这是第一部分 [ref'
    const result1 = convertLinksToZhipu(chunk1, true)
    expect(result1).toBe('这是第一部分 ')

    // Second chunk completes the marker
    const chunk2 = '_1] 这是剩下的部分'
    const result2 = convertLinksToZhipu(chunk2, false)
    expect(result2).toBe('[<sup>1</sup>]() 这是剩下的部分')
  })

  it('should clear buffer when resetting counter', () => {
    // First conversion without resetting
    const input1 = '第一次输入 [ref_1]'
    convertLinksToZhipu(input1, false)

    // Then reset and convert fresh input
    const input2 = '新的输入 [ref_2]'
    const result = convertLinksToZhipu(input2, true)
    expect(result).toBe('新的输入 [<sup>2</sup>]()')
  })
})
// Tests for the Hunyuan [N](@ref) -> superscript-link converter.
describe('convertLinksToHunyuan', () => {
  it('should correctly convert [N](@ref) format to links with URLs', () => {
    const webSearch = [{ url: 'https://example.com/1' }, { url: 'https://example.com/2' }]
    const input = '这里有单个引用 [1](@ref) 和多个引用 [2](@ref)'
    const result = convertLinksToHunyuan(input, webSearch, true)
    expect(result).toBe(
      '这里有单个引用 [<sup>1</sup>](https://example.com/1) 和多个引用 [<sup>2</sup>](https://example.com/2)'
    )
  })

  it('should correctly handle comma-separated multiple references', () => {
    const webSearch = [
      { url: 'https://example.com/1' },
      { url: 'https://example.com/2' },
      { url: 'https://example.com/3' }
    ]
    const input = '这里有多个引用 [1, 2, 3](@ref)'
    const result = convertLinksToHunyuan(input, webSearch, true)
    expect(result).toBe(
      '这里有多个引用 [<sup>1</sup>](https://example.com/1)[<sup>2</sup>](https://example.com/2)[<sup>3</sup>](https://example.com/3)'
    )
  })

  it('should handle non-existent reference indices', () => {
    const webSearch = [{ url: 'https://example.com/1' }]
    const input = '这里有一个超出范围的引用 [2](@ref)'
    const result = convertLinksToHunyuan(input, webSearch, true)
    // Out-of-range references keep the raw @ref form
    expect(result).toBe('这里有一个超出范围的引用 [<sup>2</sup>](@ref)')
  })

  it('should handle incomplete reference formats in chunked input', () => {
    const webSearch = [{ url: 'https://example.com/1' }]
    // First chunk ends with an incomplete marker
    const chunk1 = '这是第一部分 ['
    const result1 = convertLinksToHunyuan(chunk1, webSearch, true)
    expect(result1).toBe('这是第一部分 ')

    // Second chunk completes the marker
    const chunk2 = '1](@ref) 这是剩下的部分'
    const result2 = convertLinksToHunyuan(chunk2, webSearch, false)
    expect(result2).toBe('[<sup>1</sup>](https://example.com/1) 这是剩下的部分')
  })
})
describe('convertLinks', () => {
it('should convert number links to numbered links', () => {
const input = '参考 [1](https://example.com/1) 和 [2](https://example.com/2)'
@ -226,8 +147,10 @@ describe('linkConverter', () => {
it('should handle real links split across small chunks with proper buffering', () => {
// 模拟真实链接被分割成小chunks的情况 - 更现实的分割方式
const chunks = [
'Please visit [example.com](', // 不完整链接
'https://example.com) for details' // 完成链接
'Please visit [example.',
'com](', // 不完整链接'
'https://exa',
'mple.com) for details' // 完成链接'
]
let accumulatedText = ''
@ -235,14 +158,24 @@ describe('linkConverter', () => {
// 第一个chunk包含不完整链接 [text](
const result1 = convertLinks(chunks[0], true)
expect(result1.text).toBe('Please visit ') // 只返回安全部分
expect(result1.hasBufferedContent).toBe(true) // [example.com]( 被缓冲
expect(result1.hasBufferedContent).toBe(true) //
accumulatedText += result1.text
// 第二个chunk:完成链接
// 第二个chunk
const result2 = convertLinks(chunks[1], false)
expect(result2.text).toBe('[<sup>1</sup>](https://example.com) for details') // 完整链接 + 剩余文本
expect(result2.hasBufferedContent).toBe(false)
accumulatedText += result2.text
expect(result2.text).toBe('')
expect(result2.hasBufferedContent).toBe(true)
// 第三个chunk
const result3 = convertLinks(chunks[2], false)
expect(result3.text).toBe('')
expect(result3.hasBufferedContent).toBe(true)
accumulatedText += result3.text
// 第四个chunk
const result4 = convertLinks(chunks[3], false)
expect(result4.text).toBe('[<sup>1</sup>](https://example.com) for details')
expect(result4.hasBufferedContent).toBe(false)
accumulatedText += result4.text
// 验证最终结果
expect(accumulatedText).toBe('Please visit [<sup>1</sup>](https://example.com) for details')
@ -293,32 +226,6 @@ describe('linkConverter', () => {
})
})
// Tests for the OpenRouter host-text link converter.
describe('convertLinksToOpenRouter', () => {
  it('should only convert links with domain-like text', () => {
    const input = '网站 [example.com](https://example.com) 和 [点击这里](https://other.com)'
    const result = convertLinksToOpenRouter(input, true)
    expect(result).toBe('网站 [<sup>1</sup>](https://example.com) 和 [点击这里](https://other.com)')
  })

  it('should use the same counter for duplicate URLs', () => {
    const input = '两个相同的链接 [example.com](https://example.com) 和 [example.org](https://example.com)'
    const result = convertLinksToOpenRouter(input, true)
    expect(result).toBe('两个相同的链接 [<sup>1</sup>](https://example.com) 和 [<sup>1</sup>](https://example.com)')
  })

  it('should handle incomplete links in chunked input', () => {
    // First chunk ends with an unfinished link
    const chunk1 = '这是域名链接 ['
    const result1 = convertLinksToOpenRouter(chunk1, true)
    expect(result1).toBe('这是域名链接 ')

    // Second chunk completes the link
    const chunk2 = 'example.com](https://example.com)'
    const result2 = convertLinksToOpenRouter(chunk2, false)
    expect(result2).toBe('[<sup>1</sup>](https://example.com)')
  })
})
describe('completeLinks', () => {
it('should complete empty links with webSearch data', () => {
const webSearch = [{ link: 'https://example.com/1' }, { link: 'https://example.com/2' }]
@ -383,13 +290,4 @@ describe('linkConverter', () => {
expect(result).toBe('[链接1](https://example.com)[链接2](https://other.com)')
})
})
// Tests for Perplexity [num] marker completion against web-search results.
describe('completionPerplexityLinks', () => {
  it('should complete links with webSearch data', () => {
    const webSearch = [{ url: 'https://example.com/1' }, { url: 'https://example.com/2' }]
    const input = '参考 [1] 和 [2]'
    const result = completionPerplexityLinks(input, webSearch)
    expect(result).toBe('参考 [1](https://example.com/1) 和 [2](https://example.com/2)')
  })
})
})

View File

@ -1,5 +1,3 @@
import { WebSearchResponse, WebSearchSource } from '@renderer/types'
// Counter for numbering links
let linkCounter = 1
// Buffer to hold incomplete link fragments across chunks
@ -17,109 +15,6 @@ function isHost(text: string): boolean {
return /^(https?:\/\/)?[\w.-]+\.[a-z]{2,}(\/.*)?$/i.test(text) || /^[\w.-]+\.[a-z]{2,}(\/.*)?$/i.test(text)
}
/**
 * Converts complete [ref_N] citation markers in streamed text into numbered superscript links:
 * [ref_N] -> [<sup>N</sup>]()
 *
 * Markers that are still incomplete at the end of a chunk are held in the shared
 * buffer until a later chunk completes them.
 * @param {string} text The current chunk of text to process
 * @param {boolean} resetCounter Whether to reset the counter and buffer
 * @returns {string} Processed text with complete links converted
 */
export function convertLinksToZhipu(text: string, resetCounter: boolean = false): string {
  if (resetCounter) {
    linkCounter = 1
    buffer = ''
  }

  // Accumulate the incoming chunk onto whatever was left over from previous chunks.
  buffer += text

  // Walk backwards to find the last '[' that does NOT start a complete [ref_N]
  // marker; everything from that point on must wait for more input.
  let safePoint = buffer.length
  for (let i = buffer.length - 1; i >= 0; i--) {
    if (buffer[i] !== '[') continue
    if (!/^\[ref_\d+\]/.test(buffer.substring(i))) {
      safePoint = i
      break
    }
  }

  // Emit the safe prefix and keep the (possibly incomplete) tail buffered.
  const ready = buffer.substring(0, safePoint)
  buffer = buffer.substring(safePoint)

  // Every [ref_N] marker inside the safe prefix is guaranteed complete here.
  return ready.replace(/\[ref_(\d+)\]/g, (_match, num) => `[<sup>${num}</sup>]()`)
}
/**
 * Converts Hunyuan-style citation markers into superscript links backed by web-search URLs:
 * [N](@ref)       -> [<sup>N</sup>](url)
 * [N,M,...](@ref) -> [<sup>N</sup>](url)[<sup>M</sup>](url)...
 *
 * Markers that are still incomplete at the end of a chunk stay in the shared buffer.
 * @param {string} text The current chunk of text to process
 * @param {any[]} webSearch webSearch results (1-based marker numbers index into this array)
 * @param {boolean} resetCounter Whether to reset the counter and buffer
 * @returns {string} Processed text with complete links converted
 */
export function convertLinksToHunyuan(text: string, webSearch: any[], resetCounter: boolean = false): string {
  if (resetCounter) {
    linkCounter = 1
    buffer = ''
  }

  buffer += text

  // Locate the last '[' that does not open a complete [N](@ref) / [N,M,...](@ref) marker.
  let safePoint = buffer.length
  for (let i = buffer.length - 1; i >= 0; i--) {
    if (buffer[i] !== '[') continue
    if (!/^\[[\d,\s]+\]\(@ref\)/.test(buffer.substring(i))) {
      safePoint = i
      break
    }
  }

  const ready = buffer.substring(0, safePoint)
  buffer = buffer.substring(safePoint)

  return ready.replace(/\[([\d,\s]+)\]\(@ref\)/g, (_match, numbers: string) => {
    // A single marker may cite several sources: "1, 2, 3".
    const cited = numbers
      .split(',')
      .map((n) => parseInt(n.trim()))
      .filter((n) => !isNaN(n))

    // Emit one superscript link per cited number, concatenated without separators.
    return cited
      .map((n) => {
        const source = webSearch[n - 1]
        // Fall back to the raw @ref form when no matching search result (or URL) exists.
        return source?.url ? `[<sup>${n}</sup>](${source.url})` : `[<sup>${n}</sup>](@ref)`
      })
      .join('')
  })
}
/**
* Converts Markdown links in the text to numbered links based on the rules:
* 1. ([host](url)) -> [cnt](url)
@ -171,13 +66,21 @@ export function convertLinks(
break
}
// 检查是否是完整的链接但需要验证
// 检查是否是完整的链接
const completeLink = /^\[([^\]]+)\]\(([^)]+)\)/.test(substring)
if (completeLink) {
// 如果是完整链接继续处理不设置safePoint
continue
}
// 检查是否是不完整的 [ 开始但还没有闭合的 ]
// 例如 [example. 这种情况
const incompleteBracket = /^\[[^\]]*$/.test(substring)
if (incompleteBracket) {
safePoint = i
break
}
// 如果不是潜在的链接格式,继续检查
}
}
@ -263,65 +166,6 @@ export function convertLinks(
}
}
/**
 * Converts Markdown links whose visible text looks like a host name into numbered links:
 * [host](url) -> [<sup>cnt</sup>](url)
 *
 * The same URL always receives the same number; links whose text is not host-like are
 * left untouched. Incomplete links at a chunk boundary are buffered.
 * @param {string} text The current chunk of text to process
 * @param {boolean} resetCounter Whether to reset the counter and buffer
 * @returns {string} Processed text with complete links converted
 */
export function convertLinksToOpenRouter(text: string, resetCounter = false): string {
  if (resetCounter) {
    linkCounter = 1
    buffer = ''
    urlToCounterMap = new Map<string, number>()
  }

  buffer += text

  // Find the last '[' that does not begin a complete [text](url) link; hold the rest back.
  let safePoint = buffer.length
  for (let i = buffer.length - 1; i >= 0; i--) {
    if (buffer[i] !== '[') continue
    if (!/^\[([^\]]+)\]\(([^)]+)\)/.test(buffer.substring(i))) {
      safePoint = i
      break
    }
  }

  const ready = buffer.substring(0, safePoint)
  buffer = buffer.substring(safePoint)

  return ready.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (original, linkText, url) => {
    // Leave ordinary prose links (e.g. "[click here](...)") unchanged.
    if (!isHost(linkText)) return original

    // Reuse the number previously assigned to this URL, or allocate the next one.
    let counter = urlToCounterMap.get(url)
    if (counter === undefined) {
      counter = linkCounter++
      urlToCounterMap.set(url, counter)
    }
    return `[<sup>${counter}</sup>](${url})`
  })
}
/**
* Uses webSearch results to complete empty links: [<sup>num</sup>]() -> [<sup>num</sup>](webSearch[num-1].url)
* @param {string} text
@ -341,25 +185,6 @@ export function completeLinks(text: string, webSearch: any[]): string {
})
}
/**
 * Completes bare Perplexity citation markers using webSearch results:
 * [num] -> [num](webSearch[num-1].url)
 *
 * Markers whose number has no corresponding search result (out of range, missing
 * entry, or missing url) are left unchanged.
 * @param {string} text Text containing [num] citation markers
 * @param {any[]} webSearch webSearch results (1-based marker numbers index into this array)
 * @returns {string} Text with resolvable markers expanded into Markdown links
 */
export function completionPerplexityLinks(text: string, webSearch: any[]): string {
  return text.replace(/\[(\d+)\]/g, (match, numStr) => {
    const num = parseInt(numStr)
    const index = num - 1
    // Optional chaining: a sparse/undefined entry falls through to the original
    // marker instead of throwing (consistent with convertLinksToHunyuan).
    if (index >= 0 && index < webSearch.length && webSearch[index]?.url) {
      return `[${num}](${webSearch[index].url})`
    }
    // No matching URL — keep the marker as-is.
    return match
  })
}
/**
* Extracts all URLs from a Markdown text
*
@ -412,118 +237,6 @@ export function cleanLinkCommas(text: string): string {
return text.replace(/\]\(([^)]+)\)\s*,\s*\[/g, ']($1)[')
}
/**
 * Extracts every web-search citation placeholder found in the text.
 * Recognized formats: [1], [ref_1], [1](@ref), [1,2,3](@ref)
 *
 * Note: overlapping formats are reported independently, so a marker such as
 * "[1](@ref)" yields multiple entries. Results are ordered by start position.
 * @param {string} text Text to scan for citation placeholders
 * @returns {Array} One record per match: matched text, cited numbers, and character span
 */
export function extractWebSearchReferences(text: string): Array<{
  match: string
  placeholder: string
  numbers: number[]
  startIndex: number
  endIndex: number
}> {
  // Pattern table: one entry per supported citation syntax.
  const patterns = [
    // [1], [2], [3] — plain numeric references
    { regex: /\[(\d+)\]/g, type: 'simple' },
    // [ref_1], [ref_2] — Zhipu format
    { regex: /\[ref_(\d+)\]/g, type: 'zhipu' },
    // [1](@ref) — Hunyuan single-reference format
    { regex: /\[(\d+)\]\(@ref\)/g, type: 'hunyuan_single' },
    // [1,2,3](@ref) — Hunyuan multi-reference format
    { regex: /\[([\d,\s]+)\]\(@ref\)/g, type: 'hunyuan_multiple' }
  ]

  const found: Array<{
    match: string
    placeholder: string
    numbers: number[]
    startIndex: number
    endIndex: number
  }> = []

  for (const { regex, type } of patterns) {
    for (const m of text.matchAll(regex)) {
      // Multi-reference markers carry a comma-separated list; everything else is one number.
      const numbers =
        type === 'hunyuan_multiple'
          ? m[1]
              .split(',')
              .map((num) => parseInt(num.trim()))
              .filter((num) => !isNaN(num))
          : [parseInt(m[1])]

      found.push({
        match: m[0],
        placeholder: m[0],
        numbers,
        startIndex: m.index!,
        endIndex: m.index! + m[0].length
      })
    }
  }

  // Stable sort keeps the pattern-table order for matches starting at the same position.
  return found.sort((a, b) => a.startIndex - b.startIndex)
}
/**
 * Picks the appropriate link-conversion strategy for a chunk of streamed text,
 * based on the web-search source and the citation formats detected in the text.
 * @param {string} text The current chunk of text to process
 * @param {string} providerType Provider type ('openai', 'zhipu', 'hunyuan', 'openrouter', etc.)
 * @param {boolean} resetCounter Whether to reset the shared counter/buffer state
 * @param {WebSearchResponse} [webSearchResults] Web search results, when available
 * @returns {{text: string, hasBufferedContent: boolean}} Converted text and buffering state
 */
export function smartLinkConverter(
  text: string,
  providerType: string = 'openai',
  resetCounter: boolean = false,
  webSearchResults?: WebSearchResponse
): { text: string; hasBufferedContent: boolean } {
  // Perplexity returns bare [num] markers; expand them with result URLs up front.
  if (webSearchResults?.source === WebSearchSource.PERPLEXITY) {
    text = completionPerplexityLinks(text, webSearchResults.results as any[])
  }

  // Inspect the chunk for recognizable citation placeholders.
  const references = extractWebSearchReferences(text)

  // No special markers: the generic converter handles plain Markdown links.
  if (references.length === 0) {
    return convertLinks(text, resetCounter)
  }

  // Zhipu's [ref_N] markers take precedence over provider-based selection.
  if (references.some((ref) => ref.placeholder.includes('ref_'))) {
    return {
      text: convertLinksToZhipu(text, resetCounter),
      hasBufferedContent: false
    }
  }

  if (providerType === 'openrouter') {
    return {
      text: convertLinksToOpenRouter(text, resetCounter),
      hasBufferedContent: false
    }
  }

  return convertLinks(text, resetCounter)
}
/**
* buffer中的所有内容
* @returns {string} buffer中剩余的所有内容

View File

@ -239,6 +239,18 @@ __metadata:
languageName: node
linkType: hard
"@ai-sdk/perplexity@npm:^2.0.8":
version: 2.0.8
resolution: "@ai-sdk/perplexity@npm:2.0.8"
dependencies:
"@ai-sdk/provider": "npm:2.0.0"
"@ai-sdk/provider-utils": "npm:3.0.8"
peerDependencies:
zod: ^3.25.76 || ^4
checksum: 10c0/acfd6c09c4c0ef5af7eeec6e8bc20b90b24d1d3fc2bc8ee9de4e40770fc0c17ca2c8db8f0248ff07264b71e5aa65f64d37a165db2f43fee84c1b3513cb97983c
languageName: node
linkType: hard
"@ai-sdk/provider-utils@npm:3.0.3":
version: 3.0.3
resolution: "@ai-sdk/provider-utils@npm:3.0.3"
@ -13023,6 +13035,7 @@ __metadata:
"@ai-sdk/amazon-bedrock": "npm:^3.0.0"
"@ai-sdk/google-vertex": "npm:^3.0.25"
"@ai-sdk/mistral": "npm:^2.0.0"
"@ai-sdk/perplexity": "npm:^2.0.8"
"@ant-design/v5-patch-for-react-19": "npm:^1.0.3"
"@anthropic-ai/sdk": "npm:^0.41.0"
"@anthropic-ai/vertex-sdk": "patch:@anthropic-ai/vertex-sdk@npm%3A0.11.4#~/.yarn/patches/@anthropic-ai-vertex-sdk-npm-0.11.4-c19cb41edb.patch"