mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-27 04:31:27 +08:00
feat: add Perplexity SDK integration (#10137)
* feat: add Perplexity SDK integration
* feat: enhance AiSdkToChunkAdapter with web search capabilities
  - Added support for web search in AiSdkToChunkAdapter, allowing for dynamic link conversion based on provider type.
  - Updated constructor to accept provider type and web search enablement flag.
  - Improved link handling logic to buffer and process incomplete links.
  - Enhanced message block handling in the store to accommodate new message structure.
  - Updated middleware configuration to include web search option.
* fix
* fix
* chore: remove unuseful code
* fix: ci
* chore: log
This commit is contained in:
parent
f5d8974d04
commit
80afb3a86e
@ -97,6 +97,7 @@
|
||||
"@ai-sdk/amazon-bedrock": "^3.0.0",
|
||||
"@ai-sdk/google-vertex": "^3.0.25",
|
||||
"@ai-sdk/mistral": "^2.0.0",
|
||||
"@ai-sdk/perplexity": "^2.0.8",
|
||||
"@ant-design/v5-patch-for-react-19": "^1.0.3",
|
||||
"@anthropic-ai/sdk": "^0.41.0",
|
||||
"@anthropic-ai/vertex-sdk": "patch:@anthropic-ai/vertex-sdk@npm%3A0.11.4#~/.yarn/patches/@anthropic-ai-vertex-sdk-npm-0.11.4-c19cb41edb.patch",
|
||||
|
||||
@ -4,8 +4,9 @@
|
||||
*/
|
||||
|
||||
import { loggerService } from '@logger'
|
||||
import { MCPTool, WebSearchResults, WebSearchSource } from '@renderer/types'
|
||||
import { AISDKWebSearchResult, MCPTool, WebSearchResults, WebSearchSource } from '@renderer/types'
|
||||
import { Chunk, ChunkType } from '@renderer/types/chunk'
|
||||
import { convertLinks, flushLinkConverterBuffer } from '@renderer/utils/linkConverter'
|
||||
import type { TextStreamPart, ToolSet } from 'ai'
|
||||
|
||||
import { ToolCallChunkHandler } from './handleToolCallChunk'
|
||||
@ -29,13 +30,18 @@ export interface CherryStudioChunk {
|
||||
export class AiSdkToChunkAdapter {
|
||||
toolCallHandler: ToolCallChunkHandler
|
||||
private accumulate: boolean | undefined
|
||||
private isFirstChunk = true
|
||||
private enableWebSearch: boolean = false
|
||||
|
||||
constructor(
|
||||
private onChunk: (chunk: Chunk) => void,
|
||||
mcpTools: MCPTool[] = [],
|
||||
accumulate?: boolean
|
||||
accumulate?: boolean,
|
||||
enableWebSearch?: boolean
|
||||
) {
|
||||
this.toolCallHandler = new ToolCallChunkHandler(onChunk, mcpTools)
|
||||
this.accumulate = accumulate
|
||||
this.enableWebSearch = enableWebSearch || false
|
||||
}
|
||||
|
||||
/**
|
||||
@ -65,11 +71,24 @@ export class AiSdkToChunkAdapter {
|
||||
webSearchResults: [],
|
||||
reasoningId: ''
|
||||
}
|
||||
// Reset link converter state at the start of stream
|
||||
this.isFirstChunk = true
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read()
|
||||
|
||||
if (done) {
|
||||
// Flush any remaining content from link converter buffer if web search is enabled
|
||||
if (this.enableWebSearch) {
|
||||
const remainingText = flushLinkConverterBuffer()
|
||||
if (remainingText) {
|
||||
this.onChunk({
|
||||
type: ChunkType.TEXT_DELTA,
|
||||
text: remainingText
|
||||
})
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
@ -87,7 +106,7 @@ export class AiSdkToChunkAdapter {
|
||||
*/
|
||||
private convertAndEmitChunk(
|
||||
chunk: TextStreamPart<any>,
|
||||
final: { text: string; reasoningContent: string; webSearchResults: any[]; reasoningId: string }
|
||||
final: { text: string; reasoningContent: string; webSearchResults: AISDKWebSearchResult[]; reasoningId: string }
|
||||
) {
|
||||
logger.silly(`AI SDK chunk type: ${chunk.type}`, chunk)
|
||||
switch (chunk.type) {
|
||||
@ -97,17 +116,44 @@ export class AiSdkToChunkAdapter {
|
||||
type: ChunkType.TEXT_START
|
||||
})
|
||||
break
|
||||
case 'text-delta':
|
||||
if (this.accumulate) {
|
||||
final.text += chunk.text || ''
|
||||
case 'text-delta': {
|
||||
const processedText = chunk.text || ''
|
||||
let finalText: string
|
||||
|
||||
// Only apply link conversion if web search is enabled
|
||||
if (this.enableWebSearch) {
|
||||
const result = convertLinks(processedText, this.isFirstChunk)
|
||||
|
||||
if (this.isFirstChunk) {
|
||||
this.isFirstChunk = false
|
||||
}
|
||||
|
||||
// Handle buffered content
|
||||
if (result.hasBufferedContent) {
|
||||
finalText = result.text
|
||||
} else {
|
||||
finalText = result.text || processedText
|
||||
}
|
||||
} else {
|
||||
final.text = chunk.text || ''
|
||||
// Without web search, just use the original text
|
||||
finalText = processedText
|
||||
}
|
||||
|
||||
if (this.accumulate) {
|
||||
final.text += finalText
|
||||
} else {
|
||||
final.text = finalText
|
||||
}
|
||||
|
||||
// Only emit chunk if there's text to send
|
||||
if (finalText) {
|
||||
this.onChunk({
|
||||
type: ChunkType.TEXT_DELTA,
|
||||
text: this.accumulate ? final.text : finalText
|
||||
})
|
||||
}
|
||||
this.onChunk({
|
||||
type: ChunkType.TEXT_DELTA,
|
||||
text: final.text || ''
|
||||
})
|
||||
break
|
||||
}
|
||||
case 'text-end':
|
||||
this.onChunk({
|
||||
type: ChunkType.TEXT_COMPLETE,
|
||||
@ -200,7 +246,7 @@ export class AiSdkToChunkAdapter {
|
||||
[WebSearchSource.ANTHROPIC]: WebSearchSource.ANTHROPIC,
|
||||
[WebSearchSource.OPENROUTER]: WebSearchSource.OPENROUTER,
|
||||
[WebSearchSource.GEMINI]: WebSearchSource.GEMINI,
|
||||
[WebSearchSource.PERPLEXITY]: WebSearchSource.PERPLEXITY,
|
||||
// [WebSearchSource.PERPLEXITY]: WebSearchSource.PERPLEXITY,
|
||||
[WebSearchSource.QWEN]: WebSearchSource.QWEN,
|
||||
[WebSearchSource.HUNYUAN]: WebSearchSource.HUNYUAN,
|
||||
[WebSearchSource.ZHIPU]: WebSearchSource.ZHIPU,
|
||||
@ -268,18 +314,9 @@ export class AiSdkToChunkAdapter {
|
||||
// === 源和文件相关事件 ===
|
||||
case 'source':
|
||||
if (chunk.sourceType === 'url') {
|
||||
// if (final.webSearchResults.length === 0) {
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
||||
const { sourceType: _, ...rest } = chunk
|
||||
final.webSearchResults.push(rest)
|
||||
// }
|
||||
// this.onChunk({
|
||||
// type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
|
||||
// llm_web_search: {
|
||||
// source: WebSearchSource.AISDK,
|
||||
// results: final.webSearchResults
|
||||
// }
|
||||
// })
|
||||
}
|
||||
break
|
||||
case 'file':
|
||||
|
||||
@ -284,7 +284,7 @@ export default class ModernAiProvider {
|
||||
// 创建带有中间件的执行器
|
||||
if (config.onChunk) {
|
||||
const accumulate = this.model!.supported_text_delta !== false // true and undefined
|
||||
const adapter = new AiSdkToChunkAdapter(config.onChunk, config.mcpTools, accumulate)
|
||||
const adapter = new AiSdkToChunkAdapter(config.onChunk, config.mcpTools, accumulate, config.enableWebSearch)
|
||||
|
||||
const streamResult = await executor.streamText({
|
||||
...params,
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import { loggerService } from '@logger'
|
||||
import { ChunkType } from '@renderer/types/chunk'
|
||||
import { flushLinkConverterBuffer, smartLinkConverter } from '@renderer/utils/linkConverter'
|
||||
import { convertLinks, flushLinkConverterBuffer } from '@renderer/utils/linkConverter'
|
||||
|
||||
import { CompletionsParams, CompletionsResult, GenericChunk } from '../schemas'
|
||||
import { CompletionsContext, CompletionsMiddleware } from '../types'
|
||||
@ -28,8 +28,6 @@ export const WebSearchMiddleware: CompletionsMiddleware =
|
||||
}
|
||||
// 调用下游中间件
|
||||
const result = await next(ctx, params)
|
||||
|
||||
const model = params.assistant?.model!
|
||||
let isFirstChunk = true
|
||||
|
||||
// 响应后处理:记录Web搜索事件
|
||||
@ -42,15 +40,9 @@ export const WebSearchMiddleware: CompletionsMiddleware =
|
||||
new TransformStream<GenericChunk, GenericChunk>({
|
||||
transform(chunk: GenericChunk, controller) {
|
||||
if (chunk.type === ChunkType.TEXT_DELTA) {
|
||||
const providerType = model.provider || 'openai'
|
||||
// 使用当前可用的Web搜索结果进行链接转换
|
||||
const text = chunk.text
|
||||
const result = smartLinkConverter(
|
||||
text,
|
||||
providerType,
|
||||
isFirstChunk,
|
||||
ctx._internal.webSearchState!.results
|
||||
)
|
||||
const result = convertLinks(text, isFirstChunk)
|
||||
if (isFirstChunk) {
|
||||
isFirstChunk = false
|
||||
}
|
||||
|
||||
@ -20,6 +20,7 @@ export interface AiSdkMiddlewareConfig {
|
||||
isSupportedToolUse: boolean
|
||||
// image generation endpoint
|
||||
isImageGenerationEndpoint: boolean
|
||||
// 是否开启内置搜索
|
||||
enableWebSearch: boolean
|
||||
enableGenerateImage: boolean
|
||||
enableUrlContext: boolean
|
||||
|
||||
@ -39,6 +39,14 @@ export const NEW_PROVIDER_CONFIGS: ProviderConfig[] = [
|
||||
creatorFunctionName: 'createAmazonBedrock',
|
||||
supportsImageGeneration: true,
|
||||
aliases: ['aws-bedrock']
|
||||
},
|
||||
{
|
||||
id: 'perplexity',
|
||||
name: 'Perplexity',
|
||||
import: () => import('@ai-sdk/perplexity'),
|
||||
creatorFunctionName: 'createPerplexity',
|
||||
supportsImageGeneration: false,
|
||||
aliases: ['perplexity']
|
||||
}
|
||||
] as const
|
||||
|
||||
|
||||
@ -1,16 +1,19 @@
|
||||
import { WebSearchResultBlock } from '@anthropic-ai/sdk/resources'
|
||||
import type { GroundingMetadata } from '@google/genai'
|
||||
import { createEntityAdapter, createSelector, createSlice, type PayloadAction } from '@reduxjs/toolkit'
|
||||
import { Citation, WebSearchProviderResponse, WebSearchSource } from '@renderer/types'
|
||||
import { AISDKWebSearchResult, Citation, WebSearchProviderResponse, WebSearchSource } from '@renderer/types'
|
||||
import type { CitationMessageBlock, MessageBlock } from '@renderer/types/newMessage'
|
||||
import { MessageBlockType } from '@renderer/types/newMessage'
|
||||
import type OpenAI from 'openai'
|
||||
|
||||
import type { RootState } from './index' // 确认 RootState 从 store/index.ts 导出
|
||||
|
||||
// Create a simplified type for the entity adapter to avoid circular type issues
|
||||
type MessageBlockEntity = MessageBlock
|
||||
|
||||
// 1. 创建实体适配器 (Entity Adapter)
|
||||
// 我们使用块的 `id` 作为唯一标识符。
|
||||
const messageBlocksAdapter = createEntityAdapter<MessageBlock>()
|
||||
const messageBlocksAdapter = createEntityAdapter<MessageBlockEntity>()
|
||||
|
||||
// 2. 使用适配器定义初始状态 (Initial State)
|
||||
// 如果需要,可以在规范化实体的旁边添加其他状态属性。
|
||||
@ -20,6 +23,7 @@ const initialState = messageBlocksAdapter.getInitialState({
|
||||
})
|
||||
|
||||
// 3. 创建 Slice
|
||||
// @ts-ignore ignore
|
||||
export const messageBlocksSlice = createSlice({
|
||||
name: 'messageBlocks',
|
||||
initialState,
|
||||
@ -76,8 +80,13 @@ export const messageBlocksSelectors = messageBlocksAdapter.getSelectors<RootStat
|
||||
// --- Selector Integration --- START
|
||||
|
||||
// Selector to get the raw block entity by ID
|
||||
const selectBlockEntityById = (state: RootState, blockId: string | undefined) =>
|
||||
blockId ? messageBlocksSelectors.selectById(state, blockId) : undefined // Use adapter selector
|
||||
const selectBlockEntityById = (state: RootState, blockId: string | undefined): MessageBlock | undefined => {
|
||||
const entity = blockId ? messageBlocksSelectors.selectById(state, blockId) : undefined
|
||||
if (!entity) return undefined
|
||||
|
||||
// MessageBlockEntity is an alias of MessageBlock, so the entity is returned as-is (no conversion needed)
|
||||
return entity
|
||||
}
|
||||
|
||||
// --- Centralized Citation Formatting Logic ---
|
||||
export const formatCitationsFromBlock = (block: CitationMessageBlock | undefined): Citation[] => {
|
||||
@ -173,13 +182,16 @@ export const formatCitationsFromBlock = (block: CitationMessageBlock | undefined
|
||||
case WebSearchSource.GROK:
|
||||
case WebSearchSource.OPENROUTER:
|
||||
formattedCitations =
|
||||
(block.response.results as any[])?.map((url, index) => {
|
||||
(block.response.results as AISDKWebSearchResult[])?.map((result, index) => {
|
||||
const url = result.url
|
||||
try {
|
||||
const hostname = new URL(url).hostname
|
||||
const hostname = new URL(result.url).hostname
|
||||
const content = result.providerMetadata && result.providerMetadata['openrouter']?.content
|
||||
return {
|
||||
number: index + 1,
|
||||
url,
|
||||
hostname,
|
||||
title: result.title || hostname,
|
||||
content: content as string,
|
||||
showFavicon: true,
|
||||
type: 'websearch'
|
||||
}
|
||||
@ -218,10 +230,12 @@ export const formatCitationsFromBlock = (block: CitationMessageBlock | undefined
|
||||
break
|
||||
case WebSearchSource.AISDK:
|
||||
formattedCitations =
|
||||
(block.response.results as any[])?.map((result, index) => ({
|
||||
(block.response.results && (block.response.results as AISDKWebSearchResult[]))?.map((result, index) => ({
|
||||
number: index + 1,
|
||||
url: result.url,
|
||||
title: result.title,
|
||||
title: result.title || new URL(result.url).hostname,
|
||||
showFavicon: true,
|
||||
type: 'websearch',
|
||||
providerMetadata: result?.providerMetadata
|
||||
})) || []
|
||||
break
|
||||
|
||||
@ -19,6 +19,7 @@ import {
|
||||
resetAssistantMessage
|
||||
} from '@renderer/utils/messageUtils/create'
|
||||
import { getTopicQueue, waitForTopicQueue } from '@renderer/utils/queue'
|
||||
import { defaultAppHeaders } from '@shared/utils'
|
||||
import { t } from 'i18next'
|
||||
import { isEmpty, throttle } from 'lodash'
|
||||
import { LRUCache } from 'lru-cache'
|
||||
@ -369,7 +370,8 @@ const fetchAndProcessAssistantResponseImpl = async (
|
||||
topicId,
|
||||
options: {
|
||||
signal: abortController.signal,
|
||||
timeout: 30000
|
||||
timeout: 30000,
|
||||
headers: defaultAppHeaders()
|
||||
}
|
||||
},
|
||||
streamProcessorCallbacks
|
||||
@ -1073,7 +1075,7 @@ export const cloneMessagesToNewTopicThunk =
|
||||
const oldBlock = state.messageBlocks.entities[oldBlockId]
|
||||
if (oldBlock) {
|
||||
const newBlockId = uuid()
|
||||
const newBlock: MessageBlock = {
|
||||
const newBlock = {
|
||||
...oldBlock,
|
||||
id: newBlockId,
|
||||
messageId: newMsgId // Link block to the NEW message ID
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import type { LanguageModelV2Source } from '@ai-sdk/provider'
|
||||
import type { WebSearchResultBlock } from '@anthropic-ai/sdk/resources'
|
||||
import type { GenerateImagesConfig, GroundingMetadata, PersonGeneration } from '@google/genai'
|
||||
import type OpenAI from 'openai'
|
||||
@ -726,12 +727,15 @@ export type WebSearchProviderResponse = {
|
||||
results: WebSearchProviderResult[]
|
||||
}
|
||||
|
||||
export type AISDKWebSearchResult = Omit<Extract<LanguageModelV2Source, { sourceType: 'url' }>, 'sourceType'>
|
||||
|
||||
export type WebSearchResults =
|
||||
| WebSearchProviderResponse
|
||||
| GroundingMetadata
|
||||
| OpenAI.Chat.Completions.ChatCompletionMessage.Annotation.URLCitation[]
|
||||
| OpenAI.Responses.ResponseOutputText.URLCitation[]
|
||||
| WebSearchResultBlock[]
|
||||
| AISDKWebSearchResult[]
|
||||
| any[]
|
||||
|
||||
export enum WebSearchSource {
|
||||
|
||||
@ -3,91 +3,12 @@ import { describe, expect, it } from 'vitest'
|
||||
import {
|
||||
cleanLinkCommas,
|
||||
completeLinks,
|
||||
completionPerplexityLinks,
|
||||
convertLinks,
|
||||
convertLinksToHunyuan,
|
||||
convertLinksToOpenRouter,
|
||||
convertLinksToZhipu,
|
||||
extractUrlsFromMarkdown,
|
||||
flushLinkConverterBuffer
|
||||
} from '../linkConverter'
|
||||
|
||||
describe('linkConverter', () => {
|
||||
describe('convertLinksToZhipu', () => {
|
||||
it('should correctly convert complete [ref_N] format', () => {
|
||||
const input = '这里有一个参考文献 [ref_1] 和另一个 [ref_2]'
|
||||
const result = convertLinksToZhipu(input, true)
|
||||
expect(result).toBe('这里有一个参考文献 [<sup>1</sup>]() 和另一个 [<sup>2</sup>]()')
|
||||
})
|
||||
|
||||
it('should handle chunked input and preserve incomplete link patterns', () => {
|
||||
// 第一个块包含未完成的模式
|
||||
const chunk1 = '这是第一部分 [ref'
|
||||
const result1 = convertLinksToZhipu(chunk1, true)
|
||||
expect(result1).toBe('这是第一部分 ')
|
||||
|
||||
// 第二个块完成该模式
|
||||
const chunk2 = '_1] 这是剩下的部分'
|
||||
const result2 = convertLinksToZhipu(chunk2, false)
|
||||
expect(result2).toBe('[<sup>1</sup>]() 这是剩下的部分')
|
||||
})
|
||||
|
||||
it('should clear buffer when resetting counter', () => {
|
||||
// 先进行一次转换不重置
|
||||
const input1 = '第一次输入 [ref_1]'
|
||||
convertLinksToZhipu(input1, false)
|
||||
|
||||
// 然后重置并进行新的转换
|
||||
const input2 = '新的输入 [ref_2]'
|
||||
const result = convertLinksToZhipu(input2, true)
|
||||
expect(result).toBe('新的输入 [<sup>2</sup>]()')
|
||||
})
|
||||
})
|
||||
|
||||
describe('convertLinksToHunyuan', () => {
|
||||
it('should correctly convert [N](@ref) format to links with URLs', () => {
|
||||
const webSearch = [{ url: 'https://example.com/1' }, { url: 'https://example.com/2' }]
|
||||
const input = '这里有单个引用 [1](@ref) 和多个引用 [2](@ref)'
|
||||
const result = convertLinksToHunyuan(input, webSearch, true)
|
||||
expect(result).toBe(
|
||||
'这里有单个引用 [<sup>1</sup>](https://example.com/1) 和多个引用 [<sup>2</sup>](https://example.com/2)'
|
||||
)
|
||||
})
|
||||
|
||||
it('should correctly handle comma-separated multiple references', () => {
|
||||
const webSearch = [
|
||||
{ url: 'https://example.com/1' },
|
||||
{ url: 'https://example.com/2' },
|
||||
{ url: 'https://example.com/3' }
|
||||
]
|
||||
const input = '这里有多个引用 [1, 2, 3](@ref)'
|
||||
const result = convertLinksToHunyuan(input, webSearch, true)
|
||||
expect(result).toBe(
|
||||
'这里有多个引用 [<sup>1</sup>](https://example.com/1)[<sup>2</sup>](https://example.com/2)[<sup>3</sup>](https://example.com/3)'
|
||||
)
|
||||
})
|
||||
|
||||
it('should handle non-existent reference indices', () => {
|
||||
const webSearch = [{ url: 'https://example.com/1' }]
|
||||
const input = '这里有一个超出范围的引用 [2](@ref)'
|
||||
const result = convertLinksToHunyuan(input, webSearch, true)
|
||||
expect(result).toBe('这里有一个超出范围的引用 [<sup>2</sup>](@ref)')
|
||||
})
|
||||
|
||||
it('should handle incomplete reference formats in chunked input', () => {
|
||||
const webSearch = [{ url: 'https://example.com/1' }]
|
||||
// 第一个块包含未完成的模式
|
||||
const chunk1 = '这是第一部分 ['
|
||||
const result1 = convertLinksToHunyuan(chunk1, webSearch, true)
|
||||
expect(result1).toBe('这是第一部分 ')
|
||||
|
||||
// 第二个块完成该模式
|
||||
const chunk2 = '1](@ref) 这是剩下的部分'
|
||||
const result2 = convertLinksToHunyuan(chunk2, webSearch, false)
|
||||
expect(result2).toBe('[<sup>1</sup>](https://example.com/1) 这是剩下的部分')
|
||||
})
|
||||
})
|
||||
|
||||
describe('convertLinks', () => {
|
||||
it('should convert number links to numbered links', () => {
|
||||
const input = '参考 [1](https://example.com/1) 和 [2](https://example.com/2)'
|
||||
@ -226,8 +147,10 @@ describe('linkConverter', () => {
|
||||
it('should handle real links split across small chunks with proper buffering', () => {
|
||||
// 模拟真实链接被分割成小chunks的情况 - 更现实的分割方式
|
||||
const chunks = [
|
||||
'Please visit [example.com](', // 不完整链接
|
||||
'https://example.com) for details' // 完成链接
|
||||
'Please visit [example.',
|
||||
'com](', // 不完整链接'
|
||||
'https://exa',
|
||||
'mple.com) for details' // 完成链接'
|
||||
]
|
||||
|
||||
let accumulatedText = ''
|
||||
@ -235,14 +158,24 @@ describe('linkConverter', () => {
|
||||
// 第一个chunk:包含不完整链接 [text](
|
||||
const result1 = convertLinks(chunks[0], true)
|
||||
expect(result1.text).toBe('Please visit ') // 只返回安全部分
|
||||
expect(result1.hasBufferedContent).toBe(true) // [example.com]( 被缓冲
|
||||
expect(result1.hasBufferedContent).toBe(true) //
|
||||
accumulatedText += result1.text
|
||||
|
||||
// 第二个chunk:完成链接
|
||||
// 第二个chunk
|
||||
const result2 = convertLinks(chunks[1], false)
|
||||
expect(result2.text).toBe('[<sup>1</sup>](https://example.com) for details') // 完整链接 + 剩余文本
|
||||
expect(result2.hasBufferedContent).toBe(false)
|
||||
accumulatedText += result2.text
|
||||
expect(result2.text).toBe('')
|
||||
expect(result2.hasBufferedContent).toBe(true)
|
||||
// 第三个chunk
|
||||
const result3 = convertLinks(chunks[2], false)
|
||||
expect(result3.text).toBe('')
|
||||
expect(result3.hasBufferedContent).toBe(true)
|
||||
accumulatedText += result3.text
|
||||
|
||||
// 第四个chunk
|
||||
const result4 = convertLinks(chunks[3], false)
|
||||
expect(result4.text).toBe('[<sup>1</sup>](https://example.com) for details')
|
||||
expect(result4.hasBufferedContent).toBe(false)
|
||||
accumulatedText += result4.text
|
||||
|
||||
// 验证最终结果
|
||||
expect(accumulatedText).toBe('Please visit [<sup>1</sup>](https://example.com) for details')
|
||||
@ -293,32 +226,6 @@ describe('linkConverter', () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe('convertLinksToOpenRouter', () => {
|
||||
it('should only convert links with domain-like text', () => {
|
||||
const input = '网站 [example.com](https://example.com) 和 [点击这里](https://other.com)'
|
||||
const result = convertLinksToOpenRouter(input, true)
|
||||
expect(result).toBe('网站 [<sup>1</sup>](https://example.com) 和 [点击这里](https://other.com)')
|
||||
})
|
||||
|
||||
it('should use the same counter for duplicate URLs', () => {
|
||||
const input = '两个相同的链接 [example.com](https://example.com) 和 [example.org](https://example.com)'
|
||||
const result = convertLinksToOpenRouter(input, true)
|
||||
expect(result).toBe('两个相同的链接 [<sup>1</sup>](https://example.com) 和 [<sup>1</sup>](https://example.com)')
|
||||
})
|
||||
|
||||
it('should handle incomplete links in chunked input', () => {
|
||||
// 第一个块包含未完成的链接
|
||||
const chunk1 = '这是域名链接 ['
|
||||
const result1 = convertLinksToOpenRouter(chunk1, true)
|
||||
expect(result1).toBe('这是域名链接 ')
|
||||
|
||||
// 第二个块完成链接
|
||||
const chunk2 = 'example.com](https://example.com)'
|
||||
const result2 = convertLinksToOpenRouter(chunk2, false)
|
||||
expect(result2).toBe('[<sup>1</sup>](https://example.com)')
|
||||
})
|
||||
})
|
||||
|
||||
describe('completeLinks', () => {
|
||||
it('should complete empty links with webSearch data', () => {
|
||||
const webSearch = [{ link: 'https://example.com/1' }, { link: 'https://example.com/2' }]
|
||||
@ -383,13 +290,4 @@ describe('linkConverter', () => {
|
||||
expect(result).toBe('[链接1](https://example.com)[链接2](https://other.com)')
|
||||
})
|
||||
})
|
||||
|
||||
describe('completionPerplexityLinks', () => {
|
||||
it('should complete links with webSearch data', () => {
|
||||
const webSearch = [{ url: 'https://example.com/1' }, { url: 'https://example.com/2' }]
|
||||
const input = '参考 [1] 和 [2]'
|
||||
const result = completionPerplexityLinks(input, webSearch)
|
||||
expect(result).toBe('参考 [1](https://example.com/1) 和 [2](https://example.com/2)')
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@ -1,5 +1,3 @@
|
||||
import { WebSearchResponse, WebSearchSource } from '@renderer/types'
|
||||
|
||||
// Counter for numbering links
|
||||
let linkCounter = 1
|
||||
// Buffer to hold incomplete link fragments across chunks
|
||||
@ -17,109 +15,6 @@ function isHost(text: string): boolean {
|
||||
return /^(https?:\/\/)?[\w.-]+\.[a-z]{2,}(\/.*)?$/i.test(text) || /^[\w.-]+\.[a-z]{2,}(\/.*)?$/i.test(text)
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts Markdown links in the text to numbered links based on the rules:
|
||||
* [ref_N] -> [<sup>N</sup>]
|
||||
* @param {string} text The current chunk of text to process
|
||||
* @param {boolean} resetCounter Whether to reset the counter and buffer
|
||||
* @returns {string} Processed text with complete links converted
|
||||
*/
|
||||
export function convertLinksToZhipu(text: string, resetCounter: boolean = false): string {
|
||||
if (resetCounter) {
|
||||
linkCounter = 1
|
||||
buffer = ''
|
||||
}
|
||||
|
||||
// Append the new text to the buffer
|
||||
buffer += text
|
||||
let safePoint = buffer.length
|
||||
|
||||
// Check from the end for potentially incomplete [ref_N] patterns
|
||||
for (let i = buffer.length - 1; i >= 0; i--) {
|
||||
if (buffer[i] === '[') {
|
||||
const substring = buffer.substring(i)
|
||||
// Check if it's a complete [ref_N] pattern
|
||||
const match = /^\[ref_\d+\]/.exec(substring)
|
||||
|
||||
if (!match) {
|
||||
// Potentially incomplete [ref_N] pattern
|
||||
safePoint = i
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Process the safe part of the buffer
|
||||
const safeBuffer = buffer.substring(0, safePoint)
|
||||
buffer = buffer.substring(safePoint)
|
||||
|
||||
// Replace all complete [ref_N] patterns
|
||||
return safeBuffer.replace(/\[ref_(\d+)\]/g, (_, num) => {
|
||||
return `[<sup>${num}</sup>]()`
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts Markdown links in the text to numbered links based on the rules:
|
||||
* [N](@ref) -> [<sup>N</sup>]()
|
||||
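* [N,M,...](@ref) -> [<sup>N</sup>](url_N)[<sup>M</sup>](url_M)... (one link per number, concatenated without separators; a number with no matching search result keeps "(@ref)")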
|
||||
* @param {string} text The current chunk of text to process
|
||||
* @param {any[]} webSearch webSearch results
|
||||
* @param {boolean} resetCounter Whether to reset the counter and buffer
|
||||
* @returns {string} Processed text with complete links converted
|
||||
*/
|
||||
export function convertLinksToHunyuan(text: string, webSearch: any[], resetCounter: boolean = false): string {
|
||||
if (resetCounter) {
|
||||
linkCounter = 1
|
||||
buffer = ''
|
||||
}
|
||||
|
||||
buffer += text
|
||||
let safePoint = buffer.length
|
||||
|
||||
// Check from the end for potentially incomplete patterns
|
||||
for (let i = buffer.length - 1; i >= 0; i--) {
|
||||
if (buffer[i] === '[') {
|
||||
const substring = buffer.substring(i)
|
||||
// Check if it's a complete pattern - handles both [N](@ref) and [N,M,...](@ref)
|
||||
const match = /^\[[\d,\s]+\]\(@ref\)/.exec(substring)
|
||||
|
||||
if (!match) {
|
||||
// Potentially incomplete pattern
|
||||
safePoint = i
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Process the safe part of the buffer
|
||||
const safeBuffer = buffer.substring(0, safePoint)
|
||||
buffer = buffer.substring(safePoint)
|
||||
|
||||
// Replace all complete patterns
|
||||
return safeBuffer.replace(/\[([\d,\s]+)\]\(@ref\)/g, (_, numbers) => {
|
||||
// Split the numbers string into individual numbers
|
||||
const numArray = numbers
|
||||
.split(',')
|
||||
.map((num) => parseInt(num.trim()))
|
||||
.filter((num) => !isNaN(num))
|
||||
|
||||
// Generate separate superscript links for each number
|
||||
const links = numArray.map((num) => {
|
||||
const index = num - 1
|
||||
// Check if the index is valid in webSearch array
|
||||
if (index >= 0 && index < webSearch.length && webSearch[index]?.url) {
|
||||
return `[<sup>${num}</sup>](${webSearch[index].url})`
|
||||
}
|
||||
// If no matching URL found, keep the original reference format for this number
|
||||
return `[<sup>${num}</sup>](@ref)`
|
||||
})
|
||||
|
||||
// Concatenate the separate links with no separator between them
|
||||
return links.join('')
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts Markdown links in the text to numbered links based on the rules:
|
||||
* 1. ([host](url)) -> [cnt](url)
|
||||
@ -171,13 +66,21 @@ export function convertLinks(
|
||||
break
|
||||
}
|
||||
|
||||
// 检查是否是完整的链接但需要验证
|
||||
// 检查是否是完整的链接
|
||||
const completeLink = /^\[([^\]]+)\]\(([^)]+)\)/.test(substring)
|
||||
if (completeLink) {
|
||||
// 如果是完整链接,继续处理,不设置safePoint
|
||||
continue
|
||||
}
|
||||
|
||||
// 检查是否是不完整的 [ 开始但还没有闭合的 ]
|
||||
// 例如 [example. 这种情况
|
||||
const incompleteBracket = /^\[[^\]]*$/.test(substring)
|
||||
if (incompleteBracket) {
|
||||
safePoint = i
|
||||
break
|
||||
}
|
||||
|
||||
// 如果不是潜在的链接格式,继续检查
|
||||
}
|
||||
}
|
||||
@ -263,65 +166,6 @@ export function convertLinks(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts Markdown links in the text to numbered links based on the rules:
|
||||
* 1. [host](url) -> [cnt](url)
|
||||
*
|
||||
* @param {string} text The current chunk of text to process
|
||||
* @param {boolean} resetCounter Whether to reset the counter and buffer
|
||||
* @returns {string} Processed text with complete links converted
|
||||
*/
|
||||
export function convertLinksToOpenRouter(text: string, resetCounter = false): string {
|
||||
if (resetCounter) {
|
||||
linkCounter = 1
|
||||
buffer = ''
|
||||
urlToCounterMap = new Map<string, number>()
|
||||
}
|
||||
|
||||
// Append the new text to the buffer
|
||||
buffer += text
|
||||
|
||||
// Find a safe point to process
|
||||
let safePoint = buffer.length
|
||||
|
||||
// Check for potentially incomplete link patterns from the end
|
||||
for (let i = buffer.length - 1; i >= 0; i--) {
|
||||
if (buffer[i] === '[') {
|
||||
const substring = buffer.substring(i)
|
||||
const match = /^\[([^\]]+)\]\(([^)]+)\)/.exec(substring)
|
||||
|
||||
if (!match) {
|
||||
safePoint = i
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract the part of the buffer that we can safely process
|
||||
const safeBuffer = buffer.substring(0, safePoint)
|
||||
buffer = buffer.substring(safePoint)
|
||||
|
||||
// Process the safe buffer to handle complete links
|
||||
const result = safeBuffer.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, text, url) => {
|
||||
// Only convert link if the text looks like a host/URL
|
||||
if (isHost(text)) {
|
||||
// Check if this URL has been seen before
|
||||
let counter: number
|
||||
if (urlToCounterMap.has(url)) {
|
||||
counter = urlToCounterMap.get(url)!
|
||||
} else {
|
||||
counter = linkCounter++
|
||||
urlToCounterMap.set(url, counter)
|
||||
}
|
||||
return `[<sup>${counter}</sup>](${url})`
|
||||
}
|
||||
// Keep original link format if the text doesn't look like a host
|
||||
return match
|
||||
})
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据webSearch结果补全链接,将[<sup>num</sup>]()转换为[<sup>num</sup>](webSearch[num-1].url)
|
||||
* @param {string} text 原始文本
|
||||
@ -341,25 +185,6 @@ export function completeLinks(text: string, webSearch: any[]): string {
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据webSearch结果补全链接,将[num]转换为[num](webSearch[num-1].url)
|
||||
* @param {string} text 原始文本
|
||||
* @param {any[]} webSearch webSearch结果
|
||||
* @returns {string} 补全后的文本
|
||||
*/
|
||||
export function completionPerplexityLinks(text: string, webSearch: any[]): string {
|
||||
return text.replace(/\[(\d+)\]/g, (match, numStr) => {
|
||||
const num = parseInt(numStr)
|
||||
const index = num - 1
|
||||
// 检查 webSearch 数组中是否存在对应的 URL
|
||||
if (index >= 0 && index < webSearch.length && webSearch[index].url) {
|
||||
return `[${num}](${webSearch[index].url})`
|
||||
}
|
||||
// 如果没有找到对应的 URL,保持原样
|
||||
return match
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* 从Markdown文本中提取所有URL
|
||||
* 支持以下格式:
|
||||
@ -412,118 +237,6 @@ export function cleanLinkCommas(text: string): string {
|
||||
return text.replace(/\]\(([^)]+)\)\s*,\s*\[/g, ']($1)[')
|
||||
}
|
||||
|
||||
/**
|
||||
* 从文本中识别各种格式的Web搜索引用占位符
|
||||
* 支持的格式包括:[1], [ref_1], [1](@ref), [1,2,3](@ref) 等
|
||||
* @param {string} text 要分析的文本
|
||||
* @returns {Array} 识别到的引用信息数组
|
||||
*/
|
||||
export function extractWebSearchReferences(text: string): Array<{
|
||||
match: string
|
||||
placeholder: string
|
||||
numbers: number[]
|
||||
startIndex: number
|
||||
endIndex: number
|
||||
}> {
|
||||
const references: Array<{
|
||||
match: string
|
||||
placeholder: string
|
||||
numbers: number[]
|
||||
startIndex: number
|
||||
endIndex: number
|
||||
}> = []
|
||||
|
||||
// 匹配各种引用格式的正则表达式
|
||||
const patterns = [
|
||||
// [1], [2], [3] - 简单数字引用
|
||||
{ regex: /\[(\d+)\]/g, type: 'simple' },
|
||||
// [ref_1], [ref_2] - Zhipu格式
|
||||
{ regex: /\[ref_(\d+)\]/g, type: 'zhipu' },
|
||||
// [1](@ref), [2](@ref) - Hunyuan单个引用格式
|
||||
{ regex: /\[(\d+)\]\(@ref\)/g, type: 'hunyuan_single' },
|
||||
// [1,2,3](@ref) - Hunyuan多个引用格式
|
||||
{ regex: /\[([\d,\s]+)\]\(@ref\)/g, type: 'hunyuan_multiple' }
|
||||
]
|
||||
|
||||
patterns.forEach(({ regex, type }) => {
|
||||
let match
|
||||
while ((match = regex.exec(text)) !== null) {
|
||||
let numbers: number[] = []
|
||||
|
||||
if (type === 'hunyuan_multiple') {
|
||||
// 解析逗号分隔的数字
|
||||
numbers = match[1]
|
||||
.split(',')
|
||||
.map((num) => parseInt(num.trim()))
|
||||
.filter((num) => !isNaN(num))
|
||||
} else {
|
||||
// 单个数字
|
||||
numbers = [parseInt(match[1])]
|
||||
}
|
||||
|
||||
references.push({
|
||||
match: match[0],
|
||||
placeholder: match[0],
|
||||
numbers: numbers,
|
||||
startIndex: match.index!,
|
||||
endIndex: match.index! + match[0].length
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
// 按位置排序
|
||||
return references.sort((a, b) => a.startIndex - b.startIndex)
|
||||
}
|
||||
|
||||
/**
|
||||
* 智能链接转换器 - 根据文本中的引用模式和Web搜索结果自动选择合适的转换策略
|
||||
* @param {string} text 当前文本块
|
||||
* @param {any[]} webSearchResults Web搜索结果数组
|
||||
* @param {string} providerType Provider类型 ('openai', 'zhipu', 'hunyuan', 'openrouter', etc.)
|
||||
* @param {boolean} resetCounter 是否重置计数器
|
||||
* @returns {{text: string, hasBufferedContent: boolean}} 转换后的文本和是否有内容被缓冲
|
||||
*/
|
||||
export function smartLinkConverter(
|
||||
text: string,
|
||||
providerType: string = 'openai',
|
||||
resetCounter: boolean = false,
|
||||
webSearchResults?: WebSearchResponse
|
||||
): { text: string; hasBufferedContent: boolean } {
|
||||
if (webSearchResults) {
|
||||
const webSearch = webSearchResults.results
|
||||
switch (webSearchResults.source) {
|
||||
case WebSearchSource.PERPLEXITY: {
|
||||
text = completionPerplexityLinks(text, webSearch as any[])
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// 检测文本中的引用模式
|
||||
const references = extractWebSearchReferences(text)
|
||||
|
||||
if (references.length === 0) {
|
||||
// 如果没有特定的引用模式,使用通用转换
|
||||
return convertLinks(text, resetCounter)
|
||||
}
|
||||
|
||||
// 根据检测到的引用模式选择合适的转换器
|
||||
const hasZhipuPattern = references.some((ref) => ref.placeholder.includes('ref_'))
|
||||
|
||||
if (hasZhipuPattern) {
|
||||
return {
|
||||
text: convertLinksToZhipu(text, resetCounter),
|
||||
hasBufferedContent: false
|
||||
}
|
||||
} else if (providerType === 'openrouter') {
|
||||
return {
|
||||
text: convertLinksToOpenRouter(text, resetCounter),
|
||||
hasBufferedContent: false
|
||||
}
|
||||
} else {
|
||||
return convertLinks(text, resetCounter)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 强制返回buffer中的所有内容,用于流结束时清空缓冲区
|
||||
* @returns {string} buffer中剩余的所有内容
|
||||
|
||||
13
yarn.lock
13
yarn.lock
@ -239,6 +239,18 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@ai-sdk/perplexity@npm:^2.0.8":
|
||||
version: 2.0.8
|
||||
resolution: "@ai-sdk/perplexity@npm:2.0.8"
|
||||
dependencies:
|
||||
"@ai-sdk/provider": "npm:2.0.0"
|
||||
"@ai-sdk/provider-utils": "npm:3.0.8"
|
||||
peerDependencies:
|
||||
zod: ^3.25.76 || ^4
|
||||
checksum: 10c0/acfd6c09c4c0ef5af7eeec6e8bc20b90b24d1d3fc2bc8ee9de4e40770fc0c17ca2c8db8f0248ff07264b71e5aa65f64d37a165db2f43fee84c1b3513cb97983c
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@ai-sdk/provider-utils@npm:3.0.3":
|
||||
version: 3.0.3
|
||||
resolution: "@ai-sdk/provider-utils@npm:3.0.3"
|
||||
@ -13023,6 +13035,7 @@ __metadata:
|
||||
"@ai-sdk/amazon-bedrock": "npm:^3.0.0"
|
||||
"@ai-sdk/google-vertex": "npm:^3.0.25"
|
||||
"@ai-sdk/mistral": "npm:^2.0.0"
|
||||
"@ai-sdk/perplexity": "npm:^2.0.8"
|
||||
"@ant-design/v5-patch-for-react-19": "npm:^1.0.3"
|
||||
"@anthropic-ai/sdk": "npm:^0.41.0"
|
||||
"@anthropic-ai/vertex-sdk": "patch:@anthropic-ai/vertex-sdk@npm%3A0.11.4#~/.yarn/patches/@anthropic-ai-vertex-sdk-npm-0.11.4-c19cb41edb.patch"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user