diff --git a/package.json b/package.json index 153be63374..b620dd51d4 100644 --- a/package.json +++ b/package.json @@ -97,6 +97,7 @@ "@ai-sdk/amazon-bedrock": "^3.0.0", "@ai-sdk/google-vertex": "^3.0.25", "@ai-sdk/mistral": "^2.0.0", + "@ai-sdk/perplexity": "^2.0.8", "@ant-design/v5-patch-for-react-19": "^1.0.3", "@anthropic-ai/sdk": "^0.41.0", "@anthropic-ai/vertex-sdk": "patch:@anthropic-ai/vertex-sdk@npm%3A0.11.4#~/.yarn/patches/@anthropic-ai-vertex-sdk-npm-0.11.4-c19cb41edb.patch", diff --git a/src/renderer/src/aiCore/chunk/AiSdkToChunkAdapter.ts b/src/renderer/src/aiCore/chunk/AiSdkToChunkAdapter.ts index a28000d9dd..8af4388d5f 100644 --- a/src/renderer/src/aiCore/chunk/AiSdkToChunkAdapter.ts +++ b/src/renderer/src/aiCore/chunk/AiSdkToChunkAdapter.ts @@ -4,8 +4,9 @@ */ import { loggerService } from '@logger' -import { MCPTool, WebSearchResults, WebSearchSource } from '@renderer/types' +import { AISDKWebSearchResult, MCPTool, WebSearchResults, WebSearchSource } from '@renderer/types' import { Chunk, ChunkType } from '@renderer/types/chunk' +import { convertLinks, flushLinkConverterBuffer } from '@renderer/utils/linkConverter' import type { TextStreamPart, ToolSet } from 'ai' import { ToolCallChunkHandler } from './handleToolCallChunk' @@ -29,13 +30,18 @@ export interface CherryStudioChunk { export class AiSdkToChunkAdapter { toolCallHandler: ToolCallChunkHandler private accumulate: boolean | undefined + private isFirstChunk = true + private enableWebSearch: boolean = false + constructor( private onChunk: (chunk: Chunk) => void, mcpTools: MCPTool[] = [], - accumulate?: boolean + accumulate?: boolean, + enableWebSearch?: boolean ) { this.toolCallHandler = new ToolCallChunkHandler(onChunk, mcpTools) this.accumulate = accumulate + this.enableWebSearch = enableWebSearch || false } /** @@ -65,11 +71,24 @@ export class AiSdkToChunkAdapter { webSearchResults: [], reasoningId: '' } + // Reset link converter state at the start of stream + this.isFirstChunk = true + try { while (true) { const { done, value } = await reader.read() if (done) { + // Flush any remaining content from link converter buffer if web search is enabled + if (this.enableWebSearch) { + const remainingText = flushLinkConverterBuffer() + if (remainingText) { + this.onChunk({ + type: ChunkType.TEXT_DELTA, + text: remainingText + }) + } + } break } @@ -87,7 +106,7 @@ export class AiSdkToChunkAdapter { */ private convertAndEmitChunk( chunk: TextStreamPart, - final: { text: string; reasoningContent: string; webSearchResults: any[]; reasoningId: string } + final: { text: string; reasoningContent: string; webSearchResults: AISDKWebSearchResult[]; reasoningId: string } ) { logger.silly(`AI SDK chunk type: ${chunk.type}`, chunk) switch (chunk.type) { @@ -97,17 +116,44 @@ export class AiSdkToChunkAdapter { type: ChunkType.TEXT_START }) break - case 'text-delta': - if (this.accumulate) { - final.text += chunk.text || '' + case 'text-delta': { + const processedText = chunk.text || '' + let finalText: string + + // Only apply link conversion if web search is enabled + if (this.enableWebSearch) { + const result = convertLinks(processedText, this.isFirstChunk) + + if (this.isFirstChunk) { + this.isFirstChunk = false + } + + // Handle buffered content + if (result.hasBufferedContent) { + finalText = result.text + } else { + finalText = result.text || processedText + } } else { - final.text = chunk.text || '' + // Without web search, just use the original text + finalText = processedText + } + + if (this.accumulate) { + final.text += finalText + } else { + final.text = finalText + } + + // Only emit chunk if there's text to send + if (finalText) { + this.onChunk({ + type: ChunkType.TEXT_DELTA, + text: this.accumulate ? final.text : finalText + }) } - this.onChunk({ - type: ChunkType.TEXT_DELTA, - text: final.text || '' - }) break + } case 'text-end': this.onChunk({ type: ChunkType.TEXT_COMPLETE, @@ -200,7 +246,7 @@ export class AiSdkToChunkAdapter { [WebSearchSource.ANTHROPIC]: WebSearchSource.ANTHROPIC, [WebSearchSource.OPENROUTER]: WebSearchSource.OPENROUTER, [WebSearchSource.GEMINI]: WebSearchSource.GEMINI, - [WebSearchSource.PERPLEXITY]: WebSearchSource.PERPLEXITY, + // [WebSearchSource.PERPLEXITY]: WebSearchSource.PERPLEXITY, [WebSearchSource.QWEN]: WebSearchSource.QWEN, [WebSearchSource.HUNYUAN]: WebSearchSource.HUNYUAN, [WebSearchSource.ZHIPU]: WebSearchSource.ZHIPU, @@ -268,18 +314,9 @@ export class AiSdkToChunkAdapter { // === 源和文件相关事件 === case 'source': if (chunk.sourceType === 'url') { - // if (final.webSearchResults.length === 0) { // eslint-disable-next-line @typescript-eslint/no-unused-vars const { sourceType: _, ...rest } = chunk final.webSearchResults.push(rest) - // } - // this.onChunk({ - // type: ChunkType.LLM_WEB_SEARCH_COMPLETE, - // llm_web_search: { - // source: WebSearchSource.AISDK, - // results: final.webSearchResults - // } - // }) } break case 'file': diff --git a/src/renderer/src/aiCore/index_new.ts b/src/renderer/src/aiCore/index_new.ts index 8da8e8c7c1..0225d15d26 100644 --- a/src/renderer/src/aiCore/index_new.ts +++ b/src/renderer/src/aiCore/index_new.ts @@ -284,7 +284,7 @@ export default class ModernAiProvider { // 创建带有中间件的执行器 if (config.onChunk) { const accumulate = this.model!.supported_text_delta !== false // true and undefined - const adapter = new AiSdkToChunkAdapter(config.onChunk, config.mcpTools, accumulate) + const adapter = new AiSdkToChunkAdapter(config.onChunk, config.mcpTools, accumulate, config.enableWebSearch) const streamResult = await executor.streamText({ ...params, diff --git a/src/renderer/src/aiCore/legacy/middleware/core/WebSearchMiddleware.ts b/src/renderer/src/aiCore/legacy/middleware/core/WebSearchMiddleware.ts index 4c72e877a9..ae346af836 100644 --- a/src/renderer/src/aiCore/legacy/middleware/core/WebSearchMiddleware.ts +++ b/src/renderer/src/aiCore/legacy/middleware/core/WebSearchMiddleware.ts @@ -1,6 +1,6 @@ import { loggerService } from '@logger' import { ChunkType } from '@renderer/types/chunk' -import { flushLinkConverterBuffer, smartLinkConverter } from '@renderer/utils/linkConverter' +import { convertLinks, flushLinkConverterBuffer } from '@renderer/utils/linkConverter' import { CompletionsParams, CompletionsResult, GenericChunk } from '../schemas' import { CompletionsContext, CompletionsMiddleware } from '../types' @@ -28,8 +28,6 @@ export const WebSearchMiddleware: CompletionsMiddleware = } // 调用下游中间件 const result = await next(ctx, params) - - const model = params.assistant?.model! let isFirstChunk = true // 响应后处理:记录Web搜索事件 @@ -42,15 +40,9 @@ export const WebSearchMiddleware: CompletionsMiddleware = new TransformStream({ transform(chunk: GenericChunk, controller) { if (chunk.type === ChunkType.TEXT_DELTA) { - const providerType = model.provider || 'openai' // 使用当前可用的Web搜索结果进行链接转换 const text = chunk.text - const result = smartLinkConverter( - text, - providerType, - isFirstChunk, - ctx._internal.webSearchState!.results - ) + const result = convertLinks(text, isFirstChunk) if (isFirstChunk) { isFirstChunk = false } diff --git a/src/renderer/src/aiCore/middleware/AiSdkMiddlewareBuilder.ts b/src/renderer/src/aiCore/middleware/AiSdkMiddlewareBuilder.ts index 04b5efd1b5..f0d3b2eb59 100644 --- a/src/renderer/src/aiCore/middleware/AiSdkMiddlewareBuilder.ts +++ b/src/renderer/src/aiCore/middleware/AiSdkMiddlewareBuilder.ts @@ -20,6 +20,7 @@ export interface AiSdkMiddlewareConfig { isSupportedToolUse: boolean // image generation endpoint isImageGenerationEndpoint: boolean + // 是否开启内置搜索 enableWebSearch: boolean enableGenerateImage: boolean enableUrlContext: boolean diff --git a/src/renderer/src/aiCore/provider/providerInitialization.ts b/src/renderer/src/aiCore/provider/providerInitialization.ts index cf3366d70a..3c188313b9 100644 --- a/src/renderer/src/aiCore/provider/providerInitialization.ts +++ b/src/renderer/src/aiCore/provider/providerInitialization.ts @@ -39,6 +39,14 @@ export const NEW_PROVIDER_CONFIGS: ProviderConfig[] = [ creatorFunctionName: 'createAmazonBedrock', supportsImageGeneration: true, aliases: ['aws-bedrock'] + }, + { + id: 'perplexity', + name: 'Perplexity', + import: () => import('@ai-sdk/perplexity'), + creatorFunctionName: 'createPerplexity', + supportsImageGeneration: false, + aliases: ['perplexity'] } ] as const diff --git a/src/renderer/src/store/messageBlock.ts b/src/renderer/src/store/messageBlock.ts index cd77c05430..368696a4d0 100644 --- a/src/renderer/src/store/messageBlock.ts +++ b/src/renderer/src/store/messageBlock.ts @@ -1,16 +1,19 @@ import { WebSearchResultBlock } from '@anthropic-ai/sdk/resources' import type { GroundingMetadata } from '@google/genai' import { createEntityAdapter, createSelector, createSlice, type PayloadAction } from '@reduxjs/toolkit' -import { Citation, WebSearchProviderResponse, WebSearchSource } from '@renderer/types' +import { AISDKWebSearchResult, Citation, WebSearchProviderResponse, WebSearchSource } from '@renderer/types' import type { CitationMessageBlock, MessageBlock } from '@renderer/types/newMessage' import { MessageBlockType } from '@renderer/types/newMessage' import type OpenAI from 'openai' import type { RootState } from './index' // 确认 RootState 从 store/index.ts 导出 +// Create a simplified type for the entity adapter to avoid circular type issues +type MessageBlockEntity = MessageBlock + // 1. 创建实体适配器 (Entity Adapter) // 我们使用块的 `id` 作为唯一标识符。 -const messageBlocksAdapter = createEntityAdapter() +const messageBlocksAdapter = createEntityAdapter() // 2. 使用适配器定义初始状态 (Initial State) // 如果需要,可以在规范化实体的旁边添加其他状态属性。 @@ -20,6 +23,7 @@ const initialState = messageBlocksAdapter.getInitialState({ }) // 3. 创建 Slice +// @ts-ignore ignore export const messageBlocksSlice = createSlice({ name: 'messageBlocks', initialState, @@ -76,8 +80,13 @@ export const messageBlocksSelectors = messageBlocksAdapter.getSelectors - blockId ? messageBlocksSelectors.selectById(state, blockId) : undefined // Use adapter selector +const selectBlockEntityById = (state: RootState, blockId: string | undefined): MessageBlock | undefined => { + const entity = blockId ? messageBlocksSelectors.selectById(state, blockId) : undefined + if (!entity) return undefined + + // Convert back to full MessageBlock type + return entity +} // --- Centralized Citation Formatting Logic --- export const formatCitationsFromBlock = (block: CitationMessageBlock | undefined): Citation[] => { @@ -173,13 +182,16 @@ export const formatCitationsFromBlock = (block: CitationMessageBlock | undefined case WebSearchSource.GROK: case WebSearchSource.OPENROUTER: formattedCitations = - (block.response.results as any[])?.map((url, index) => { + (block.response.results as AISDKWebSearchResult[])?.map((result, index) => { + const url = result.url try { - const hostname = new URL(url).hostname + const hostname = new URL(result.url).hostname + const content = result.providerMetadata && result.providerMetadata['openrouter']?.content return { number: index + 1, url, - hostname, + title: result.title || hostname, + content: content as string, showFavicon: true, type: 'websearch' } @@ -218,10 +230,12 @@ export const formatCitationsFromBlock = (block: CitationMessageBlock | undefined break case WebSearchSource.AISDK: formattedCitations = - (block.response.results as any[])?.map((result, index) => ({ + (block.response.results && (block.response.results as AISDKWebSearchResult[]))?.map((result, index) => ({ number: index + 1, url: result.url, - title: result.title, + title: result.title || new URL(result.url).hostname, + showFavicon: true, + type: 'websearch', providerMetadata: result?.providerMetadata })) || [] break diff --git a/src/renderer/src/store/thunk/messageThunk.ts b/src/renderer/src/store/thunk/messageThunk.ts index 45143b3ccc..42195eb8bb 100644 --- a/src/renderer/src/store/thunk/messageThunk.ts +++ b/src/renderer/src/store/thunk/messageThunk.ts @@ -19,6 +19,7 @@ import { resetAssistantMessage } from '@renderer/utils/messageUtils/create' import { getTopicQueue, waitForTopicQueue } from '@renderer/utils/queue' +import { defaultAppHeaders } from '@shared/utils' import { t } from 'i18next' import { isEmpty, throttle } from 'lodash' import { LRUCache } from 'lru-cache' @@ -369,7 +370,8 @@ const fetchAndProcessAssistantResponseImpl = async ( topicId, options: { signal: abortController.signal, - timeout: 30000 + timeout: 30000, + headers: defaultAppHeaders() } }, streamProcessorCallbacks @@ -1073,7 +1075,7 @@ export const cloneMessagesToNewTopicThunk = const oldBlock = state.messageBlocks.entities[oldBlockId] if (oldBlock) { const newBlockId = uuid() - const newBlock: MessageBlock = { + const newBlock = { ...oldBlock, id: newBlockId, messageId: newMsgId // Link block to the NEW message ID diff --git a/src/renderer/src/types/index.ts b/src/renderer/src/types/index.ts index a5fba1040b..9518f1b38a 100644 --- a/src/renderer/src/types/index.ts +++ b/src/renderer/src/types/index.ts @@ -1,3 +1,4 @@ +import type { LanguageModelV2Source } from '@ai-sdk/provider' import type { WebSearchResultBlock } from '@anthropic-ai/sdk/resources' import type { GenerateImagesConfig, GroundingMetadata, PersonGeneration } from '@google/genai' import type OpenAI from 'openai' @@ -726,12 +727,15 @@ export type WebSearchProviderResponse = { results: WebSearchProviderResult[] } +export type AISDKWebSearchResult = Omit, 'sourceType'> + export type WebSearchResults = | WebSearchProviderResponse | GroundingMetadata | OpenAI.Chat.Completions.ChatCompletionMessage.Annotation.URLCitation[] | OpenAI.Responses.ResponseOutputText.URLCitation[] | WebSearchResultBlock[] + | AISDKWebSearchResult[] | any[] export enum WebSearchSource { diff --git a/src/renderer/src/utils/__tests__/linkConverter.test.ts b/src/renderer/src/utils/__tests__/linkConverter.test.ts index 16d2e8aefb..1b813f1a54 100644 --- a/src/renderer/src/utils/__tests__/linkConverter.test.ts +++ b/src/renderer/src/utils/__tests__/linkConverter.test.ts @@ -3,91 +3,12 @@ import { describe, expect, it } from 'vitest' import { cleanLinkCommas, completeLinks, - completionPerplexityLinks, convertLinks, - convertLinksToHunyuan, - convertLinksToOpenRouter, - convertLinksToZhipu, extractUrlsFromMarkdown, flushLinkConverterBuffer } from '../linkConverter' describe('linkConverter', () => { - describe('convertLinksToZhipu', () => { - it('should correctly convert complete [ref_N] format', () => { - const input = '这里有一个参考文献 [ref_1] 和另一个 [ref_2]' - const result = convertLinksToZhipu(input, true) - expect(result).toBe('这里有一个参考文献 [1]() 和另一个 [2]()') - }) - - it('should handle chunked input and preserve incomplete link patterns', () => { - // 第一个块包含未完成的模式 - const chunk1 = '这是第一部分 [ref' - const result1 = convertLinksToZhipu(chunk1, true) - expect(result1).toBe('这是第一部分 ') - - // 第二个块完成该模式 - const chunk2 = '_1] 这是剩下的部分' - const result2 = convertLinksToZhipu(chunk2, false) - expect(result2).toBe('[1]() 这是剩下的部分') - }) - - it('should clear buffer when resetting counter', () => { - // 先进行一次转换不重置 - const input1 = '第一次输入 [ref_1]' - convertLinksToZhipu(input1, false) - - // 然后重置并进行新的转换 - const input2 = '新的输入 [ref_2]' - const result = convertLinksToZhipu(input2, true) - expect(result).toBe('新的输入 [2]()') - }) - }) - - describe('convertLinksToHunyuan', () => { - it('should correctly convert [N](@ref) format to links with URLs', () => { - const webSearch = [{ url: 'https://example.com/1' }, { url: 'https://example.com/2' }] - const input = '这里有单个引用 [1](@ref) 和多个引用 [2](@ref)' - const result = convertLinksToHunyuan(input, webSearch, true) - expect(result).toBe( - '这里有单个引用 [1](https://example.com/1) 和多个引用 [2](https://example.com/2)' - ) - }) - - it('should correctly handle comma-separated multiple references', () => { - const webSearch = [ - { url: 'https://example.com/1' }, - { url: 'https://example.com/2' }, - { url: 'https://example.com/3' } - ] - const input = '这里有多个引用 [1, 2, 3](@ref)' - const result = convertLinksToHunyuan(input, webSearch, true) - expect(result).toBe( - '这里有多个引用 [1](https://example.com/1)[2](https://example.com/2)[3](https://example.com/3)' - ) - }) - - it('should handle non-existent reference indices', () => { - const webSearch = [{ url: 'https://example.com/1' }] - const input = '这里有一个超出范围的引用 [2](@ref)' - const result = convertLinksToHunyuan(input, webSearch, true) - expect(result).toBe('这里有一个超出范围的引用 [2](@ref)') - }) - - it('should handle incomplete reference formats in chunked input', () => { - const webSearch = [{ url: 'https://example.com/1' }] - // 第一个块包含未完成的模式 - const chunk1 = '这是第一部分 [' - const result1 = convertLinksToHunyuan(chunk1, webSearch, true) - expect(result1).toBe('这是第一部分 ') - - // 第二个块完成该模式 - const chunk2 = '1](@ref) 这是剩下的部分' - const result2 = convertLinksToHunyuan(chunk2, webSearch, false) - expect(result2).toBe('[1](https://example.com/1) 这是剩下的部分') - }) - }) - describe('convertLinks', () => { it('should convert number links to numbered links', () => { const input = '参考 [1](https://example.com/1) 和 [2](https://example.com/2)' @@ -226,8 +147,10 @@ describe('linkConverter', () => { it('should handle real links split across small chunks with proper buffering', () => { // 模拟真实链接被分割成小chunks的情况 - 更现实的分割方式 const chunks = [ - 'Please visit [example.com](', // 不完整链接 - 'https://example.com) for details' // 完成链接 + 'Please visit [example.', + 'com](', // 不完整链接' + 'https://exa', + 'mple.com) for details' // 完成链接' ] let accumulatedText = '' @@ -235,14 +158,24 @@ describe('linkConverter', () => { // 第一个chunk:包含不完整链接 [text]( const result1 = convertLinks(chunks[0], true) expect(result1.text).toBe('Please visit ') // 只返回安全部分 - expect(result1.hasBufferedContent).toBe(true) // [example.com]( 被缓冲 + expect(result1.hasBufferedContent).toBe(true) // accumulatedText += result1.text - // 第二个chunk:完成链接 + // 第二个chunk const result2 = convertLinks(chunks[1], false) - expect(result2.text).toBe('[1](https://example.com) for details') // 完整链接 + 剩余文本 - expect(result2.hasBufferedContent).toBe(false) - accumulatedText += result2.text + expect(result2.text).toBe('') + expect(result2.hasBufferedContent).toBe(true) + // 第三个chunk + const result3 = convertLinks(chunks[2], false) + expect(result3.text).toBe('') + expect(result3.hasBufferedContent).toBe(true) + accumulatedText += result3.text + + // 第四个chunk + const result4 = convertLinks(chunks[3], false) + expect(result4.text).toBe('[1](https://example.com) for details') + expect(result4.hasBufferedContent).toBe(false) + accumulatedText += result4.text // 验证最终结果 expect(accumulatedText).toBe('Please visit [1](https://example.com) for details') @@ -293,32 +226,6 @@ describe('linkConverter', () => { }) }) - describe('convertLinksToOpenRouter', () => { - it('should only convert links with domain-like text', () => { - const input = '网站 [example.com](https://example.com) 和 [点击这里](https://other.com)' - const result = convertLinksToOpenRouter(input, true) - expect(result).toBe('网站 [1](https://example.com) 和 [点击这里](https://other.com)') - }) - - it('should use the same counter for duplicate URLs', () => { - const input = '两个相同的链接 [example.com](https://example.com) 和 [example.org](https://example.com)' - const result = convertLinksToOpenRouter(input, true) - expect(result).toBe('两个相同的链接 [1](https://example.com) 和 [1](https://example.com)') - }) - - it('should handle incomplete links in chunked input', () => { - // 第一个块包含未完成的链接 - const chunk1 = '这是域名链接 [' - const result1 = convertLinksToOpenRouter(chunk1, true) - expect(result1).toBe('这是域名链接 ') - - // 第二个块完成链接 - const chunk2 = 'example.com](https://example.com)' - const result2 = convertLinksToOpenRouter(chunk2, false) - expect(result2).toBe('[1](https://example.com)') - }) - }) - describe('completeLinks', () => { it('should complete empty links with webSearch data', () => { const webSearch = [{ link: 'https://example.com/1' }, { link: 'https://example.com/2' }] @@ -383,13 +290,4 @@ describe('linkConverter', () => { expect(result).toBe('[链接1](https://example.com)[链接2](https://other.com)') }) }) - - describe('completionPerplexityLinks', () => { - it('should complete links with webSearch data', () => { - const webSearch = [{ url: 'https://example.com/1' }, { url: 'https://example.com/2' }] - const input = '参考 [1] 和 [2]' - const result = completionPerplexityLinks(input, webSearch) - expect(result).toBe('参考 [1](https://example.com/1) 和 [2](https://example.com/2)') - }) - }) }) diff --git a/src/renderer/src/utils/linkConverter.ts b/src/renderer/src/utils/linkConverter.ts index 220333f027..ccb6ec5004 100644 --- a/src/renderer/src/utils/linkConverter.ts +++ b/src/renderer/src/utils/linkConverter.ts @@ -1,5 +1,3 @@ -import { WebSearchResponse, WebSearchSource } from '@renderer/types' - // Counter for numbering links let linkCounter = 1 // Buffer to hold incomplete link fragments across chunks @@ -17,109 +15,6 @@ function isHost(text: string): boolean { return /^(https?:\/\/)?[\w.-]+\.[a-z]{2,}(\/.*)?$/i.test(text) || /^[\w.-]+\.[a-z]{2,}(\/.*)?$/i.test(text) } -/** - * Converts Markdown links in the text to numbered links based on the rules:s - * [ref_N] -> [N] - * @param {string} text The current chunk of text to process - * @param {boolean} resetCounter Whether to reset the counter and buffer - * @returns {string} Processed text with complete links converted - */ -export function convertLinksToZhipu(text: string, resetCounter: boolean = false): string { - if (resetCounter) { - linkCounter = 1 - buffer = '' - } - - // Append the new text to the buffer - buffer += text - let safePoint = buffer.length - - // Check from the end for potentially incomplete [ref_N] patterns - for (let i = buffer.length - 1; i >= 0; i--) { - if (buffer[i] === '[') { - const substring = buffer.substring(i) - // Check if it's a complete [ref_N] pattern - const match = /^\[ref_\d+\]/.exec(substring) - - if (!match) { - // Potentially incomplete [ref_N] pattern - safePoint = i - break - } - } - } - - // Process the safe part of the buffer - const safeBuffer = buffer.substring(0, safePoint) - buffer = buffer.substring(safePoint) - - // Replace all complete [ref_N] patterns - return safeBuffer.replace(/\[ref_(\d+)\]/g, (_, num) => { - return `[${num}]()` - }) -} - -/** - * Converts Markdown links in the text to numbered links based on the rules: - * [N](@ref) -> [N]() - * [N,M,...](@ref) -> [N]() [M]() ... - * @param {string} text The current chunk of text to process - * @param {any[]} webSearch webSearch results - * @param {boolean} resetCounter Whether to reset the counter and buffer - * @returns {string} Processed text with complete links converted - */ -export function convertLinksToHunyuan(text: string, webSearch: any[], resetCounter: boolean = false): string { - if (resetCounter) { - linkCounter = 1 - buffer = '' - } - - buffer += text - let safePoint = buffer.length - - // Check from the end for potentially incomplete patterns - for (let i = buffer.length - 1; i >= 0; i--) { - if (buffer[i] === '[') { - const substring = buffer.substring(i) - // Check if it's a complete pattern - handles both [N](@ref) and [N,M,...](@ref) - const match = /^\[[\d,\s]+\]\(@ref\)/.exec(substring) - - if (!match) { - // Potentially incomplete pattern - safePoint = i - break - } - } - } - - // Process the safe part of the buffer - const safeBuffer = buffer.substring(0, safePoint) - buffer = buffer.substring(safePoint) - - // Replace all complete patterns - return safeBuffer.replace(/\[([\d,\s]+)\]\(@ref\)/g, (_, numbers) => { - // Split the numbers string into individual numbers - const numArray = numbers - .split(',') - .map((num) => parseInt(num.trim())) - .filter((num) => !isNaN(num)) - - // Generate separate superscript links for each number - const links = numArray.map((num) => { - const index = num - 1 - // Check if the index is valid in webSearch array - if (index >= 0 && index < webSearch.length && webSearch[index]?.url) { - return `[${num}](${webSearch[index].url})` - } - // If no matching URL found, keep the original reference format for this number - return `[${num}](@ref)` - }) - - // Join the separate links with spaces - return links.join('') - }) -} - /** * Converts Markdown links in the text to numbered links based on the rules: * 1. ([host](url)) -> [cnt](url) @@ -171,13 +66,21 @@ export function convertLinks( break } - // 检查是否是完整的链接但需要验证 + // 检查是否是完整的链接 const completeLink = /^\[([^\]]+)\]\(([^)]+)\)/.test(substring) if (completeLink) { // 如果是完整链接,继续处理,不设置safePoint continue } + // 检查是否是不完整的 [ 开始但还没有闭合的 ] + // 例如 [example. 这种情况 + const incompleteBracket = /^\[[^\]]*$/.test(substring) + if (incompleteBracket) { + safePoint = i + break + } + // 如果不是潜在的链接格式,继续检查 } } @@ -263,65 +166,6 @@ export function convertLinks( } } -/** - * Converts Markdown links in the text to numbered links based on the rules: - * 1. [host](url) -> [cnt](url) - * - * @param {string} text The current chunk of text to process - * @param {boolean} resetCounter Whether to reset the counter and buffer - * @returns {string} Processed text with complete links converted - */ -export function convertLinksToOpenRouter(text: string, resetCounter = false): string { - if (resetCounter) { - linkCounter = 1 - buffer = '' - urlToCounterMap = new Map() - } - - // Append the new text to the buffer - buffer += text - - // Find a safe point to process - let safePoint = buffer.length - - // Check for potentially incomplete link patterns from the end - for (let i = buffer.length - 1; i >= 0; i--) { - if (buffer[i] === '[') { - const substring = buffer.substring(i) - const match = /^\[([^\]]+)\]\(([^)]+)\)/.exec(substring) - - if (!match) { - safePoint = i - break - } - } - } - - // Extract the part of the buffer that we can safely process - const safeBuffer = buffer.substring(0, safePoint) - buffer = buffer.substring(safePoint) - - // Process the safe buffer to handle complete links - const result = safeBuffer.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, text, url) => { - // Only convert link if the text looks like a host/URL - if (isHost(text)) { - // Check if this URL has been seen before - let counter: number - if (urlToCounterMap.has(url)) { - counter = urlToCounterMap.get(url)! - } else { - counter = linkCounter++ - urlToCounterMap.set(url, counter) - } - return `[${counter}](${url})` - } - // Keep original link format if the text doesn't look like a host - return match - }) - - return result -} - /** * 根据webSearch结果补全链接,将[num]()转换为[num](webSearch[num-1].url) * @param {string} text 原始文本 @@ -341,25 +185,6 @@ export function completeLinks(text: string, webSearch: any[]): string { }) } -/** - * 根据webSearch结果补全链接,将[num]转换为[num](webSearch[num-1].url) - * @param {string} text 原始文本 - * @param {any[]} webSearch webSearch结果 - * @returns {string} 补全后的文本 - */ -export function completionPerplexityLinks(text: string, webSearch: any[]): string { - return text.replace(/\[(\d+)\]/g, (match, numStr) => { - const num = parseInt(numStr) - const index = num - 1 - // 检查 webSearch 数组中是否存在对应的 URL - if (index >= 0 && index < webSearch.length && webSearch[index].url) { - return `[${num}](${webSearch[index].url})` - } - // 如果没有找到对应的 URL,保持原样 - return match - }) -} - /** * 从Markdown文本中提取所有URL * 支持以下格式: @@ -412,118 +237,6 @@ export function cleanLinkCommas(text: string): string { return text.replace(/\]\(([^)]+)\)\s*,\s*\[/g, ']($1)[') } -/** - * 从文本中识别各种格式的Web搜索引用占位符 - * 支持的格式包括:[1], [ref_1], [1](@ref), [1,2,3](@ref) 等 - * @param {string} text 要分析的文本 - * @returns {Array} 识别到的引用信息数组 - */ -export function extractWebSearchReferences(text: string): Array<{ - match: string - placeholder: string - numbers: number[] - startIndex: number - endIndex: number -}> { - const references: Array<{ - match: string - placeholder: string - numbers: number[] - startIndex: number - endIndex: number - }> = [] - - // 匹配各种引用格式的正则表达式 - const patterns = [ - // [1], [2], [3] - 简单数字引用 - { regex: /\[(\d+)\]/g, type: 'simple' }, - // [ref_1], [ref_2] - Zhipu格式 - { regex: /\[ref_(\d+)\]/g, type: 'zhipu' }, - // [1](@ref), [2](@ref) - Hunyuan单个引用格式 - { regex: /\[(\d+)\]\(@ref\)/g, type: 'hunyuan_single' }, - // [1,2,3](@ref) - Hunyuan多个引用格式 - { regex: /\[([\d,\s]+)\]\(@ref\)/g, type: 'hunyuan_multiple' } - ] - - patterns.forEach(({ regex, type }) => { - let match - while ((match = regex.exec(text)) !== null) { - let numbers: number[] = [] - - if (type === 'hunyuan_multiple') { - // 解析逗号分隔的数字 - numbers = match[1] - .split(',') - .map((num) => parseInt(num.trim())) - .filter((num) => !isNaN(num)) - } else { - // 单个数字 - numbers = [parseInt(match[1])] - } - - references.push({ - match: match[0], - placeholder: match[0], - numbers: numbers, - startIndex: match.index!, - endIndex: match.index! + match[0].length - }) - } - }) - - // 按位置排序 - return references.sort((a, b) => a.startIndex - b.startIndex) -} - -/** - * 智能链接转换器 - 根据文本中的引用模式和Web搜索结果自动选择合适的转换策略 - * @param {string} text 当前文本块 - * @param {any[]} webSearchResults Web搜索结果数组 - * @param {string} providerType Provider类型 ('openai', 'zhipu', 'hunyuan', 'openrouter', etc.) - * @param {boolean} resetCounter 是否重置计数器 - * @returns {{text: string, hasBufferedContent: boolean}} 转换后的文本和是否有内容被缓冲 - */ -export function smartLinkConverter( - text: string, - providerType: string = 'openai', - resetCounter: boolean = false, - webSearchResults?: WebSearchResponse -): { text: string; hasBufferedContent: boolean } { - if (webSearchResults) { - const webSearch = webSearchResults.results - switch (webSearchResults.source) { - case WebSearchSource.PERPLEXITY: { - text = completionPerplexityLinks(text, webSearch as any[]) - break - } - } - } - // 检测文本中的引用模式 - const references = extractWebSearchReferences(text) - - if (references.length === 0) { - // 如果没有特定的引用模式,使用通用转换 - return convertLinks(text, resetCounter) - } - - // 根据检测到的引用模式选择合适的转换器 - const hasZhipuPattern = references.some((ref) => ref.placeholder.includes('ref_')) - - if (hasZhipuPattern) { - return { - text: convertLinksToZhipu(text, resetCounter), - hasBufferedContent: false - } - } else if (providerType === 'openrouter') { - return { - text: convertLinksToOpenRouter(text, resetCounter), - hasBufferedContent: false - } - } else { - return convertLinks(text, resetCounter) - } -} - /** * 强制返回buffer中的所有内容,用于流结束时清空缓冲区 * @returns {string} buffer中剩余的所有内容 diff --git a/yarn.lock b/yarn.lock index eb0cde8a17..342c305c3d 100644 --- a/yarn.lock +++ b/yarn.lock @@ -239,6 +239,18 @@ __metadata: languageName: node linkType: hard +"@ai-sdk/perplexity@npm:^2.0.8": + version: 2.0.8 + resolution: "@ai-sdk/perplexity@npm:2.0.8" + dependencies: + "@ai-sdk/provider": "npm:2.0.0" + "@ai-sdk/provider-utils": "npm:3.0.8" + peerDependencies: + zod: ^3.25.76 || ^4 + checksum: 10c0/acfd6c09c4c0ef5af7eeec6e8bc20b90b24d1d3fc2bc8ee9de4e40770fc0c17ca2c8db8f0248ff07264b71e5aa65f64d37a165db2f43fee84c1b3513cb97983c + languageName: node + linkType: hard + "@ai-sdk/provider-utils@npm:3.0.3": version: 3.0.3 resolution: "@ai-sdk/provider-utils@npm:3.0.3" @@ -13023,6 +13035,7 @@ __metadata: "@ai-sdk/amazon-bedrock": "npm:^3.0.0" "@ai-sdk/google-vertex": "npm:^3.0.25" "@ai-sdk/mistral": "npm:^2.0.0" + "@ai-sdk/perplexity": "npm:^2.0.8" "@ant-design/v5-patch-for-react-19": "npm:^1.0.3" "@anthropic-ai/sdk": "npm:^0.41.0" "@anthropic-ai/vertex-sdk": "patch:@anthropic-ai/vertex-sdk@npm%3A0.11.4#~/.yarn/patches/@anthropic-ai-vertex-sdk-npm-0.11.4-c19cb41edb.patch"