From 134ea51b0f0fdac14e9688f948b914a50297c84c Mon Sep 17 00:00:00 2001 From: SuYao Date: Fri, 4 Jul 2025 17:03:45 +0800 Subject: [PATCH] fix: websearch block and citation formatting (#7776) * feat: enhance citation handling for Perplexity web search results - Implemented formatting for Perplexity citations in MainTextBlock, including data-citation attributes. - Updated citation processing in message store and thunk to support new citation structure. - Added utility functions for link completion based on web search results. - Enhanced tests to verify correct handling of Perplexity citations and links. * refactor: streamline chunk processing in OpenAIApiClient - Replaced single choice handling with a loop to process all choices in the chunk. - Improved handling of content sources, ensuring fallback mechanisms are in place for delta and message fields. - Enhanced tool call processing to accommodate missing function names and arguments. - Maintained existing functionality for web search data and reasoning content processing. * fix: improve citation handling and web search integration - Enhanced citation formatting to support legacy data compatibility in messageBlock.ts. - Updated messageThunk.ts to manage main text block references and citation updates more effectively. - Removed unnecessary web search flag and streamlined block processing logic. * fix: improve citation transforms to skip code blocks - Add withCitationTags for better code structure - Add tests - Remove outdated code - The Citation type in @renderer/types/index.ts was not referenced anywhere, so it was removed - Move the actual Citation type from @renderer/pages/home/Messages/CitationsList.tsx to @renderer/types/index.ts - Allow text selection in tooltip * test: update tests * refactor(messageThunk): streamline citation handling in response processing - Removed redundant citation block source retrieval during text chunk processing. 
- Updated citation references handling to ensure proper inclusion only when available. - Simplified the logic for managing citation references in both streaming and final text updates. * refactor: simplify determineCitationSource for backward compatibility --------- Co-authored-by: one --- .../clients/anthropic/AnthropicAPIClient.ts | 26 +- .../aiCore/clients/openai/OpenAIApiClient.ts | 136 ++--- .../clients/openai/OpenAIResponseAPIClient.ts | 4 + .../middleware/core/TextChunkMiddleware.ts | 56 +- .../middleware/core/ThinkChunkMiddleware.ts | 10 + .../middleware/core/WebSearchMiddleware.ts | 7 +- .../pages/home/Markdown/CitationTooltip.tsx | 2 +- .../CitationTooltip.test.tsx.snap | 4 +- .../home/Messages/Blocks/MainTextBlock.tsx | 114 +--- .../Blocks/__tests__/MainTextBlock.test.tsx | 162 ++--- .../src/pages/home/Messages/CitationsList.tsx | 12 +- src/renderer/src/services/ApiService.ts | 3 - .../src/services/StreamProcessingService.ts | 1 + src/renderer/src/store/messageBlock.ts | 15 +- src/renderer/src/store/thunk/messageThunk.ts | 76 ++- src/renderer/src/types/chunk.ts | 1 - src/renderer/src/types/index.ts | 5 +- .../src/utils/__tests__/citation.test.ts | 562 ++++++++++++++++++ .../src/utils/__tests__/linkConverter.test.ts | 17 + src/renderer/src/utils/citation.ts | 210 +++++++ src/renderer/src/utils/formats.ts | 28 - src/renderer/src/utils/linkConverter.ts | 43 +- 22 files changed, 1156 insertions(+), 338 deletions(-) create mode 100644 src/renderer/src/utils/__tests__/citation.test.ts create mode 100644 src/renderer/src/utils/citation.ts diff --git a/src/renderer/src/aiCore/clients/anthropic/AnthropicAPIClient.ts b/src/renderer/src/aiCore/clients/anthropic/AnthropicAPIClient.ts index ebe76d8152..c946f114fe 100644 --- a/src/renderer/src/aiCore/clients/anthropic/AnthropicAPIClient.ts +++ b/src/renderer/src/aiCore/clients/anthropic/AnthropicAPIClient.ts @@ -49,7 +49,9 @@ import { LLMWebSearchCompleteChunk, LLMWebSearchInProgressChunk, MCPToolCreatedChunk, + 
TextCompleteChunk, TextDeltaChunk, + ThinkingCompleteChunk, ThinkingDeltaChunk } from '@renderer/types/chunk' import { type Message } from '@renderer/types/newMessage' @@ -517,7 +519,7 @@ export class AnthropicAPIClient extends BaseApiClient< return () => { let accumulatedJson = '' const toolCalls: Record = {} - + const ChunkIdTypeMap: Record = {} return { async transform(rawChunk: AnthropicSdkRawChunk, controller: TransformStreamDefaultController) { switch (rawChunk.type) { @@ -612,6 +614,19 @@ export class AnthropicAPIClient extends BaseApiClient< toolCalls[rawChunk.index] = contentBlock break } + case 'text': { + if (!ChunkIdTypeMap[rawChunk.index]) { + ChunkIdTypeMap[rawChunk.index] = ChunkType.TEXT_DELTA // 用textdelta代表文本块 + } + break + } + case 'thinking': + case 'redacted_thinking': { + if (!ChunkIdTypeMap[rawChunk.index]) { + ChunkIdTypeMap[rawChunk.index] = ChunkType.THINKING_DELTA // 用thinkingdelta代表思考块 + } + break + } } break } @@ -646,6 +661,15 @@ export class AnthropicAPIClient extends BaseApiClient< break } case 'content_block_stop': { + if (ChunkIdTypeMap[rawChunk.index] === ChunkType.TEXT_DELTA) { + controller.enqueue({ + type: ChunkType.TEXT_COMPLETE + } as TextCompleteChunk) + } else if (ChunkIdTypeMap[rawChunk.index] === ChunkType.THINKING_DELTA) { + controller.enqueue({ + type: ChunkType.THINKING_COMPLETE + } as ThinkingCompleteChunk) + } const toolCall = toolCalls[rawChunk.index] if (toolCall) { try { diff --git a/src/renderer/src/aiCore/clients/openai/OpenAIApiClient.ts b/src/renderer/src/aiCore/clients/openai/OpenAIApiClient.ts index 65e9cc67c4..c1994dcb95 100644 --- a/src/renderer/src/aiCore/clients/openai/OpenAIApiClient.ts +++ b/src/renderer/src/aiCore/clients/openai/OpenAIApiClient.ts @@ -564,11 +564,11 @@ export class OpenAIAPIClient extends OpenAIBaseClient< // Perplexity citations // @ts-ignore - citations may not be in standard type definitions - if (context.provider?.id === 'perplexity' && chunk.citations && chunk.citations.length > 
0) { + if (context.provider?.id === 'perplexity' && chunk.search_results && chunk.search_results.length > 0) { hasBeenCollectedWebSearch = true return { // @ts-ignore - citations may not be in standard type definitions - results: chunk.citations, + results: chunk.search_results, source: WebSearchSource.PERPLEXITY } } @@ -672,74 +672,21 @@ export class OpenAIAPIClient extends OpenAIBaseClient< // 处理chunk if ('choices' in chunk && chunk.choices && chunk.choices.length > 0) { - const choice = chunk.choices[0] + for (const choice of chunk.choices) { + if (!choice) continue - if (!choice) return - - // 对于流式响应,使用 delta;对于非流式响应,使用 message。 - // 然而某些 OpenAI 兼容平台在非流式请求时会错误地返回一个空对象的 delta 字段。 - // 如果 delta 为空对象,应当忽略它并回退到 message,避免造成内容缺失。 - let contentSource: OpenAISdkRawContentSource | null = null - if ('delta' in choice && choice.delta && Object.keys(choice.delta).length > 0) { - contentSource = choice.delta - } else if ('message' in choice) { - contentSource = choice.message - } - - if (!contentSource) return - - const webSearchData = collectWebSearchData(chunk, contentSource, context) - if (webSearchData) { - controller.enqueue({ - type: ChunkType.LLM_WEB_SEARCH_COMPLETE, - llm_web_search: webSearchData - }) - } - - // 处理推理内容 (e.g. 
from OpenRouter DeepSeek-R1) - // @ts-ignore - reasoning_content is not in standard OpenAI types but some providers use it - const reasoningText = contentSource.reasoning_content || contentSource.reasoning - if (reasoningText) { - controller.enqueue({ - type: ChunkType.THINKING_DELTA, - text: reasoningText - }) - } - - // 处理文本内容 - if (contentSource.content) { - controller.enqueue({ - type: ChunkType.TEXT_DELTA, - text: contentSource.content - }) - } - - // 处理工具调用 - if (contentSource.tool_calls) { - for (const toolCall of contentSource.tool_calls) { - if ('index' in toolCall) { - const { id, index, function: fun } = toolCall - if (fun?.name) { - toolCalls[index] = { - id: id || '', - function: { - name: fun.name, - arguments: fun.arguments || '' - }, - type: 'function' - } - } else if (fun?.arguments) { - toolCalls[index].function.arguments += fun.arguments - } - } else { - toolCalls.push(toolCall) - } + // 对于流式响应,使用 delta;对于非流式响应,使用 message。 + // 然而某些 OpenAI 兼容平台在非流式请求时会错误地返回一个空对象的 delta 字段。 + // 如果 delta 为空对象,应当忽略它并回退到 message,避免造成内容缺失。 + let contentSource: OpenAISdkRawContentSource | null = null + if ('delta' in choice && choice.delta && Object.keys(choice.delta).length > 0) { + contentSource = choice.delta + } else if ('message' in choice) { + contentSource = choice.message } - } - // 处理finish_reason,发送流结束信号 - if ('finish_reason' in choice && choice.finish_reason) { - Logger.debug(`[OpenAIApiClient] Stream finished with reason: ${choice.finish_reason}`) + if (!contentSource) continue + const webSearchData = collectWebSearchData(chunk, contentSource, context) if (webSearchData) { controller.enqueue({ @@ -747,7 +694,60 @@ export class OpenAIAPIClient extends OpenAIBaseClient< llm_web_search: webSearchData }) } - emitCompletionSignals(controller) + + // 处理推理内容 (e.g. 
from OpenRouter DeepSeek-R1) + // @ts-ignore - reasoning_content is not in standard OpenAI types but some providers use it + const reasoningText = contentSource.reasoning_content || contentSource.reasoning + if (reasoningText) { + controller.enqueue({ + type: ChunkType.THINKING_DELTA, + text: reasoningText + }) + } + + // 处理文本内容 + if (contentSource.content) { + controller.enqueue({ + type: ChunkType.TEXT_DELTA, + text: contentSource.content + }) + } + + // 处理工具调用 + if (contentSource.tool_calls) { + for (const toolCall of contentSource.tool_calls) { + if ('index' in toolCall) { + const { id, index, function: fun } = toolCall + if (fun?.name) { + toolCalls[index] = { + id: id || '', + function: { + name: fun.name, + arguments: fun.arguments || '' + }, + type: 'function' + } + } else if (fun?.arguments) { + toolCalls[index].function.arguments += fun.arguments + } + } else { + toolCalls.push(toolCall) + } + } + } + + // 处理finish_reason,发送流结束信号 + if ('finish_reason' in choice && choice.finish_reason) { + Logger.debug(`[OpenAIApiClient] Stream finished with reason: ${choice.finish_reason}`) + const webSearchData = collectWebSearchData(chunk, contentSource, context) + if (webSearchData) { + controller.enqueue({ + type: ChunkType.LLM_WEB_SEARCH_COMPLETE, + llm_web_search: webSearchData + }) + } + emitCompletionSignals(controller) + } } } }, diff --git a/src/renderer/src/aiCore/clients/openai/OpenAIResponseAPIClient.ts b/src/renderer/src/aiCore/clients/openai/OpenAIResponseAPIClient.ts index 99e40ed818..a5bfff8263 100644 --- a/src/renderer/src/aiCore/clients/openai/OpenAIResponseAPIClient.ts +++ b/src/renderer/src/aiCore/clients/openai/OpenAIResponseAPIClient.ts @@ -492,6 +492,10 @@ export class OpenAIResponseAPIClient extends OpenAIBaseClient< case 'response.output_item.added': if (chunk.item.type === 'function_call') { outputItems.push(chunk.item) + } else if (chunk.item.type === 'web_search_call') { + controller.enqueue({ + type: ChunkType.LLM_WEB_SEARCH_IN_PROGRESS + }) 
} break case 'response.reasoning_summary_part.added': diff --git a/src/renderer/src/aiCore/middleware/core/TextChunkMiddleware.ts b/src/renderer/src/aiCore/middleware/core/TextChunkMiddleware.ts index 2a3255356f..3905d52058 100644 --- a/src/renderer/src/aiCore/middleware/core/TextChunkMiddleware.ts +++ b/src/renderer/src/aiCore/middleware/core/TextChunkMiddleware.ts @@ -1,5 +1,5 @@ import Logger from '@renderer/config/logger' -import { ChunkType, TextDeltaChunk } from '@renderer/types/chunk' +import { ChunkType, TextCompleteChunk, TextDeltaChunk } from '@renderer/types/chunk' import { CompletionsParams, CompletionsResult, GenericChunk } from '../schemas' import { CompletionsContext, CompletionsMiddleware } from '../types' @@ -38,7 +38,7 @@ export const TextChunkMiddleware: CompletionsMiddleware = // 用于跨chunk的状态管理 let accumulatedTextContent = '' - let hasEnqueue = false + let hasTextCompleteEventEnqueue = false const enhancedTextStream = resultFromUpstream.pipeThrough( new TransformStream({ transform(chunk: GenericChunk, controller) { @@ -53,30 +53,44 @@ export const TextChunkMiddleware: CompletionsMiddleware = // 创建新的chunk,包含处理后的文本 controller.enqueue(chunk) - } else if (accumulatedTextContent) { - if (chunk.type !== ChunkType.LLM_RESPONSE_COMPLETE) { - controller.enqueue(chunk) - hasEnqueue = true - } - const finalText = accumulatedTextContent - ctx._internal.customState!.accumulatedText = finalText - if (ctx._internal.toolProcessingState && !ctx._internal.toolProcessingState?.output) { - ctx._internal.toolProcessingState.output = finalText - } - - // 处理 onResponse 回调 - 发送最终完整文本 - if (params.onResponse) { - params.onResponse(finalText, true) - } - + } else if (chunk.type === ChunkType.TEXT_COMPLETE) { + const textChunk = chunk as TextCompleteChunk controller.enqueue({ - type: ChunkType.TEXT_COMPLETE, - text: finalText + ...textChunk, + text: accumulatedTextContent }) + if (params.onResponse) { + params.onResponse(accumulatedTextContent, true) + } + 
hasTextCompleteEventEnqueue = true accumulatedTextContent = '' - if (!hasEnqueue) { + } else if (accumulatedTextContent && !hasTextCompleteEventEnqueue) { + if (chunk.type === ChunkType.LLM_RESPONSE_COMPLETE) { + const finalText = accumulatedTextContent + ctx._internal.customState!.accumulatedText = finalText + if (ctx._internal.toolProcessingState && !ctx._internal.toolProcessingState?.output) { + ctx._internal.toolProcessingState.output = finalText + } + + // 处理 onResponse 回调 - 发送最终完整文本 + if (params.onResponse) { + params.onResponse(finalText, true) + } + + controller.enqueue({ + type: ChunkType.TEXT_COMPLETE, + text: finalText + }) + controller.enqueue(chunk) + } else { + controller.enqueue({ + type: ChunkType.TEXT_COMPLETE, + text: accumulatedTextContent + }) controller.enqueue(chunk) } + hasTextCompleteEventEnqueue = true + accumulatedTextContent = '' } else { // 其他类型的chunk直接传递 controller.enqueue(chunk) diff --git a/src/renderer/src/aiCore/middleware/core/ThinkChunkMiddleware.ts b/src/renderer/src/aiCore/middleware/core/ThinkChunkMiddleware.ts index b0df8313a5..dccdde7f10 100644 --- a/src/renderer/src/aiCore/middleware/core/ThinkChunkMiddleware.ts +++ b/src/renderer/src/aiCore/middleware/core/ThinkChunkMiddleware.ts @@ -65,6 +65,16 @@ export const ThinkChunkMiddleware: CompletionsMiddleware = thinking_millsec: thinkingStartTime > 0 ? Date.now() - thinkingStartTime : 0 } controller.enqueue(enhancedChunk) + } else if (chunk.type === ChunkType.THINKING_COMPLETE) { + const thinkingCompleteChunk = chunk as ThinkingCompleteChunk + controller.enqueue({ + ...thinkingCompleteChunk, + text: accumulatedThinkingContent, + thinking_millsec: thinkingStartTime > 0 ? 
Date.now() - thinkingStartTime : 0 + }) + hasThinkingContent = false + accumulatedThinkingContent = '' + thinkingStartTime = 0 } else if (hasThinkingContent && thinkingStartTime > 0) { // 收到任何非THINKING_DELTA的chunk时,如果有累积的思考内容,生成THINKING_COMPLETE const thinkingCompleteChunk: ThinkingCompleteChunk = { diff --git a/src/renderer/src/aiCore/middleware/core/WebSearchMiddleware.ts b/src/renderer/src/aiCore/middleware/core/WebSearchMiddleware.ts index 70915abffa..d4c8f71eff 100644 --- a/src/renderer/src/aiCore/middleware/core/WebSearchMiddleware.ts +++ b/src/renderer/src/aiCore/middleware/core/WebSearchMiddleware.ts @@ -42,7 +42,12 @@ export const WebSearchMiddleware: CompletionsMiddleware = const providerType = model.provider || 'openai' // 使用当前可用的Web搜索结果进行链接转换 const text = chunk.text - const result = smartLinkConverter(text, providerType, isFirstChunk) + const result = smartLinkConverter( + text, + providerType, + isFirstChunk, + ctx._internal.webSearchState!.results + ) if (isFirstChunk) { isFirstChunk = false } diff --git a/src/renderer/src/pages/home/Markdown/CitationTooltip.tsx b/src/renderer/src/pages/home/Markdown/CitationTooltip.tsx index 6041b562af..febbd3264f 100644 --- a/src/renderer/src/pages/home/Markdown/CitationTooltip.tsx +++ b/src/renderer/src/pages/home/Markdown/CitationTooltip.tsx @@ -32,7 +32,7 @@ const CitationTooltip: React.FC = ({ children, citation }) // 自定义悬浮卡片内容 const tooltipContent = useMemo( () => ( -
+
diff --git a/src/renderer/src/pages/home/Markdown/__tests__/__snapshots__/CitationTooltip.test.tsx.snap b/src/renderer/src/pages/home/Markdown/__tests__/__snapshots__/CitationTooltip.test.tsx.snap index e9c6def351..f11d52a833 100644 --- a/src/renderer/src/pages/home/Markdown/__tests__/__snapshots__/CitationTooltip.test.tsx.snap +++ b/src/renderer/src/pages/home/Markdown/__tests__/__snapshots__/CitationTooltip.test.tsx.snap @@ -58,7 +58,9 @@ exports[`CitationTooltip > basic rendering > should match snapshot 1`] = `
-
+
= ({ block, citationBlockId, role, mentions const rawCitations = useSelector((state: RootState) => selectFormattedCitationsByBlockId(state, citationBlockId)) - const formattedCitations = useMemo(() => { - return rawCitations.map((citation) => ({ - ...citation, - content: citation.content ? cleanMarkdownContent(citation.content) : citation.content - })) - }, [rawCitations]) - const processedContent = useMemo(() => { - let content = block.content - // Update condition to use citationBlockId - if (!block.citationReferences?.length || !citationBlockId || formattedCitations.length === 0) { - return content + if (!block.citationReferences?.length || !citationBlockId || rawCitations.length === 0) { + return block.content } - switch (block.citationReferences[0].citationBlockSource) { - case WebSearchSource.OPENAI: - case WebSearchSource.OPENAI_RESPONSE: { - formattedCitations.forEach((citation) => { - const citationNum = citation.number - const supData = { - id: citationNum, - url: citation.url, - title: citation.title || citation.hostname || '', - content: citation.content?.substring(0, 200) - } - const citationJson = encodeHTML(JSON.stringify(supData)) + // 确定最适合的 source + const sourceType = determineCitationSource(block.citationReferences) - // Handle[N](url) - const preFormattedRegex = new RegExp(`\\[${citationNum}\\]\\(.*?\\)`, 'g') - - const citationTag = `[${citationNum}](${citation.url})` - - content = content.replace(preFormattedRegex, citationTag) - }) - break - } - case WebSearchSource.GEMINI: { - // First pass: Add basic citation marks using metadata - let processedContent = content - const firstCitation = formattedCitations[0] - if (firstCitation?.metadata) { - firstCitation.metadata.forEach((support: GroundingSupport) => { - const citationNums = support.groundingChunkIndices! - - if (support.segment) { - const text = support.segment.text! 
- // 生成引用标记 - const basicTag = citationNums - .map((citationNum) => { - const citation = formattedCitations.find((c) => c.number === citationNum + 1) - return citation ? `[${citationNum + 1}](${citation.url})` : '' - }) - .join('') - - // 在文本后面添加引用标记,而不是替换 - if (text && basicTag) { - processedContent = processedContent.replace(text, `${text}${basicTag}`) - } - } - }) - content = processedContent - } - // Second pass: Replace basic citations with full citation data - formattedCitations.forEach((citation) => { - const citationNum = citation.number - const supData = { - id: citationNum, - url: citation.url, - title: citation.title || citation.hostname || '', - content: citation.content?.substring(0, 200) - } - const citationJson = encodeHTML(JSON.stringify(supData)) - - // Replace basic citation with full citation including data - const basicCitationRegex = new RegExp(`\\[${citationNum}\\]\\(${citation.url}\\)`, 'g') - const fullCitationTag = `[${citationNum}](${citation.url})` - content = content.replace(basicCitationRegex, fullCitationTag) - }) - break - } - default: { - // FIXME:性能问题,需要优化 - // Replace all citation numbers and pre-formatted links with formatted citations - formattedCitations.forEach((citation) => { - const citationNum = citation.number - const supData = { - id: citationNum, - url: citation.url, - title: citation.title || citation.hostname || '', - content: citation.content?.substring(0, 200) - } - const isLink = citation.url.startsWith('http') - const citationJson = encodeHTML(JSON.stringify(supData)) - - // Handle both plain references [N] and pre-formatted links [N](url) - const plainRefRegex = new RegExp(`\\[${citationNum}\\]`, 'g') - - const supTag = `${citationNum}` - const citationTag = isLink ? 
`[${supTag}](${citation.url})` : supTag - - content = content.replace(plainRefRegex, citationTag) - }) - } - } - - return content - }, [block.content, block.citationReferences, citationBlockId, formattedCitations]) + return withCitationTags(block.content, rawCitations, sourceType) + }, [block.content, block.citationReferences, citationBlockId, rawCitations]) const ignoreToolUse = useMemo(() => { return processedContent.replace(toolUseRegex, '') diff --git a/src/renderer/src/pages/home/Messages/Blocks/__tests__/MainTextBlock.test.tsx b/src/renderer/src/pages/home/Messages/Blocks/__tests__/MainTextBlock.test.tsx index e2badf156c..551e0d9371 100644 --- a/src/renderer/src/pages/home/Messages/Blocks/__tests__/MainTextBlock.test.tsx +++ b/src/renderer/src/pages/home/Messages/Blocks/__tests__/MainTextBlock.test.tsx @@ -48,6 +48,28 @@ vi.mock('@renderer/utils/formats', () => ({ encodeHTML: vi.fn((content: string) => content.replace(/"/g, '"')) })) +// Mock citation utilities +vi.mock('@renderer/utils/citation', () => ({ + withCitationTags: vi.fn((content: string, citations: any[]) => { + // Simple mock implementation that simulates citation processing + if (citations.length > 0) { + return `${content} [processed-citations]` + } + return content + }), + determineCitationSource: vi.fn((citationReferences: any[], citationBlock?: any) => { + // Mock implementation that returns the first valid source from citationReferences + if (citationBlock?.response?.source) { + return citationBlock.response.source + } + if (citationReferences?.length) { + const validReference = citationReferences.find((ref) => ref.citationBlockSource) + return validReference?.citationBlockSource + } + return undefined + }) +})) + // Mock services vi.mock('@renderer/services/ModelService', () => ({ getModelUniqId: vi.fn() @@ -66,7 +88,8 @@ vi.mock('@renderer/pages/home/Markdown/Markdown', () => ({ describe('MainTextBlock', () => { // Get references to mocked modules let mockGetModelUniqId: any - let 
mockCleanMarkdownContent: any + let mockWithCitationTags: any + let mockDetermineCitationSource: any // Create a mock store for Provider const mockStore = configureStore({ @@ -80,9 +103,10 @@ describe('MainTextBlock', () => { // Get the mocked functions const { getModelUniqId } = await import('@renderer/services/ModelService') - const { cleanMarkdownContent } = await import('@renderer/utils/formats') + const { withCitationTags, determineCitationSource } = await import('@renderer/utils/citation') mockGetModelUniqId = getModelUniqId as any - mockCleanMarkdownContent = cleanMarkdownContent as any + mockWithCitationTags = withCitationTags as any + mockDetermineCitationSource = determineCitationSource as any // Default mock implementations mockUseSettings.mockReturnValue({ renderInputMessageAsMarkdown: false }) @@ -283,8 +307,16 @@ text after`, }) it('should process content through format utilities', () => { - const block = createMainTextBlock({ content: 'Content to process' }) - mockUseSelector.mockReturnValue([{ id: '1', content: 'Citation content', number: 1 }]) + const block = createMainTextBlock({ + content: 'Content to process', + citationReferences: [{ citationBlockSource: 'DEFAULT' as any }] + }) + const mockCitations = [{ id: '1', content: 'Citation content', number: 1 }] + + // Mock the useSelector calls - first call for citations, second call for citationBlock + mockUseSelector + .mockReturnValueOnce(mockCitations) // selectFormattedCitationsByBlockId + .mockReturnValueOnce(undefined) // messageBlocksSelectors.selectById renderMainTextBlock({ block, @@ -292,8 +324,14 @@ text after`, citationBlockId: 'test-citations' }) - // Verify utility functions are called - expect(mockCleanMarkdownContent).toHaveBeenCalled() + // Verify determineCitationSource was called with correct parameters + expect(mockDetermineCitationSource).toHaveBeenCalledWith(block.citationReferences) + + // Verify citation processing was called with correct parameters + 
expect(mockWithCitationTags).toHaveBeenCalledWith('Content to process', mockCitations, 'DEFAULT') + + // Verify the processed content is rendered + expect(screen.getByText('Markdown: Content to process [processed-citations]')).toBeInTheDocument() }) }) @@ -308,7 +346,7 @@ text after`, expect(mockUseSelector).toHaveBeenCalled() }) - it('should integrate with citation system when citations exist', () => { + it('should integrate with citation processing when all conditions are met', () => { const block = createMainTextBlock({ content: 'Content with citation [1]', citationReferences: [{ citationBlockSource: WebSearchSource.OPENAI }] @@ -324,7 +362,11 @@ text after`, } ] - mockUseSelector.mockReturnValue(mockCitations) + // Mock the useSelector calls - first call for citations, second call for citationBlock + mockUseSelector + .mockReturnValueOnce(mockCitations) // selectFormattedCitationsByBlockId + .mockReturnValueOnce(undefined) // messageBlocksSelectors.selectById + renderMainTextBlock({ block, role: 'assistant', @@ -335,28 +377,58 @@ text after`, expect(mockUseSelector).toHaveBeenCalled() expect(getRenderedMarkdown()).toBeInTheDocument() - // Verify content processing occurred - expect(mockCleanMarkdownContent).toHaveBeenCalledWith('Citation content') + // Verify determineCitationSource was called + expect(mockDetermineCitationSource).toHaveBeenCalledWith(block.citationReferences) + + // Verify withCitationTags was called with correct parameters + expect(mockWithCitationTags).toHaveBeenCalledWith( + 'Content with citation [1]', + mockCitations, + WebSearchSource.OPENAI + ) + + // Verify the processed content is rendered + expect(screen.getByText('Markdown: Content with citation [1] [processed-citations]')).toBeInTheDocument() }) - it('should handle different citation sources correctly', () => { - const testSources = [WebSearchSource.OPENAI, 'DEFAULT' as any, 'CUSTOM' as any] + it('should skip citation processing when conditions are not met', () => { + const 
testCases = [ + { + name: 'no citationReferences', + block: createMainTextBlock({ content: 'Content [1]' }), + citationBlockId: 'test' + }, + { + name: 'no citationBlockId', + block: createMainTextBlock({ + content: 'Content [1]', + citationReferences: [{ citationBlockSource: 'DEFAULT' as any }] + }), + citationBlockId: undefined + }, + { + name: 'no citations data', + block: createMainTextBlock({ + content: 'Content [1]', + citationReferences: [{ citationBlockSource: 'DEFAULT' as any }] + }), + citationBlockId: 'test' + } + ] - testSources.forEach((source) => { - const block = createMainTextBlock({ - content: `Citation test for ${source}`, - citationReferences: [{ citationBlockSource: source }] - }) - - mockUseSelector.mockReturnValue([{ id: '1', number: 1, url: 'https://test.com', title: 'Test' }]) + testCases.forEach(({ block, citationBlockId }) => { + mockUseSelector.mockReturnValue([]) // No citations const { unmount } = renderMainTextBlock({ block, role: 'assistant', - citationBlockId: `test-${source}` + citationBlockId }) expect(getRenderedMarkdown()).toBeInTheDocument() + // Should render original content without citation processing + expect(screen.getByText(`Markdown: ${block.content}`)).toBeInTheDocument() + unmount() }) }) @@ -400,51 +472,7 @@ text after`, }) }) - describe('edge cases and robustness', () => { - it('should handle large content without performance issues', () => { - const largeContent = 'A'.repeat(1000) + ' with citations [1]' - const block = createMainTextBlock({ content: largeContent }) - - const largeCitations = [ - { - id: '1', - number: 1, - url: 'https://large.com', - title: 'Large', - content: 'B'.repeat(500) - } - ] - - mockUseSelector.mockReturnValue(largeCitations) - - expect(() => { - renderMainTextBlock({ - block, - role: 'assistant', - citationBlockId: 'large-test' - }) - }).not.toThrow() - - expect(getRenderedMarkdown()).toBeInTheDocument() - }) - - it('should handle special characters and Unicode gracefully', () => { - const 
specialContent = '测试内容 🚀 📝 ✨ <>&"\'` [1]' - const block = createMainTextBlock({ content: specialContent }) - - mockUseSelector.mockReturnValue([{ id: '1', number: 1, title: '特殊字符测试', content: '内容 with 🎉' }]) - - expect(() => { - renderMainTextBlock({ - block, - role: 'assistant', - citationBlockId: 'unicode-test' - }) - }).not.toThrow() - - expect(getRenderedMarkdown()).toBeInTheDocument() - }) - + describe('integration and robustness', () => { it('should handle null and undefined values gracefully', () => { const block = createMainTextBlock({ content: 'Null safety test' }) @@ -460,7 +488,7 @@ text after`, expect(getRenderedMarkdown()).toBeInTheDocument() }) - it('should integrate properly with Redux store', () => { + it('should integrate properly with Redux store for citations', () => { const block = createMainTextBlock({ content: 'Redux integration test', citationReferences: [{ citationBlockSource: 'DEFAULT' as any }] diff --git a/src/renderer/src/pages/home/Messages/CitationsList.tsx b/src/renderer/src/pages/home/Messages/CitationsList.tsx index 5a61fecb72..ec51379784 100644 --- a/src/renderer/src/pages/home/Messages/CitationsList.tsx +++ b/src/renderer/src/pages/home/Messages/CitationsList.tsx @@ -1,5 +1,6 @@ import ContextMenu from '@renderer/components/ContextMenu' import Favicon from '@renderer/components/Icons/FallbackFavicon' +import { Citation } from '@renderer/types' import { fetchWebContent } from '@renderer/utils/fetch' import { cleanMarkdownContent } from '@renderer/utils/formats' import { QueryClient, QueryClientProvider, useQuery } from '@tanstack/react-query' @@ -9,17 +10,6 @@ import React, { useState } from 'react' import { useTranslation } from 'react-i18next' import styled from 'styled-components' -export interface Citation { - number: number - url: string - title?: string - hostname?: string - content?: string - showFavicon?: boolean - type?: string - metadata?: Record -} - interface CitationsListProps { citations: Citation[] } diff --git 
a/src/renderer/src/services/ApiService.ts b/src/renderer/src/services/ApiService.ts index abc3db81b0..7c4861f403 100644 --- a/src/renderer/src/services/ApiService.ts +++ b/src/renderer/src/services/ApiService.ts @@ -359,9 +359,6 @@ export async function fetchChatCompletion({ // --- Call AI Completions --- onChunkReceived({ type: ChunkType.LLM_RESPONSE_CREATED }) - if (enableWebSearch) { - onChunkReceived({ type: ChunkType.LLM_WEB_SEARCH_IN_PROGRESS }) - } await AI.completions( { callType: 'chat', diff --git a/src/renderer/src/services/StreamProcessingService.ts b/src/renderer/src/services/StreamProcessingService.ts index 527cc2242f..78007e85e2 100644 --- a/src/renderer/src/services/StreamProcessingService.ts +++ b/src/renderer/src/services/StreamProcessingService.ts @@ -43,6 +43,7 @@ export function createStreamProcessor(callbacks: StreamProcessorCallbacks = {}) return (chunk: Chunk) => { try { const data = chunk + // console.log('data: ', chunk) switch (data.type) { case ChunkType.BLOCK_COMPLETE: { if (callbacks.onComplete) callbacks.onComplete(AssistantMessageStatus.SUCCESS, data?.response) diff --git a/src/renderer/src/store/messageBlock.ts b/src/renderer/src/store/messageBlock.ts index f9b8c34cd5..50d94af827 100644 --- a/src/renderer/src/store/messageBlock.ts +++ b/src/renderer/src/store/messageBlock.ts @@ -1,8 +1,7 @@ import { WebSearchResultBlock } from '@anthropic-ai/sdk/resources' import type { GroundingMetadata } from '@google/genai' import { createEntityAdapter, createSelector, createSlice, type PayloadAction } from '@reduxjs/toolkit' -import type { Citation } from '@renderer/pages/home/Messages/CitationsList' -import { WebSearchProviderResponse, WebSearchSource } from '@renderer/types' +import { Citation, WebSearchProviderResponse, WebSearchSource } from '@renderer/types' import type { CitationMessageBlock, MessageBlock } from '@renderer/types/newMessage' import { MessageBlockType } from '@renderer/types/newMessage' import type OpenAI from 'openai' @@ 
-160,9 +159,19 @@ export const formatCitationsFromBlock = (block: CitationMessageBlock | undefined } }) || [] break + case WebSearchSource.PERPLEXITY: { + formattedCitations = + (block.response.results as any[])?.map((result, index) => ({ + number: index + 1, + url: result.url || result, // 兼容旧数据 + title: result.title || new URL(result).hostname, // 兼容旧数据 + showFavicon: true, + type: 'websearch' + })) || [] + break + } case WebSearchSource.GROK: case WebSearchSource.OPENROUTER: - case WebSearchSource.PERPLEXITY: formattedCitations = (block.response.results as any[])?.map((url, index) => { try { diff --git a/src/renderer/src/store/thunk/messageThunk.ts b/src/renderer/src/store/thunk/messageThunk.ts index 5537ed0b13..5cb2836e82 100644 --- a/src/renderer/src/store/thunk/messageThunk.ts +++ b/src/renderer/src/store/thunk/messageThunk.ts @@ -8,7 +8,15 @@ import { createStreamProcessor, type StreamProcessorCallbacks } from '@renderer/ import { estimateMessagesUsage } from '@renderer/services/TokenService' import store from '@renderer/store' import { updateTopicUpdatedAt } from '@renderer/store/assistants' -import type { Assistant, ExternalToolResult, FileMetadata, MCPToolResponse, Model, Topic } from '@renderer/types' +import { + type Assistant, + type ExternalToolResult, + type FileMetadata, + type MCPToolResponse, + type Model, + type Topic, + WebSearchSource +} from '@renderer/types' import type { CitationMessageBlock, FileMessageBlock, @@ -353,7 +361,7 @@ const fetchAndProcessAssistantResponseImpl = async ( let thinkingBlockId: string | null = null let imageBlockId: string | null = null let toolBlockId: string | null = null - let hasWebSearch = false + const toolCallIdToBlockIdMap = new Map() const notificationService = NotificationService.getInstance() @@ -433,8 +441,7 @@ const fetchAndProcessAssistantResponseImpl = async ( const initialChanges: Partial = { type: MessageBlockType.MAIN_TEXT, content: accumulatedContent, - status: MessageBlockStatus.STREAMING, - 
citationReferences: citationBlockId ? [{ citationBlockId }] : [] + status: MessageBlockStatus.STREAMING } mainTextBlockId = initialPlaceholderBlockId // 清理占位块 @@ -444,8 +451,7 @@ const fetchAndProcessAssistantResponseImpl = async ( saveUpdatedBlockToDB(mainTextBlockId, assistantMsgId, topicId, getState) } else { const newBlock = createMainTextBlock(assistantMsgId, accumulatedContent, { - status: MessageBlockStatus.STREAMING, - citationReferences: citationBlockId ? [{ citationBlockId }] : [] + status: MessageBlockStatus.STREAMING }) mainTextBlockId = newBlock.id // 立即设置ID,防止竞态条件 await handleBlockTransition(newBlock, MessageBlockType.MAIN_TEXT) @@ -453,27 +459,27 @@ const fetchAndProcessAssistantResponseImpl = async ( }, onTextComplete: async (finalText) => { if (mainTextBlockId) { + let citationBlockSource: WebSearchSource | undefined + if (citationBlockId) { + const citationBlock = getState().messageBlocks.entities[citationBlockId] as CitationMessageBlock + citationBlockSource = citationBlock.response?.source + } const changes = { content: finalText, - status: MessageBlockStatus.SUCCESS + status: MessageBlockStatus.SUCCESS, + citationReferences: citationBlockSource ? 
[{ citationBlockId, citationBlockSource }] : [] } cancelThrottledBlockUpdate(mainTextBlockId) dispatch(updateOneBlock({ id: mainTextBlockId, changes })) saveUpdatedBlockToDB(mainTextBlockId, assistantMsgId, topicId, getState) - mainTextBlockId = null + if (!assistant.enableWebSearch) { + mainTextBlockId = null + } } else { console.warn( `[onTextComplete] Received text.complete but last block was not MAIN_TEXT (was ${lastBlockType}) or lastBlockId is null.` ) } - if (citationBlockId && !hasWebSearch) { - const changes: Partial = { - status: MessageBlockStatus.SUCCESS - } - dispatch(updateOneBlock({ id: citationBlockId, changes })) - saveUpdatedBlockToDB(citationBlockId, assistantMsgId, topicId, getState) - citationBlockId = null - } }, onThinkingChunk: async (text, thinking_millsec) => { accumulatedThinking += text @@ -616,15 +622,44 @@ const fetchAndProcessAssistantResponseImpl = async ( } }, onLLMWebSearchComplete: async (llmWebSearchResult) => { - if (citationBlockId) { - hasWebSearch = true + const blockId = citationBlockId || initialPlaceholderBlockId + if (blockId) { const changes: Partial = { + type: MessageBlockType.CITATION, response: llmWebSearchResult, status: MessageBlockStatus.SUCCESS } - dispatch(updateOneBlock({ id: citationBlockId, changes })) - saveUpdatedBlockToDB(citationBlockId, assistantMsgId, topicId, getState) + dispatch(updateOneBlock({ id: blockId, changes })) + saveUpdatedBlockToDB(blockId, assistantMsgId, topicId, getState) + if (mainTextBlockId) { + const state = getState() + const existingMainTextBlock = state.messageBlocks.entities[mainTextBlockId] + if (existingMainTextBlock && existingMainTextBlock.type === MessageBlockType.MAIN_TEXT) { + const currentRefs = existingMainTextBlock.citationReferences || [] + const mainTextChanges = { + citationReferences: [...currentRefs, { blockId, citationBlockSource: llmWebSearchResult.source }] + } + dispatch(updateOneBlock({ id: mainTextBlockId, changes: mainTextChanges })) + 
saveUpdatedBlockToDB(mainTextBlockId, assistantMsgId, topicId, getState) + } + mainTextBlockId = null + } + if (initialPlaceholderBlockId) { + citationBlockId = initialPlaceholderBlockId + initialPlaceholderBlockId = null + } + } else { + const citationBlock = createCitationBlock( + assistantMsgId, + { + response: llmWebSearchResult + }, + { + status: MessageBlockStatus.SUCCESS + } + ) + citationBlockId = citationBlock.id if (mainTextBlockId) { const state = getState() const existingMainTextBlock = state.messageBlocks.entities[mainTextBlockId] @@ -641,6 +676,7 @@ const fetchAndProcessAssistantResponseImpl = async ( } mainTextBlockId = null } + await handleBlockTransition(citationBlock, MessageBlockType.CITATION) } }, onImageCreated: async () => { diff --git a/src/renderer/src/types/chunk.ts b/src/renderer/src/types/chunk.ts index 746c8999cb..f079677a1d 100644 --- a/src/renderer/src/types/chunk.ts +++ b/src/renderer/src/types/chunk.ts @@ -55,7 +55,6 @@ export interface LLMResponseInProgressChunk { response?: Response type: ChunkType.LLM_RESPONSE_IN_PROGRESS } - export interface TextDeltaChunk { /** * The text content of the chunk diff --git a/src/renderer/src/types/index.ts b/src/renderer/src/types/index.ts index b5cd425f03..084cc130e2 100644 --- a/src/renderer/src/types/index.ts +++ b/src/renderer/src/types/index.ts @@ -728,9 +728,12 @@ export interface QuickPhrase { export interface Citation { number: number url: string - hostname: string title?: string + hostname?: string content?: string + showFavicon?: boolean + type?: string + metadata?: Record } export type MathEngine = 'KaTeX' | 'MathJax' | 'none' diff --git a/src/renderer/src/utils/__tests__/citation.test.ts b/src/renderer/src/utils/__tests__/citation.test.ts new file mode 100644 index 0000000000..4c877e285a --- /dev/null +++ b/src/renderer/src/utils/__tests__/citation.test.ts @@ -0,0 +1,562 @@ +import { GroundingSupport } from '@google/genai' +import { Citation, WebSearchSource } from '@renderer/types' 
+import { describe, expect, it, vi } from 'vitest' + +import { + determineCitationSource, + generateCitationTag, + mapCitationMarksToTags, + normalizeCitationMarks, + withCitationTags +} from '../citation' + +// Mock dependencies +vi.mock('@renderer/utils/formats', () => ({ + cleanMarkdownContent: vi.fn((content: string) => content.replace(/[*_~`]/g, '')), + encodeHTML: vi.fn((str: string) => + str.replace(/[&<>"']/g, (match) => { + const entities: { [key: string]: string } = { + '&': '&', + '<': '<', + '>': '>', + '"': '"', + "'": ''' + } + return entities[match] + }) + ) +})) + +describe('citation', () => { + const createCitationMap = (citations: Citation[]) => new Map(citations.map((c) => [c.number, c])) + + describe('determineCitationSource', () => { + it('should find the the citation source', () => { + const citationReferences = [{ citationBlockId: 'block1', citationBlockSource: WebSearchSource.OPENAI }] + + const result = determineCitationSource(citationReferences) + expect(result).toBe(WebSearchSource.OPENAI) + }) + + it('should find first valid source in citation references', () => { + const citationReferences = [ + { citationBlockId: 'block1' }, // no source + { citationBlockId: 'block2', citationBlockSource: WebSearchSource.GEMINI }, + { citationBlockId: 'block3', citationBlockSource: WebSearchSource.GEMINI } + ] + + const result = determineCitationSource(citationReferences) + expect(result).toBe(WebSearchSource.GEMINI) + }) + + it('should return undefined when no sources available', () => { + const citationReferences = [ + { citationBlockId: 'block1' }, // no source + { citationBlockId: 'block2' } // no source + ] + + const result = determineCitationSource(citationReferences) + expect(result).toBeUndefined() + }) + + it('should return undefined for empty citation references', () => { + const result = determineCitationSource([]) + expect(result).toBeUndefined() + }) + + it('should return undefined for undefined citation references', () => { + const result 
= determineCitationSource(undefined) + expect(result).toBeUndefined() + }) + }) + + describe('withCitationTags', () => { + it('should process citations with default source type', () => { + const content = 'Test content [1] with citation' + const citations: Citation[] = [ + { + number: 1, + url: 'https://example.com', + title: 'Example' + } + ] + + const result = withCitationTags(content, citations) + + expect(result).toContain('[](https://example.com)') + }) + + it('should process citations with OpenAI source type', () => { + const content = 'Test content [1](https://example.com)' + const citations: Citation[] = [ + { + number: 1, + url: 'https://example.com', + title: 'Example', + content: 'Some **content**' + } + ] + + const result = withCitationTags(content, citations, WebSearchSource.OPENAI) + + expect(result).toContain('[](https://example.com)') + }) + + it('should process citations with Gemini source type', () => { + const content = 'Test content from Gemini' + const metadata: GroundingSupport[] = [ + { + segment: { text: 'Test content' }, + groundingChunkIndices: [0] + } + ] + const citations: Citation[] = [ + { + number: 1, + url: 'https://example.com', + title: 'Example', + metadata + } + ] + + const result = withCitationTags(content, citations, WebSearchSource.GEMINI) + + expect(result).toContain('Test content[](https://example.com)') + }) + + it('should handle empty citations array', () => { + const content = 'This is test content [1]' + const result = withCitationTags(content, []) + expect(result).toBe(content) + }) + }) + + describe('normalizeCitationMarks with markdown', () => { + const citations: Citation[] = [ + { number: 1, url: 'https://example1.com', title: 'Example 1' }, + { number: 2, url: 'https://example2.com', title: 'Example 2' }, + { number: 3, url: 'https://example3.com', title: 'Example 3' } + ] + const citationMap = createCitationMap(citations) + + it('should not process citations in inline code', () => { + const content = 'Here is 
`code with [1] citation` and normal [2] citation' + const result = normalizeCitationMarks(content, citationMap) + + // 内联代码中的 [1] 应该保持不变 + expect(result).toContain('`code with [1] citation`') + // 普通文本中的 [2] 应该被处理 + expect(result).toContain('[cite:2]') + }) + + it('should not process citations in code blocks', () => { + const content = `Text with citation [1] + +\`\`\`python +# Python code with [2] reference +def func(): + data = [3, 4, 5] # Array with [1] element reference + return data +\`\`\` + +\`\`\`bash +echo "Command with [2] parameter" +\`\`\` + + // Indented code block is not skipped + echo "Indented code block [3]" + +Normal text with [3] citation` + + const result = normalizeCitationMarks(content, citationMap) + + // 代码块内的内容应该保持原样 + expect(result).toContain('# Python code with [2] reference') + expect(result).toContain('data = [3, 4, 5] # Array with [1] element reference') + expect(result).toContain('echo "Command with [2] parameter"') + + // 代码块外的引用应该被处理 + expect(result).toContain('Text with citation [cite:1]') + expect(result).toContain('Indented code block [cite:3]') + expect(result).toContain('Normal text with [cite:3]') + }) + + it('should handle malformed code blocks', () => { + const content = `Text with [1] + +\`\`\`unclosed +Code block without closing +With [2] citation + +Normal text with [3] continues` + + const result = normalizeCitationMarks(content, citationMap) + + expect(result).toContain('[cite:1]') + expect(result).toContain('[cite:2]') + expect(result).toContain('[cite:3]') + }) + + it('should handle citations in various markdown structures', () => { + const content = `Normal citation [1] + +> This is a blockquote with [2] citation +> And another line with [3] + +Back to normal **with [1] again** + +# Heading with [3] citation +## Subheading with [2] citation + +List: +- list item with citation [1] + +Numbered list: +1. 
item with [2]` + + const result = normalizeCitationMarks(content, citationMap) + console.log(result) + + expect(result).toContain('citation [cite:1]') + expect(result).toContain('blockquote with [cite:2]') + expect(result).toContain('another line with [cite:3]') + expect(result).toContain('with [cite:1] again') + expect(result).toContain('Heading with [cite:3]') + expect(result).toContain('Subheading with [cite:2]') + expect(result).toContain('list item with citation [cite:1]') + expect(result).toContain('item with [cite:2]') + }) + }) + + describe('normalizeCitationMarks simple', () => { + describe('OpenAI format citations', () => { + it('should normalize OpenAI format citations', () => { + const content = 'Text with [1](https://example.com) citation' + const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Test' }] + const citationMap = createCitationMap(citations) + + for (const sourceType of [WebSearchSource.OPENAI, WebSearchSource.OPENAI_RESPONSE]) { + const result = normalizeCitationMarks(content, citationMap, sourceType) + expect(result).toBe('Text with [cite:1] citation') + } + }) + + it('should preserve non-matching OpenAI citations', () => { + const content = 'Text with [3](https://missing.com) citation' + const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Test' }] + const citationMap = createCitationMap(citations) + + for (const sourceType of [WebSearchSource.OPENAI, WebSearchSource.OPENAI_RESPONSE]) { + const result = normalizeCitationMarks(content, citationMap, sourceType) + expect(result).toBe('Text with [3](https://missing.com) citation') + } + }) + }) + + describe('Perplexity format citations', () => { + it('should normalize Perplexity format citations', () => { + const content = 'Perplexity citations [1](https://example.com)' + const citations: Citation[] = [ + { number: 1, url: 'https://example.com', title: 'Example Citation', content: 'Citation content' } + ] + const citationMap = new 
Map(citations.map((c) => [c.number, c])) + + const normalized = normalizeCitationMarks(content, citationMap, WebSearchSource.PERPLEXITY) + expect(normalized).toBe('Perplexity citations [cite:1]') + }) + + it('should preserve unmatched Perplexity citations', () => { + const content = 'Text with [2](https://notfound.com) citation' + const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Example Citation' }] + const citationMap = new Map(citations.map((c) => [c.number, c])) + + // 2号引用不存在,应该保持原样 + const normalized = normalizeCitationMarks(content, citationMap, WebSearchSource.PERPLEXITY) + expect(normalized).toBe('Text with [2](https://notfound.com) citation') + }) + }) + + describe('Gemini format citations', () => { + it('should normalize Gemini format citations', () => { + const content = 'This is test content from Gemini' + const metadata: GroundingSupport[] = [ + { + segment: { text: 'test content' }, + groundingChunkIndices: [0, 1] + } + ] + const citations: Citation[] = [ + { number: 1, url: 'https://example1.com', title: 'Test 1', metadata }, + { number: 2, url: 'https://example2.com', title: 'Test 2' } + ] + const citationMap = createCitationMap(citations) + + const result = normalizeCitationMarks(content, citationMap, WebSearchSource.GEMINI) + + expect(result).toBe('This is test content[cite:1][cite:2] from Gemini') + }) + + it('should handle Gemini citations without metadata', () => { + const content = 'Content without metadata' + const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Test' }] + const citationMap = createCitationMap(citations) + + const result = normalizeCitationMarks(content, citationMap, WebSearchSource.GEMINI) + + expect(result).toBe('Content without metadata') + }) + }) + + describe('default format citations', () => { + it('should normalize default format citations', () => { + const content = 'Text with [1][2] and [3] citations' + const citations: Citation[] = [ + { number: 1, url: 
'https://example1.com', title: 'Test 1' }, + { number: 2, url: 'https://example2.com', title: 'Test 2' }, + { number: 3, url: 'https://example3.com', title: 'Test 3' } + ] + const citationMap = createCitationMap(citations) + + const result = normalizeCitationMarks(content, citationMap) + + expect(result).toBe('Text with [cite:1][cite:2] and [cite:3] citations') + }) + + it('should preserve non-matching default format citations', () => { + const content = 'Text with [1] and [3] citations' + const citations: Citation[] = [{ number: 1, url: 'https://example1.com', title: 'Test 1' }] + const citationMap = createCitationMap(citations) + + const result = normalizeCitationMarks(content, citationMap) + + expect(result).toBe('Text with [cite:1] and [3] citations') + }) + + it('should handle nested citation patterns', () => { + const content = 'Text with [[1]] and [cite:[2]] patterns' + const citations: Citation[] = [ + { number: 1, url: 'https://example1.com', title: 'Test 1' }, + { number: 2, url: 'https://example2.com', title: 'Test 2' } + ] + const citationMap = new Map(citations.map((c) => [c.number, c])) + + const result = normalizeCitationMarks(content, citationMap) + + // 最里面的会被处理 + expect(result).toBe('Text with [[cite:1]] and [cite:[cite:2]] patterns') + }) + + it('should handle mixed citation formats', () => { + const content = 'Text with [1] and [2](url) and other [3] formats' + const citations: Citation[] = [ + { number: 1, url: 'https://example1.com', title: 'Test 1' }, + { number: 2, url: 'https://example2.com', title: 'Test 2' } + ] + const citationMap = createCitationMap(citations) + + const result = normalizeCitationMarks(content, citationMap, WebSearchSource.OPENAI) + + expect(result).toBe('Text with [1] and [cite:2] and other [3] formats') + }) + }) + }) + + describe('mapCitationMarksToTags', () => { + const createCitationMap = (citations: Citation[]) => new Map(citations.map((c) => [c.number, c])) + + it('should convert cite marks to tags', () => { + 
const content = 'Text with [cite:1] citation' + const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Test' }] + const citationMap = createCitationMap(citations) + + const result = mapCitationMarksToTags(content, citationMap) + + expect(result).toContain('with [](https://example.com) citation') + }) + + it('should handle multiple cite marks', () => { + const content = 'Text with [cite:1][cite:2] and [cite:3] citations' + const citations: Citation[] = [ + { number: 1, url: 'https://example1.com', title: 'Test 1' }, + { number: 2, url: 'https://example2.com', title: 'Test 2' }, + { number: 3, url: 'https://example3.com', title: 'Test 3' } + ] + const citationMap = createCitationMap(citations) + + const result = mapCitationMarksToTags(content, citationMap) + + expect(result).toContain('with [](https://example1.com)[](https://example2.com) and') + expect(result).toContain('3](https://example3.com) citations') + }) + + it('should preserve non-matching cite marks', () => { + const content = 'Text with [cite:1] and [cite:3] citations' + const citations: Citation[] = [{ number: 1, url: 'https://example1.com', title: 'Test 1' }] + const citationMap = createCitationMap(citations) + + const result = mapCitationMarksToTags(content, citationMap) + + expect(result).toContain('1](https://example1.com)') + expect(result).toContain('[cite:3]') // Should remain unchanged + }) + + it('should handle nested cite marks', () => { + const content = 'Text with [cite:[cite:1]] and [cite:2] citations' + const citations: Citation[] = [ + { number: 1, url: 'https://example1.com', title: 'Test 1' }, + { number: 2, url: 'https://example2.com', title: 'Test 2' } + ] + const citationMap = createCitationMap(citations) + + const result = mapCitationMarksToTags(content, citationMap) + + expect(result).toContain('[cite:[](https://example1.com)]') + expect(result).toContain('2](https://example2.com)') + }) + + it('should handle content without cite marks', () => { + const 
content = 'Text without citations' + const citationMap = new Map() + + const result = mapCitationMarksToTags(content, citationMap) + + expect(result).toBe('Text without citations') + }) + + it('should handle malformed citation numbers', () => { + const content = 'Text with [cite:abc] and [cite:] marks' + const citationMap = new Map() + + const result = mapCitationMarksToTags(content, citationMap) + + expect(result).toBe('Text with [cite:abc] and [cite:] marks') + }) + }) + + describe('generateCitationTag', () => { + it('should generate citation tag with valid URL', () => { + const citation: Citation = { + number: 1, + url: 'https://example.com', + title: 'Example Title', + content: 'Some content here' + } + + const result = generateCitationTag(citation) + + expect(result).toContain('[](https://example.com)') + expect(result).toContain('Example Title') + }) + + it('should generate citation tag without URL when invalid', () => { + const citation: Citation = { + number: 2, + url: 'invalid-url', + title: 'Test Title' + } + + const result = generateCitationTag(citation) + + expect(result).toContain('[]()') + expect(result).not.toContain('](invalid-url)') + }) + + it('should handle citation without URL', () => { + const citation: Citation = { + number: 3, + url: '', + title: 'No URL Title' + } + + const result = generateCitationTag(citation) + + expect(result).toContain('[]()') + }) + + it('should use hostname when title is missing', () => { + const citation: Citation = { + number: 4, + url: 'https://example.com', + hostname: 'example.com' + } + + const result = generateCitationTag(citation) + + expect(result).toContain('example.com') + }) + + it('should handle citation with all empty values', () => { + const citation: Citation = { + number: 6, + url: '', + title: '', + hostname: '', + content: '' + } + + const result = generateCitationTag(citation) + + expect(result).toContain('[]()') + }) + + it('should truncate content to 200 characters in data-citation', () => { + 
const longContent = 'a'.repeat(300) + const citation: Citation = { + number: 1, + url: 'https://example.com', + title: 'Test', + content: longContent + } + + const result = generateCitationTag(citation) + const match = result.match(/data-citation='([^']+)'/) + expect(match).not.toBeNull() + if (match) { + const citationData = JSON.parse(match[1].replace(/"/g, '"')) + expect(citationData.content.length).toBe(200) + expect(citationData.content).toBe(longContent.substring(0, 200)) + } + }) + }) + + describe('performance', () => { + it('should handle large content efficiently', () => { + const largeContent = 'Test content '.repeat(10000) + '[1]' + const citations: Citation[] = [{ number: 1, url: 'https://example.com', title: 'Test' }] + + const start = Date.now() + const result = withCitationTags(largeContent, citations) + const end = Date.now() + + expect(result).toContain('[ { + const citations: Citation[] = Array.from({ length: 100 }, (_, i) => ({ + number: i + 1, + url: `https://example${i + 1}.com`, + title: `Test ${i + 1}` + })) + const content = citations.map((c) => `[${c.number}]`).join(' ') + + const start = Date.now() + const result = withCitationTags(content, citations) + const end = Date.now() + + expect(result).toContain('[ { }) describe('convertLinks', () => { + it('should convert number links to numbered links', () => { + const input = '参考 [1](https://example.com/1) 和 [2](https://example.com/2)' + const result = convertLinks(input, true) + expect(result.text).toBe('参考 [1](https://example.com/1) 和 [2](https://example.com/2)') + expect(result.hasBufferedContent).toBe(false) + }) + it('should convert links with domain-like text to numbered links', () => { const input = '查看这个网站 [example.com](https://example.com)' const result = convertLinks(input, true) @@ -375,4 +383,13 @@ describe('linkConverter', () => { expect(result).toBe('[链接1](https://example.com)[链接2](https://other.com)') }) }) + + describe('completionPerplexityLinks', () => { + it('should complete 
links with webSearch data', () => { + const webSearch = [{ url: 'https://example.com/1' }, { url: 'https://example.com/2' }] + const input = '参考 [1] 和 [2]' + const result = completionPerplexityLinks(input, webSearch) + expect(result).toBe('参考 [1](https://example.com/1) 和 [2](https://example.com/2)') + }) + }) }) diff --git a/src/renderer/src/utils/citation.ts b/src/renderer/src/utils/citation.ts new file mode 100644 index 0000000000..da2cee7706 --- /dev/null +++ b/src/renderer/src/utils/citation.ts @@ -0,0 +1,210 @@ +import { GroundingSupport } from '@google/genai' +import { Citation, WebSearchSource } from '@renderer/types' + +import { cleanMarkdownContent, encodeHTML } from './formats' + +/** + * 从多个 citationReference 中获取第一个有效的 source + * @returns WebSearchSource + */ +export function determineCitationSource( + citationReferences: Array<{ citationBlockId?: string; citationBlockSource?: WebSearchSource }> | undefined +): WebSearchSource | undefined { + // 从 citationReferences 获取第一个有效的 source + if (citationReferences?.length) { + const validReference = citationReferences.find((ref) => ref.citationBlockSource) + return validReference?.citationBlockSource + } + + return undefined +} + +/** + * 把文本内容中的引用标记转换为完整的引用标签 + * - 标准化引用标记 + * - 转换标记为用于渲染的标签 + * + * @param content 原始文本内容 + * @param citations 原始引用列表 + * @param sourceType 引用来源类型 + * @returns 处理后的文本内容 + */ +export function withCitationTags(content: string, citations: Citation[], sourceType?: WebSearchSource): string { + if (!content || citations.length === 0) return content + + const formattedCitations = citations.map((citation) => ({ + ...citation, + content: citation.content ? 
cleanMarkdownContent(citation.content) : citation.content + })) + + const citationMap = new Map(formattedCitations.map((c) => [c.number, c])) + + const normalizedContent = normalizeCitationMarks(content, citationMap, sourceType) + + return mapCitationMarksToTags(normalizedContent, citationMap) +} + +/** + * 标准化引用标记,统一转换为 [cite:N] 格式: + * - OpenAI 格式: [N](url) → [cite:N] + * - Gemini 格式: 根据metadata添加 [cite:N] + * - 其他格式: [N] → [cite:N] + * + * 算法: + * - one pass + 正则替换 + * - 跳过代码块等特殊上下文 + * + * @param content 原始文本内容 + * @param citationMap 引用映射表 + * @param sourceType 引用来源类型 + * @returns 标准化后的文本内容 + */ +export function normalizeCitationMarks( + content: string, + citationMap: Map, + sourceType?: WebSearchSource +): string { + // 识别需要跳过的代码区域,注意:indented code block已被禁用,不需要跳过 + const codeBlockRegex = /```[\s\S]*?```|`[^`\n]*`/gm + const skipRanges: Array<{ start: number; end: number }> = [] + + let match + while ((match = codeBlockRegex.exec(content)) !== null) { + skipRanges.push({ + start: match.index, + end: match.index + match[0].length + }) + } + + // 检查位置是否在代码块内 + const shouldSkip = (pos: number): boolean => { + for (const range of skipRanges) { + if (pos >= range.start && pos < range.end) return true + if (range.start > pos) break // 已排序,可以提前结束 + } + return false + } + + // 统一的替换函数 + const applyReplacements = (regex: RegExp, getReplacementFn: (match: RegExpExecArray) => string | null) => { + const replacements: Array<{ start: number; end: number; replacement: string }> = [] + + regex.lastIndex = 0 // 重置正则状态 + let match: RegExpExecArray | null + while ((match = regex.exec(content)) !== null) { + if (!shouldSkip(match.index)) { + const replacement = getReplacementFn(match) + if (replacement !== null) { + replacements.push({ + start: match.index, + end: match.index + match[0].length, + replacement + }) + } + } + } + + // 从后往前替换避免位置偏移 + replacements.reverse().forEach(({ start, end, replacement }) => { + content = content.slice(0, start) + replacement + 
content.slice(end) + }) + } + + switch (sourceType) { + case WebSearchSource.OPENAI: + case WebSearchSource.OPENAI_RESPONSE: + case WebSearchSource.PERPLEXITY: { + // OpenAI 格式: [N](url) → [cite:N] + applyReplacements(/\[(\d+)<\/sup>\]\([^)]*\)/g, (match) => { + const citationNum = parseInt(match[1], 10) + return citationMap.has(citationNum) ? `[cite:${citationNum}]` : null + }) + break + } + case WebSearchSource.GEMINI: { + // Gemini 格式: 根据metadata添加 [cite:N] + const firstCitation = Array.from(citationMap.values())[0] + if (firstCitation?.metadata) { + const textReplacements = new Map() + + // 收集所有需要替换的文本 + firstCitation.metadata.forEach((support: GroundingSupport) => { + if (!support.groundingChunkIndices || !support.segment?.text) return + + const citationNums = support.groundingChunkIndices + const text = support.segment.text + const basicTag = citationNums + .map((citationNum) => { + const citation = citationMap.get(citationNum + 1) + return citation ? `[cite:${citationNum + 1}]` : '' + }) + .filter(Boolean) + .join('') + + if (basicTag) { + textReplacements.set(text, `${text}${basicTag}`) + } + }) + + // 一次性应用所有替换 + textReplacements.forEach((replacement, originalText) => { + const escapedText = originalText.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + applyReplacements(new RegExp(escapedText, 'g'), () => replacement) + }) + } + break + } + default: { + // 简单数字格式: [N] → [cite:N] + applyReplacements(/\[(\d+)\]/g, (match) => { + const citationNum = parseInt(match[1], 10) + return citationMap.has(citationNum) ? 
`[cite:${citationNum}]` : null + }) + } + } + + return content +} + +/** + * 把文本内容中的 [cite:N] 标记转换为用于渲染的标签 + * @param content 原始文本内容 + * @param citationMap 引用映射表 + * @returns 处理后的文本内容 + */ +export function mapCitationMarksToTags(content: string, citationMap: Map): string { + // 统一替换所有 [cite:N] 标记 + return content.replace(/\[cite:(\d+)\]/g, (match, num) => { + const citationNum = parseInt(num, 10) + const citation = citationMap.get(citationNum) + + if (citation) { + return generateCitationTag(citation) + } + + // 如果没找到对应的引用数据,保持原样(应该不会发生) + return match + }) +} + +/** + * 生成单个用于渲染的引用标签 + * @param citation 引用数据 + * @returns 渲染后的引用标签 + */ +export function generateCitationTag(citation: Citation): string { + const supData = { + id: citation.number, + url: citation.url, + title: citation.title || citation.hostname || '', + content: citation.content?.substring(0, 200) + } + const citationJson = encodeHTML(JSON.stringify(supData)) + + // 判断是否为有效链接 + const isLink = citation.url && citation.url.startsWith('http') + + // 生成链接格式: [N](url) + // 或者生成空括号格式: [N]() + return `[${citation.number}]` + (isLink ? 
`(${citation.url})` : '()') +} diff --git a/src/renderer/src/utils/formats.ts b/src/renderer/src/utils/formats.ts index ee64efd443..5c8aea70ba 100644 --- a/src/renderer/src/utils/formats.ts +++ b/src/renderer/src/utils/formats.ts @@ -88,34 +88,6 @@ export function removeSvgEmptyLines(text: string): string { }) } -// export function withGeminiGrounding(block: MainTextMessageBlock | TranslationMessageBlock): string { -// // TODO -// // const citationBlock = findCitationBlockWithGrounding(block) -// // const groundingSupports = citationBlock?.groundingMetadata?.groundingSupports - -// const content = block.content - -// // if (!groundingSupports || groundingSupports.length === 0) { -// // return content -// // } - -// // groundingSupports.forEach((support) => { -// // const text = support?.segment?.text -// // const indices = support?.groundingChunkIndices - -// // if (!text || !indices) return - -// // const nodes = indices.reduce((acc, index) => { -// // acc.push(`${index + 1}`) -// // return acc -// // }, [] as string[]) - -// // content = content.replace(text, `${text} ${nodes.join(' ')}`) -// // }) - -// return content -// } - export function withGenerateImage(message: Message): { content: string; images?: string[] } { const originalContent = getMainTextContent(message) const imagePattern = new RegExp(`!\\[[^\\]]*\\]\\((.*?)\\s*("(?:.*[^"])")?\\s*\\)`) diff --git a/src/renderer/src/utils/linkConverter.ts b/src/renderer/src/utils/linkConverter.ts index 652c2f4283..220333f027 100644 --- a/src/renderer/src/utils/linkConverter.ts +++ b/src/renderer/src/utils/linkConverter.ts @@ -1,3 +1,5 @@ +import { WebSearchResponse, WebSearchSource } from '@renderer/types' + // Counter for numbering links let linkCounter = 1 // Buffer to hold incomplete link fragments across chunks @@ -236,11 +238,13 @@ export function convertLinks( } // Rule 3: If the link text is not a URL/host, keep the text and add the numbered link - if (!isHost(linkText)) { - result += `${linkText} 
[${counter}](${url})` - } else { - // Rule 2: If the link text is a URL/host, replace with numbered link + // 增加一个条件:如果 linkText 是纯数字,也直接替换 + if (isHost(linkText) || /^\d+$/.test(linkText)) { + // Rule 2: If the link text is a URL/host or purely digits, replace with numbered link result += `[${counter}](${url})` + } else { + // If the link text is neither a URL/host nor purely digits, keep the text and add the numbered link + result += `${linkText} [${counter}](${url})` } position += match[0].length @@ -337,6 +341,25 @@ export function completeLinks(text: string, webSearch: any[]): string { }) } +/** + * 根据webSearch结果补全链接,将[num]转换为[num](webSearch[num-1].url) + * @param {string} text 原始文本 + * @param {any[]} webSearch webSearch结果 + * @returns {string} 补全后的文本 + */ +export function completionPerplexityLinks(text: string, webSearch: any[]): string { + return text.replace(/\[(\d+)\]/g, (match, numStr) => { + const num = parseInt(numStr) + const index = num - 1 + // 检查 webSearch 数组中是否存在对应的 URL + if (index >= 0 && index < webSearch.length && webSearch[index].url) { + return `[${num}](${webSearch[index].url})` + } + // 如果没有找到对应的 URL,保持原样 + return match + }) +} + /** * 从Markdown文本中提取所有URL * 支持以下格式: @@ -463,8 +486,18 @@ export function extractWebSearchReferences(text: string): Array<{ export function smartLinkConverter( text: string, providerType: string = 'openai', - resetCounter: boolean = false + resetCounter: boolean = false, + webSearchResults?: WebSearchResponse ): { text: string; hasBufferedContent: boolean } { + if (webSearchResults) { + const webSearch = webSearchResults.results + switch (webSearchResults.source) { + case WebSearchSource.PERPLEXITY: { + text = completionPerplexityLinks(text, webSearch as any[]) + break + } + } + } // 检测文本中的引用模式 const references = extractWebSearchReferences(text)