From 12f4500c475afadad9255342cab8007f1eb9978b Mon Sep 17 00:00:00 2001
From: SuYao
Date: Wed, 7 May 2025 22:09:12 +0800
Subject: [PATCH] refactor(middleware): add extractReasoningMiddleware for enhanced text processing (#5637)

---
 .../middlewares/extractReasoningMiddleware.ts | 118 +++++
 .../src/providers/AiProvider/BaseProvider.ts  |  48 ---
 .../AiProvider/OpenAICompatibleProvider.ts    | 407 ++++++++----------
 src/renderer/src/utils/formats.ts             |  70 ---
 src/renderer/src/utils/getPotentialIndex.ts   |  29 ++
 src/renderer/src/utils/stream.ts              |  25 ++
 6 files changed, 355 insertions(+), 342 deletions(-)
 create mode 100644 src/renderer/src/middlewares/extractReasoningMiddleware.ts
 create mode 100644 src/renderer/src/utils/getPotentialIndex.ts
 create mode 100644 src/renderer/src/utils/stream.ts

diff --git a/src/renderer/src/middlewares/extractReasoningMiddleware.ts b/src/renderer/src/middlewares/extractReasoningMiddleware.ts
new file mode 100644
index 0000000000..4c39925aaf
--- /dev/null
+++ b/src/renderer/src/middlewares/extractReasoningMiddleware.ts
@@ -0,0 +1,118 @@
+// Modified from https://github.com/vercel/ai/blob/845080d80b8538bb9c7e527d2213acb5f33ac9c2/packages/ai/core/middleware/extract-reasoning-middleware.ts
+
+import { getPotentialStartIndex } from '../utils/getPotentialIndex'
+
+export interface ExtractReasoningMiddlewareOptions {
+  openingTag: string
+  closingTag: string
+  separator?: string
+  enableReasoning?: boolean
+}
+
+function escapeRegExp(str: string) {
+  return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
+}
+
+// Supports a generic T; defaults to T = { type: string; textDelta: string }
+export function extractReasoningMiddleware({
+  openingTag,
+  closingTag,
+  separator = '\n',
+  enableReasoning
+}: ExtractReasoningMiddlewareOptions) {
+  const openingTagEscaped = escapeRegExp(openingTag)
+  const closingTagEscaped = escapeRegExp(closingTag)
+
+  return {
+    wrapGenerate: async ({ doGenerate }: { doGenerate: () => Promise<{ text: string } & Record<string, any>> }) => {
+      const { text: rawText, ...rest } = await doGenerate()
+      if (rawText == null) {
+        return { text: rawText, ...rest }
+      }
+      const text = rawText
+      const regexp = new RegExp(`${openingTagEscaped}(.*?)${closingTagEscaped}`, 'gs')
+      const matches = Array.from(text.matchAll(regexp))
+      if (!matches.length) {
+        return { text, ...rest }
+      }
+      const reasoning = matches.map((match: RegExpMatchArray) => match[1]).join(separator)
+      let textWithoutReasoning = text
+      for (let i = matches.length - 1; i >= 0; i--) {
+        const match = matches[i] as RegExpMatchArray
+        const beforeMatch = textWithoutReasoning.slice(0, match.index as number)
+        const afterMatch = textWithoutReasoning.slice((match.index as number) + match[0].length)
+        textWithoutReasoning =
+          beforeMatch + (beforeMatch.length > 0 && afterMatch.length > 0 ? separator : '') + afterMatch
+      }
+      return { ...rest, text: textWithoutReasoning, reasoning }
+    },
+    wrapStream: async ({
+      doStream
+    }: {
+      doStream: () => Promise<{ stream: ReadableStream } & Record<string, any>>
+    }) => {
+      const { stream, ...rest } = await doStream()
+      if (!enableReasoning) {
+        return {
+          stream,
+          ...rest
+        }
+      }
+      let isFirstReasoning = true
+      let isFirstText = true
+      let afterSwitch = false
+      let isReasoning = false
+      let buffer = ''
+      return {
+        stream: stream.pipeThrough(
+          new TransformStream({
+            transform: (chunk, controller) => {
+              if (chunk.type !== 'text-delta') {
+                controller.enqueue(chunk)
+                return
+              }
+              // @ts-expect-error: textDelta only exists on text-delta/reasoning chunks
+              buffer += chunk.textDelta
+              function publish(text: string) {
+                if (text.length > 0) {
+                  const prefix = afterSwitch && (isReasoning ? !isFirstReasoning : !isFirstText) ? separator : ''
+                  controller.enqueue({
+                    ...chunk,
+                    type: isReasoning ? 'reasoning' : 'text-delta',
+                    textDelta: prefix + text
+                  })
+                  afterSwitch = false
+                  if (isReasoning) {
+                    isFirstReasoning = false
+                  } else {
+                    isFirstText = false
+                  }
+                }
+              }
+              while (true) {
+                const nextTag = isReasoning ? closingTag : openingTag
+                const startIndex = getPotentialStartIndex(buffer, nextTag)
+                if (startIndex == null) {
+                  publish(buffer)
+                  buffer = ''
+                  break
+                }
+                publish(buffer.slice(0, startIndex))
+                const foundFullMatch = startIndex + nextTag.length <= buffer.length
+                if (foundFullMatch) {
+                  buffer = buffer.slice(startIndex + nextTag.length)
+                  isReasoning = !isReasoning
+                  afterSwitch = true
+                } else {
+                  buffer = buffer.slice(startIndex)
+                  break
+                }
+              }
+            }
+          })
+        ),
+        ...rest
+      }
+    }
+  }
+}
diff --git a/src/renderer/src/providers/AiProvider/BaseProvider.ts b/src/renderer/src/providers/AiProvider/BaseProvider.ts
index dfca89790d..f39f09b20c 100644
--- a/src/renderer/src/providers/AiProvider/BaseProvider.ts
+++ b/src/renderer/src/providers/AiProvider/BaseProvider.ts
@@ -15,7 +15,6 @@ import type { Message } from '@renderer/types/newMessage'
 import { delay, isJSON, parseJSON } from '@renderer/utils'
 import { addAbortController, removeAbortController } from '@renderer/utils/abortController'
 import { formatApiHost } from '@renderer/utils/api'
-import { glmZeroPreviewProcessor, thinkTagProcessor, ThoughtProcessor } from '@renderer/utils/formats'
 import { getMainTextContent } from '@renderer/utils/messageUtils/find'
 import { isEmpty } from 'lodash'
 import type OpenAI from 'openai'
@@ -230,51 +229,4 @@ export default abstract class BaseProvider {
       cleanup
     }
   }
-
-  /**
-   * Finds the appropriate thinking processor for a given text chunk and model.
-   * Returns the processor if found, otherwise undefined.
-   */
-  protected findThinkingProcessor(chunkText: string, model: Model | undefined): ThoughtProcessor | undefined {
-    if (!model) return undefined
-
-    const processors: ThoughtProcessor[] = [thinkTagProcessor, glmZeroPreviewProcessor]
-    return processors.find((p) => p.canProcess(chunkText, model))
-  }
-
-  /**
-   * Returns a function closure that handles incremental reasoning text for a specific stream.
-   * The returned function processes a chunk, emits THINKING_DELTA for new reasoning,
-   * and returns the associated content part.
-   */
-  protected handleThinkingTags() {
-    let memoizedReasoning = ''
-    // Returns a function that handles a single chunk potentially containing thinking tags
-    return (chunkText: string, processor: ThoughtProcessor, onChunk: (chunk: any) => void): string => {
-      // Returns the processed content part
-      const { reasoning, content } = processor.process(chunkText)
-      let deltaReasoning = ''
-
-      if (reasoning && reasoning.trim()) {
-        // Check if the new reasoning starts with the previous one
-        if (reasoning.startsWith(memoizedReasoning)) {
-          deltaReasoning = reasoning.substring(memoizedReasoning.length)
-        } else {
-          // If not a continuation, send the whole new reasoning
-          deltaReasoning = reasoning
-          // console.warn("Thinking content did not start with previous memoized version. Sending full content.")
-        }
-        memoizedReasoning = reasoning // Update memoized state
-      } else {
-        // If no reasoning, reset memoized state? Let's reset.
-        memoizedReasoning = ''
-      }
-
-      if (deltaReasoning) {
-        onChunk({ type: ChunkType.THINKING_DELTA, text: deltaReasoning })
-      }
-
-      return content // Return the content part for TEXT_DELTA emission
-    }
-  }
 }
diff --git a/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts b/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts
index 437f384cb4..950b2c138f 100644
--- a/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts
+++ b/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts
@@ -18,6 +18,7 @@ import {
 } from '@renderer/config/models'
 import { getStoreSetting } from '@renderer/hooks/useSettings'
 import i18n from '@renderer/i18n'
+import { extractReasoningMiddleware } from '@renderer/middlewares/extractReasoningMiddleware'
 import { getAssistantSettings, getDefaultModel, getTopNamingModel } from '@renderer/services/AssistantService'
 import { EVENT_NAMES } from '@renderer/services/EventService'
 import {
@@ -51,6 +52,7 @@ import {
 import { mcpToolCallResponseToOpenAICompatibleMessage, parseAndCallTools } from '@renderer/utils/mcp-tools'
 import { findFileBlocks, findImageBlocks, getMainTextContent } from '@renderer/utils/messageUtils/find'
 import { buildSystemPrompt } from '@renderer/utils/prompt'
+import { asyncGeneratorToReadableStream, readableStreamAsyncIterable } from '@renderer/utils/stream'
 import { isEmpty, takeRight } from 'lodash'
 import OpenAI, { AzureOpenAI } from 'openai'
 import {
@@ -62,6 +64,11 @@ import {
 import { CompletionsParams } from '.'
 import OpenAIProvider from './OpenAIProvider'
+// 1. Define the union type of chunks flowing through the middleware
+export type OpenAIStreamChunk =
+  | { type: 'reasoning' | 'text-delta'; textDelta: string }
+  | { type: 'finish'; finishReason: any; usage: any; delta: any; chunk: any }
+
 export default class OpenAICompatibleProvider extends OpenAIProvider {
   constructor(provider: Provider) {
     super(provider)
   }
@@ -331,6 +338,10 @@ export default class OpenAICompatibleProvider extends OpenAIProvider {
     const { contextCount, maxTokens, streamOutput } = getAssistantSettings(assistant)
     const isEnabledWebSearch = assistant.enableWebSearch || !!assistant.webSearchProviderId
     messages = addImageFileToContents(messages)
+    const enableReasoning =
+      ((isSupportedThinkingTokenModel(model) || isSupportedReasoningEffortModel(model)) &&
+        assistant.settings?.reasoning_effort !== undefined) ||
+      (isReasoningModel(model) && (!isSupportedThinkingTokenModel(model) || !isSupportedReasoningEffortModel(model)))
     let systemMessage = { role: 'system', content: assistant.prompt || '' }
     if (isSupportedReasoningEffortOpenAIModel(model)) {
       systemMessage = {
@@ -357,52 +368,7 @@ export default class OpenAICompatibleProvider extends OpenAIProvider {
       return streamOutput
     }
 
-    let hasReasoningContent = false
-    let lastChunk = ''
-    const isReasoningJustDone = (
-      delta: OpenAI.Chat.Completions.ChatCompletionChunk.Choice.Delta & {
-        reasoning_content?: string
-        reasoning?: string
-        thinking?: string
-      }
-    ) => {
-      if (!delta?.content) {
-        return false
-      }
-
-      // 检查当前chunk和上一个chunk的组合是否形成###Response标记
-      const combinedChunks = lastChunk + delta.content
-      lastChunk = delta.content
-
-      // 检测思考结束
-      if (combinedChunks.includes('###Response') || delta.content === '') {
-        return true
-      }
-
-      // 如果有reasoning_content或reasoning,说明是在思考中
-      if (delta?.reasoning_content || delta?.reasoning || delta?.thinking) {
-        hasReasoningContent = true
-      }
-
-      // 如果之前有reasoning_content或reasoning,现在有普通content,说明思考结束
-      return !!(hasReasoningContent && delta.content)
-    }
-
-    let time_first_token_millsec = 0
-    let time_first_token_millsec_delta = 0
-    let time_first_content_millsec = 0
     const start_time_millsec = new Date().getTime()
-    console.log(
-      `completions start_time_millsec ${new Date(start_time_millsec).toLocaleString(undefined, {
-        year: 'numeric',
-        month: 'numeric',
-        day: 'numeric',
-        hour: 'numeric',
-        minute: 'numeric',
-        second: 'numeric',
-        fractionalSecondDigits: 3
-      })}`
-    )
     const lastUserMessage = _messages.findLast((m) => m.role === 'user')
     const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
     const { signal } = abortController
@@ -436,9 +402,7 @@ export default class OpenAICompatibleProvider extends OpenAIProvider {
           } as ChatCompletionMessageParam)
           toolResults.forEach((ts) => reqMessages.push(ts as ChatCompletionMessageParam))
 
-          console.debug('[tool] reqMessages before processing', model.id, reqMessages)
           reqMessages = processReqMessages(model, reqMessages)
-          console.debug('[tool] reqMessages', model.id, reqMessages)
           const newStream = await this.sdk.chat.completions
             // @ts-ignore key is not typed
             .create(
@@ -499,193 +463,192 @@ export default class OpenAICompatibleProvider extends OpenAIProvider {
       // let isThinkingInContent: ThoughtProcessor | undefined = undefined
       // const processThinkingChunk = this.handleThinkingTags()
       let isFirstChunk = true
-      let isFirstThinkingChunk = true
-      for await (const chunk of stream) {
-        if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
-          break
-        }
+      let time_first_token_millsec = 0
+      let time_first_token_millsec_delta = 0
+      let time_first_content_millsec = 0
+      let time_thinking_start = 0
-        const delta = chunk.choices[0]?.delta
-        const finishReason = chunk.choices[0]?.finish_reason
-
-        // --- Incremental onChunk calls ---
-
-        // 1. Reasoning Content
-        const reasoningContent = delta?.reasoning_content || delta?.reasoning
-        const currentTime = new Date().getTime() // Get current time for each chunk
-
-        if (time_first_token_millsec === 0 && isFirstThinkingChunk && reasoningContent) {
-          // 记录第一个token的时间
-          time_first_token_millsec = currentTime
-          // 记录第一个token的时间差
-          time_first_token_millsec_delta = currentTime - start_time_millsec
-          console.log(
-            `completions time_first_token_millsec ${new Date(currentTime).toLocaleString(undefined, {
-              year: 'numeric',
-              month: 'numeric',
-              day: 'numeric',
-              hour: 'numeric',
-              minute: 'numeric',
-              second: 'numeric',
-              fractionalSecondDigits: 3
-            })}`
-          )
-          isFirstThinkingChunk = false
-        }
-        if (reasoningContent) {
-          thinkingContent += reasoningContent
-          hasReasoningContent = true // Keep track if reasoning occurred
-
-          // Calculate thinking time as time elapsed since start until this chunk
-          const thinking_time = currentTime - time_first_token_millsec
-          onChunk({ type: ChunkType.THINKING_DELTA, text: reasoningContent, thinking_millsec: thinking_time })
-        }
-        if (isReasoningJustDone(delta) && time_first_content_millsec === 0) {
-          time_first_content_millsec = currentTime
-          final_time_thinking_millsec_delta = time_first_content_millsec - time_first_token_millsec
-          onChunk({
-            type: ChunkType.THINKING_COMPLETE,
-            text: thinkingContent,
-            thinking_millsec: final_time_thinking_millsec_delta
-          })
-
-          thinkingContent = ''
-          isFirstThinkingChunk = true
-          hasReasoningContent = false
-        }
-
-        // 2. Text Content
-        if (delta?.content) {
-          if (isEnabledWebSearch) {
-            if (delta?.annotations) {
-              delta.content = convertLinks(delta.content || '', isFirstChunk)
-            } else if (assistant.model?.provider === 'openrouter') {
-              delta.content = convertLinksToOpenRouter(delta.content || '', isFirstChunk)
-            } else if (isZhipuModel(assistant.model)) {
-              delta.content = convertLinksToZhipu(delta.content || '', isFirstChunk)
-            } else if (isHunyuanSearchModel(assistant.model)) {
-              delta.content = convertLinksToHunyuan(
-                delta.content || '',
-                chunk.search_info.search_results || [],
-                isFirstChunk
-              )
-            }
-          }
-          // 说明前面没有思考内容
-          if (isFirstChunk && time_first_token_millsec === 0 && time_first_token_millsec_delta === 0) {
-            isFirstChunk = false
-            time_first_token_millsec = currentTime
-            time_first_token_millsec_delta = time_first_token_millsec - start_time_millsec
-          }
-          content += delta.content // Still accumulate for processToolUses
-
-          // isThinkingInContent = this.findThinkingProcessor(content, model)
-          // if (isThinkingInContent) {
-          //   processThinkingChunk(content, isThinkingInContent, onChunk)
-          onChunk({ type: ChunkType.TEXT_DELTA, text: delta.content })
-          // } else {
-          // }
-        }
-        // console.log('delta?.finish_reason', delta?.finish_reason)
-        if (!isEmpty(finishReason) || delta?.annotations) {
-          onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
-          final_time_completion_millsec_delta = currentTime - start_time_millsec
-          console.log(
-            `completions final_time_completion_millsec ${new Date(currentTime).toLocaleString(undefined, {
-              year: 'numeric',
-              month: 'numeric',
-              day: 'numeric',
-              hour: 'numeric',
-              minute: 'numeric',
-              second: 'numeric',
-              fractionalSecondDigits: 3
-            })}`
-          )
-          // 6. Usage (If provided per chunk) - Capture the last known usage
-          if (chunk.usage) {
-            // console.log('chunk.usage', chunk.usage)
-            lastUsage = chunk.usage // Update with the latest usage info
-            // Send incremental usage update if needed by UI (optional, keep if useful)
-            // onChunk({ type: 'block_in_progress', response: { usage: chunk.usage } })
+      // 1. Initialize the reasoning-extraction middleware
+      const reasoningTags = [
+        { openingTag: '<think>', closingTag: '</think>', separator: '\n' },
+        { openingTag: '###Thinking', closingTag: '###Response', separator: '\n' }
+      ]
+      const getAppropriateTag = (model: Model) => {
+        if (model.id.includes('qwen3')) return reasoningTags[0]
+        return reasoningTags[0]
+      }
+      const reasoningTag = getAppropriateTag(model)
+      async function* openAIChunkToTextDelta(stream: any): AsyncGenerator<OpenAIStreamChunk> {
+        for await (const chunk of stream) {
+          if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
+            break
           }
-          // 3. Web Search
-          if (delta?.annotations) {
-            onChunk({
-              type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
-              llm_web_search: {
-                results: delta.annotations,
-                source: WebSearchSource.OPENAI_COMPATIBLE
-              }
-            } as LLMWebSearchCompleteChunk)
+          const delta = chunk.choices[0]?.delta
+          if (delta?.reasoning_content || delta?.reasoning) {
+            yield { type: 'reasoning', textDelta: delta.reasoning_content || delta.reasoning }
+          }
+          if (delta?.content) {
+            yield { type: 'text-delta', textDelta: delta.content }
           }
-          if (assistant.model?.provider === 'perplexity') {
-            const citations = chunk.citations
-            if (citations) {
-              onChunk({
-                type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
-                llm_web_search: {
-                  results: citations,
-                  source: WebSearchSource.PERPLEXITY
-                }
-              } as LLMWebSearchCompleteChunk)
-            }
-          }
-          if (isEnabledWebSearch && isZhipuModel(model) && finishReason === 'stop' && chunk?.web_search) {
-            onChunk({
-              type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
-              llm_web_search: {
-                results: chunk.web_search,
-                source: WebSearchSource.ZHIPU
-              }
-            } as LLMWebSearchCompleteChunk)
-          }
-          if (isEnabledWebSearch && isHunyuanSearchModel(model) && chunk?.search_info?.search_results) {
-            onChunk({
-              type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
-              llm_web_search: {
-                results: chunk.search_info.search_results,
-                source: WebSearchSource.HUNYUAN
-              }
-            } as LLMWebSearchCompleteChunk)
+          const finishReason = chunk.choices[0]?.finish_reason
+          if (!isEmpty(finishReason)) {
+            yield { type: 'finish', finishReason, usage: chunk.usage, delta, chunk }
+            break
           }
         }
       }
-
-        // --- End of Incremental onChunk calls ---
-      } // End of for await loop
-
-      // Call processToolUses AFTER the loop finishes processing the main stream content
-      // Note: parseAndCallTools inside processToolUses should handle its own onChunk for tool responses
-      await processToolUses(content, idx)
-
-      // Send the final block_complete chunk with accumulated data
-      onChunk({
-        type: ChunkType.BLOCK_COMPLETE,
-        response: {
-          // Use the enhanced usage object
-          usage: lastUsage,
-          metrics: {
-            // Get completion tokens from the last usage object if available
-            completion_tokens: lastUsage?.completion_tokens,
-            time_completion_millsec: final_time_completion_millsec_delta,
-            time_first_token_millsec: time_first_token_millsec_delta,
-            time_thinking_millsec: final_time_thinking_millsec_delta
-          }
-        }
+      // 2. Wrap the SDK stream with the middleware
+      const { stream: processedStream } = await extractReasoningMiddleware({
+        openingTag: reasoningTag?.openingTag,
+        closingTag: reasoningTag?.closingTag,
+        separator: reasoningTag?.separator,
+        enableReasoning
+      }).wrapStream({
+        doStream: async () => ({
+          stream: asyncGeneratorToReadableStream(openAIChunkToTextDelta(stream))
+        })
       })
-      // FIXME: 临时方案,重置时间戳和思考内容
-      time_first_token_millsec = 0
-      time_first_content_millsec = 0
+      // 3. Consume processedStream and dispatch onChunk
+      for await (const chunk of readableStreamAsyncIterable(processedStream)) {
+        const currentTime = new Date().getTime()
+        const delta = chunk.type === 'finish' ? chunk.delta : chunk
+        const rawChunk = chunk.type === 'finish' ? chunk.chunk : chunk
 
-      // OpenAI stream typically doesn't provide a final summary chunk easily.
-      // We are sending per-chunk usage if available.
+        switch (chunk.type) {
+          case 'reasoning': {
+            if (time_thinking_start === 0) {
+              time_thinking_start = currentTime
+              time_first_token_millsec = currentTime
+              time_first_token_millsec_delta = currentTime - start_time_millsec
+            }
+            thinkingContent += chunk.textDelta
+            const thinking_time = currentTime - time_thinking_start
+            onChunk({ type: ChunkType.THINKING_DELTA, text: chunk.textDelta, thinking_millsec: thinking_time })
+            break
+          }
+          case 'text-delta': {
+            let textDelta = chunk.textDelta
+
+            if (assistant.enableWebSearch && delta) {
+              const originalDelta = rawChunk?.choices?.[0]?.delta
+
+              if (originalDelta?.annotations) {
+                textDelta = convertLinks(textDelta, isFirstChunk)
+              } else if (assistant.model?.provider === 'openrouter') {
+                textDelta = convertLinksToOpenRouter(textDelta, isFirstChunk)
+              } else if (isZhipuModel(assistant.model)) {
+                textDelta = convertLinksToZhipu(textDelta, isFirstChunk)
+              } else if (isHunyuanSearchModel(assistant.model)) {
+                const searchResults = rawChunk?.search_info?.search_results || []
+                textDelta = convertLinksToHunyuan(textDelta, searchResults, isFirstChunk)
+              }
+            }
+            if (isFirstChunk) {
+              isFirstChunk = false
+              if (time_first_token_millsec === 0) {
+                time_first_token_millsec = currentTime
+                time_first_token_millsec_delta = currentTime - start_time_millsec
+              }
+            }
+            content += textDelta
+            if (time_thinking_start > 0 && time_first_content_millsec === 0) {
+              time_first_content_millsec = currentTime
+              final_time_thinking_millsec_delta = time_first_content_millsec - time_thinking_start
+
+              onChunk({
+                type: ChunkType.THINKING_COMPLETE,
+                text: thinkingContent,
+                thinking_millsec: final_time_thinking_millsec_delta
+              })
+            }
+            onChunk({ type: ChunkType.TEXT_DELTA, text: textDelta })
+            break
+          }
+          case 'finish': {
+            const finishReason = chunk.finishReason
+            const usage = chunk.usage
+            const originalFinishDelta = chunk.delta
+            const originalFinishRawChunk = chunk.chunk
+
+            if (!isEmpty(finishReason)) {
+              onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
+              final_time_completion_millsec_delta = currentTime - start_time_millsec
+              if (usage) {
+                lastUsage = usage
+              }
+              if (originalFinishDelta?.annotations) {
+                onChunk({
+                  type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
+                  llm_web_search: {
+                    results: originalFinishDelta.annotations,
+                    source: WebSearchSource.OPENAI
+                  }
+                } as LLMWebSearchCompleteChunk)
+              }
+              if (assistant.model?.provider === 'perplexity') {
+                const citations = originalFinishRawChunk.citations
+                if (citations) {
+                  onChunk({
+                    type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
+                    llm_web_search: {
+                      results: citations,
+                      source: WebSearchSource.PERPLEXITY
+                    }
+                  } as LLMWebSearchCompleteChunk)
+                }
+              }
+              if (
+                isEnabledWebSearch &&
+                isZhipuModel(model) &&
+                finishReason === 'stop' &&
+                originalFinishRawChunk?.web_search
+              ) {
+                onChunk({
+                  type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
+                  llm_web_search: {
+                    results: originalFinishRawChunk.web_search,
+                    source: WebSearchSource.ZHIPU
+                  }
+                } as LLMWebSearchCompleteChunk)
+              }
+              if (
+                isEnabledWebSearch &&
+                isHunyuanSearchModel(model) &&
+                originalFinishRawChunk?.search_info?.search_results
+              ) {
+                onChunk({
+                  type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
+                  llm_web_search: {
+                    results: originalFinishRawChunk.search_info.search_results,
+                    source: WebSearchSource.HUNYUAN
+                  }
+                } as LLMWebSearchCompleteChunk)
+              }
+            }
+            await processToolUses(content, idx)
+            onChunk({
+              type: ChunkType.BLOCK_COMPLETE,
+              response: {
+                usage: lastUsage,
+                metrics: {
+                  completion_tokens: lastUsage?.completion_tokens,
+                  time_completion_millsec: final_time_completion_millsec_delta,
+                  time_first_token_millsec: time_first_token_millsec_delta,
+                  time_thinking_millsec: final_time_thinking_millsec_delta
+                }
+              }
+            })
+            break
+          }
+        }
+      }
     }
 
-    console.debug('[completions] reqMessages before processing', model.id, reqMessages)
     reqMessages = processReqMessages(model, reqMessages)
-    console.debug('[completions] reqMessages', model.id, reqMessages)
     // 等待接口返回流
     onChunk({ type: ChunkType.LLM_RESPONSE_CREATED })
     const stream = await this.sdk.chat.completions
@@ -835,7 +798,6 @@ export default class OpenAICompatibleProvider extends OpenAIProvider {
     await this.checkIsCopilot()
 
-    console.debug('[summaries] reqMessages', model.id, [systemMessage, userMessage])
     // @ts-ignore key is not typed
     const response = await this.sdk.chat.completions.create({
       model: model.id,
@@ -877,10 +839,8 @@ export default class OpenAICompatibleProvider extends OpenAIProvider {
       role: 'user',
       content: userMessageContent
     }
-    console.debug('[summaryForSearch] reqMessages', model.id, [systemMessage, userMessage])
 
     const lastUserMessage = messages[messages.length - 1]
-    console.log('lastUserMessage?.id', lastUserMessage?.id)
     const { abortController, cleanup } = this.createAbortController(lastUserMessage?.id)
     const { signal } = abortController
 
@@ -989,7 +949,6 @@ export default class OpenAICompatibleProvider extends OpenAIProvider {
     try {
       await this.checkIsCopilot()
 
-      console.debug('[checkModel] body', model.id, body)
       if (!stream) {
        const response = await this.sdk.chat.completions.create(body as ChatCompletionCreateParamsNonStreaming)
         if (!response?.choices[0].message) {
diff --git a/src/renderer/src/utils/formats.ts b/src/renderer/src/utils/formats.ts
index 4c7878ef9c..43f539d79f 100644
--- a/src/renderer/src/utils/formats.ts
+++ b/src/renderer/src/utils/formats.ts
@@ -1,4 +1,3 @@
-import { Model } from '@renderer/types'
 import type { Message } from '@renderer/types/newMessage'
 
 import { findImageBlocks, getMainTextContent } from './messageUtils/find'
@@ -99,75 +98,6 @@ export function removeSvgEmptyLines(text: string): string {
 //   return content
 // }
 
-export interface ThoughtProcessor {
-  canProcess: (content: string, model?: Model) => boolean
-  process: (content: string) => { reasoning: string; content: string }
-}
-
-export const glmZeroPreviewProcessor: ThoughtProcessor = {
-  canProcess: (content: string, model?: Model) => {
-    if (!model) return false
-
-    const modelId = model.id || ''
-    const modelName = model.name || ''
-    const isGLMZeroPreview =
-      modelId.toLowerCase().includes('glm-zero-preview') || modelName.toLowerCase().includes('glm-zero-preview')
-
-    return isGLMZeroPreview && content.includes('###Thinking')
-  },
-  process: (content: string) => {
-    const parts = content.split('###')
-    const thinkingMatch = parts.find((part) => part.trim().startsWith('Thinking'))
-    const responseMatch = parts.find((part) => part.trim().startsWith('Response'))
-
-    return {
-      reasoning: thinkingMatch ? thinkingMatch.replace('Thinking', '').trim() : '',
-      content: responseMatch ? responseMatch.replace('Response', '').trim() : ''
-    }
-  }
-}
-
-export const thinkTagProcessor: ThoughtProcessor = {
-  canProcess: (content: string, model?: Model) => {
-    if (!model) return false
-
-    return content.startsWith('<think>') || content.includes('</think>')
-  },
-  process: (content: string) => {
-    // 处理正常闭合的 think 标签
-    const thinkPattern = /^<think>(.*?)<\/think>/s
-    const matches = content.match(thinkPattern)
-    if (matches) {
-      return {
-        reasoning: matches[1].trim(),
-        content: content.replace(thinkPattern, '').trim()
-      }
-    }
-
-    // 处理只有结束标签的情况
-    if (content.includes('</think>') && !content.startsWith('<think>')) {
-      const parts = content.split('</think>')
-      return {
-        reasoning: parts[0].trim(),
-        content: parts.slice(1).join('').trim()
-      }
-    }
-
-    // 处理只有开始标签的情况
-    if (content.startsWith('<think>')) {
-      return {
-        reasoning: content.slice(7).trim(), // 跳过 '<think>' 标签
-        content: ''
-      }
-    }
-
-    return {
-      reasoning: '',
-      content
-    }
-  }
-}
-
 export function withGenerateImage(message: Message): { content: string; images?: string[] } {
   const originalContent = getMainTextContent(message)
   const imagePattern = new RegExp(`!\\[[^\\]]*\\]\\((.*?)\\s*("(?:.*[^"])")?\\s*\\)`)
diff --git a/src/renderer/src/utils/getPotentialIndex.ts b/src/renderer/src/utils/getPotentialIndex.ts
new file mode 100644
index 0000000000..4cec103316
--- /dev/null
+++ b/src/renderer/src/utils/getPotentialIndex.ts
@@ -0,0 +1,29 @@
+// Copied from https://github.com/vercel/ai/blob/main/packages/ai/core/util/get-potential-start-index.ts
+
+/**
+ * Returns the index of the start of the searchedText in the text, or null if it
+ * is not found.
+ */
+export function getPotentialStartIndex(text: string, searchedText: string): number | null {
+  // Return null immediately if searchedText is empty.
+  if (searchedText.length === 0) {
+    return null
+  }
+
+  // Check if the searchedText exists as a direct substring of text.
+  const directIndex = text.indexOf(searchedText)
+  if (directIndex !== -1) {
+    return directIndex
+  }
+
+  // Otherwise, look for the largest suffix of "text" that matches
+  // a prefix of "searchedText". We go from the end of text inward.
+  for (let i = text.length - 1; i >= 0; i--) {
+    const suffix = text.substring(i)
+    if (searchedText.startsWith(suffix)) {
+      return i
+    }
+  }
+
+  return null
+}
diff --git a/src/renderer/src/utils/stream.ts b/src/renderer/src/utils/stream.ts
new file mode 100644
index 0000000000..77119db8f1
--- /dev/null
+++ b/src/renderer/src/utils/stream.ts
@@ -0,0 +1,25 @@
+export function readableStreamAsyncIterable<T>(stream: ReadableStream<T>): AsyncIterable<T> {
+  const reader = stream.getReader()
+  return {
+    [Symbol.asyncIterator](): AsyncIterator<T> {
+      return {
+        async next(): Promise<IteratorResult<T>> {
+          return reader.read() as Promise<IteratorResult<T>>
+        }
+      }
+    }
+  }
+}
+
+export function asyncGeneratorToReadableStream<T>(gen: AsyncGenerator<T>): ReadableStream<T> {
+  return new ReadableStream({
+    async pull(controller) {
+      const { value, done } = await gen.next()
+      if (done) {
+        controller.close()
+      } else {
+        controller.enqueue(value)
+      }
+    }
+  })
+}
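
Reviewer note: a minimal sketch (not part of the patch) of how the new pieces are intended to compose. The `fakeDeltas` generator and its literal chunk values are hypothetical stand-ins for the SDK stream; only `extractReasoningMiddleware`, `asyncGeneratorToReadableStream` and `readableStreamAsyncIterable` come from this change.

```ts
import { extractReasoningMiddleware } from '@renderer/middlewares/extractReasoningMiddleware'
import { asyncGeneratorToReadableStream, readableStreamAsyncIterable } from '@renderer/utils/stream'

// Hypothetical stand-in for the provider stream: already mapped to the
// middleware's input shape ({ type: 'text-delta', textDelta }).
async function* fakeDeltas() {
  yield { type: 'text-delta' as const, textDelta: '<think>plan the answer</think>' }
  yield { type: 'text-delta' as const, textDelta: 'Final answer.' }
}

async function demo() {
  const middleware = extractReasoningMiddleware({
    openingTag: '<think>',
    closingTag: '</think>',
    enableReasoning: true
  })
  // wrapStream buffers text-delta chunks and re-emits them split by tag.
  const { stream } = await middleware.wrapStream({
    doStream: async () => ({ stream: asyncGeneratorToReadableStream(fakeDeltas()) })
  })
  for await (const chunk of readableStreamAsyncIterable(stream)) {
    // Expected: { type: 'reasoning', textDelta: 'plan the answer' },
    // then { type: 'text-delta', textDelta: 'Final answer.' }
    console.log(chunk)
  }
}

void demo()
```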
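A small illustration, with made-up inputs, of why getPotentialStartIndex matters for streaming: besides exact matches it also reports a trailing partial match, so the transform keeps buffering instead of emitting half a tag as text.

```ts
import { getPotentialStartIndex } from '@renderer/utils/getPotentialIndex'

// Full match: the tag is present, so its index is returned.
console.log(getPotentialStartIndex('abc<think>x', '<think>')) // 3

// Partial match: the buffer ends with a prefix of the tag ('<th'),
// so the index of that suffix is returned and the caller waits for more data.
console.log(getPotentialStartIndex('abc<th', '<think>')) // 3

// No overlap at all: null, so the whole buffer can be flushed as plain text.
console.log(getPotentialStartIndex('abc', '<think>')) // null
```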