Mirror of https://github.com/CherryHQ/cherry-studio.git (synced 2026-01-10 23:59:45 +08:00)
refactor(middleware): add extractReasoningMiddleware for enhanced text processing (#5637)
commit f8b4b41b91
parent 06a590328c
src/renderer/src/middlewares/extractReasoningMiddleware.ts (new file, 118 lines)

@@ -0,0 +1,118 @@
// Modified from https://github.com/vercel/ai/blob/845080d80b8538bb9c7e527d2213acb5f33ac9c2/packages/ai/core/middleware/extract-reasoning-middleware.ts

import { getPotentialStartIndex } from '../utils/getPotentialIndex'

export interface ExtractReasoningMiddlewareOptions {
  openingTag: string
  closingTag: string
  separator?: string
  enableReasoning?: boolean
}

function escapeRegExp(str: string) {
  return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
}

// Supports a generic T; defaults to T = { type: string; textDelta: string }
export function extractReasoningMiddleware<T extends { type: string } = { type: string; textDelta: string }>({
  openingTag,
  closingTag,
  separator = '\n',
  enableReasoning
}: ExtractReasoningMiddlewareOptions) {
  const openingTagEscaped = escapeRegExp(openingTag)
  const closingTagEscaped = escapeRegExp(closingTag)

  return {
    wrapGenerate: async ({ doGenerate }: { doGenerate: () => Promise<{ text: string } & Record<string, any>> }) => {
      const { text: rawText, ...rest } = await doGenerate()
      if (rawText == null) {
        return { text: rawText, ...rest }
      }
      const text = rawText
      const regexp = new RegExp(`${openingTagEscaped}(.*?)${closingTagEscaped}`, 'gs')
      const matches = Array.from(text.matchAll(regexp))
      if (!matches.length) {
        return { text, ...rest }
      }
      const reasoning = matches.map((match: RegExpMatchArray) => match[1]).join(separator)
      let textWithoutReasoning = text
      for (let i = matches.length - 1; i >= 0; i--) {
        const match = matches[i] as RegExpMatchArray
        const beforeMatch = textWithoutReasoning.slice(0, match.index as number)
        const afterMatch = textWithoutReasoning.slice((match.index as number) + match[0].length)
        textWithoutReasoning =
          beforeMatch + (beforeMatch.length > 0 && afterMatch.length > 0 ? separator : '') + afterMatch
      }
      return { ...rest, text: textWithoutReasoning, reasoning }
    },
    wrapStream: async ({
      doStream
    }: {
      doStream: () => Promise<{ stream: ReadableStream<T> } & Record<string, any>>
    }) => {
      const { stream, ...rest } = await doStream()
      if (!enableReasoning) {
        return {
          stream,
          ...rest
        }
      }
      let isFirstReasoning = true
      let isFirstText = true
      let afterSwitch = false
      let isReasoning = false
      let buffer = ''
      return {
        stream: stream.pipeThrough(
          new TransformStream<T, T>({
            transform: (chunk, controller) => {
              if (chunk.type !== 'text-delta') {
                controller.enqueue(chunk)
                return
              }
              // @ts-expect-error: textDelta only exists on text-delta/reasoning chunks
              buffer += chunk.textDelta
              function publish(text: string) {
                if (text.length > 0) {
                  const prefix = afterSwitch && (isReasoning ? !isFirstReasoning : !isFirstText) ? separator : ''
                  controller.enqueue({
                    ...chunk,
                    type: isReasoning ? 'reasoning' : 'text-delta',
                    textDelta: prefix + text
                  })
                  afterSwitch = false
                  if (isReasoning) {
                    isFirstReasoning = false
                  } else {
                    isFirstText = false
                  }
                }
              }
              while (true) {
                const nextTag = isReasoning ? closingTag : openingTag
                const startIndex = getPotentialStartIndex(buffer, nextTag)
                if (startIndex == null) {
                  publish(buffer)
                  buffer = ''
                  break
                }
                publish(buffer.slice(0, startIndex))
                const foundFullMatch = startIndex + nextTag.length <= buffer.length
                if (foundFullMatch) {
                  buffer = buffer.slice(startIndex + nextTag.length)
                  isReasoning = !isReasoning
                  afterSwitch = true
                } else {
                  buffer = buffer.slice(startIndex)
                  break
                }
              }
            }
          })
        ),
        ...rest
      }
    }
  }
}
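Note (not part of the diff): a minimal usage sketch of the middleware's non-streaming path, assuming a response whose text wraps its reasoning in <think>...</think>; the doGenerate stub and the sample strings are made up for illustration.

import { extractReasoningMiddleware } from '@renderer/middlewares/extractReasoningMiddleware'

const middleware = extractReasoningMiddleware({ openingTag: '<think>', closingTag: '</think>' })

// doGenerate stands in for a provider call that returns the full text at once.
const result = await middleware.wrapGenerate({
  doGenerate: async () => ({ text: '<think>compare both options first</think>The second option is cheaper.' })
})

console.log(result.text)      // 'The second option is cheaper.'
console.log(result.reasoning) // 'compare both options first'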
@@ -15,7 +15,6 @@ import type { Message } from '@renderer/types/newMessage'
 import { delay, isJSON, parseJSON } from '@renderer/utils'
 import { addAbortController, removeAbortController } from '@renderer/utils/abortController'
 import { formatApiHost } from '@renderer/utils/api'
-import { glmZeroPreviewProcessor, thinkTagProcessor, ThoughtProcessor } from '@renderer/utils/formats'
 import { getMainTextContent } from '@renderer/utils/messageUtils/find'
 import { isEmpty } from 'lodash'
 import type OpenAI from 'openai'
@@ -230,51 +229,4 @@ export default abstract class BaseProvider {
       cleanup
     }
   }
-
-  /**
-   * Finds the appropriate thinking processor for a given text chunk and model.
-   * Returns the processor if found, otherwise undefined.
-   */
-  protected findThinkingProcessor(chunkText: string, model: Model | undefined): ThoughtProcessor | undefined {
-    if (!model) return undefined
-
-    const processors: ThoughtProcessor[] = [thinkTagProcessor, glmZeroPreviewProcessor]
-    return processors.find((p) => p.canProcess(chunkText, model))
-  }
-
-  /**
-   * Returns a function closure that handles incremental reasoning text for a specific stream.
-   * The returned function processes a chunk, emits THINKING_DELTA for new reasoning,
-   * and returns the associated content part.
-   */
-  protected handleThinkingTags() {
-    let memoizedReasoning = ''
-    // Returns a function that handles a single chunk potentially containing thinking tags
-    return (chunkText: string, processor: ThoughtProcessor, onChunk: (chunk: any) => void): string => {
-      // Returns the processed content part
-      const { reasoning, content } = processor.process(chunkText)
-      let deltaReasoning = ''
-
-      if (reasoning && reasoning.trim()) {
-        // Check if the new reasoning starts with the previous one
-        if (reasoning.startsWith(memoizedReasoning)) {
-          deltaReasoning = reasoning.substring(memoizedReasoning.length)
-        } else {
-          // If not a continuation, send the whole new reasoning
-          deltaReasoning = reasoning
-          // console.warn("Thinking content did not start with previous memoized version. Sending full content.")
-        }
-        memoizedReasoning = reasoning // Update memoized state
-      } else {
-        // If no reasoning, reset memoized state? Let's reset.
-        memoizedReasoning = ''
-      }
-
-      if (deltaReasoning) {
-        onChunk({ type: ChunkType.THINKING_DELTA, text: deltaReasoning })
-      }
-
-      return content // Return the content part for TEXT_DELTA emission
-    }
-  }
 }
@@ -18,6 +18,7 @@ import {
 } from '@renderer/config/models'
 import { getStoreSetting } from '@renderer/hooks/useSettings'
 import i18n from '@renderer/i18n'
+import { extractReasoningMiddleware } from '@renderer/middlewares/extractReasoningMiddleware'
 import { getAssistantSettings, getDefaultModel, getTopNamingModel } from '@renderer/services/AssistantService'
 import { EVENT_NAMES } from '@renderer/services/EventService'
 import {
@@ -51,6 +52,7 @@ import {
 import { mcpToolCallResponseToOpenAICompatibleMessage, parseAndCallTools } from '@renderer/utils/mcp-tools'
 import { findFileBlocks, findImageBlocks, getMainTextContent } from '@renderer/utils/messageUtils/find'
 import { buildSystemPrompt } from '@renderer/utils/prompt'
+import { asyncGeneratorToReadableStream, readableStreamAsyncIterable } from '@renderer/utils/stream'
 import { isEmpty, takeRight } from 'lodash'
 import OpenAI, { AzureOpenAI } from 'openai'
 import {
@@ -62,6 +64,11 @@ import {
 import { CompletionsParams } from '.'
 import OpenAIProvider from './OpenAIProvider'
 
+// 1. Define the union chunk type
+export type OpenAIStreamChunk =
+  | { type: 'reasoning' | 'text-delta'; textDelta: string }
+  | { type: 'finish'; finishReason: any; usage: any; delta: any; chunk: any }
+
 export default class OpenAICompatibleProvider extends OpenAIProvider {
   constructor(provider: Provider) {
     super(provider)
@@ -331,6 +338,10 @@ export default class OpenAICompatibleProvider extends OpenAIProvider {
     const { contextCount, maxTokens, streamOutput } = getAssistantSettings(assistant)
     const isEnabledWebSearch = assistant.enableWebSearch || !!assistant.webSearchProviderId
     messages = addImageFileToContents(messages)
+    const enableReasoning =
+      ((isSupportedThinkingTokenModel(model) || isSupportedReasoningEffortModel(model)) &&
+        assistant.settings?.reasoning_effort !== undefined) ||
+      (isReasoningModel(model) && (!isSupportedThinkingTokenModel(model) || !isSupportedReasoningEffortModel(model)))
     let systemMessage = { role: 'system', content: assistant.prompt || '' }
     if (isSupportedReasoningEffortOpenAIModel(model)) {
       systemMessage = {
@@ -357,52 +368,7 @@ export default class OpenAICompatibleProvider extends OpenAIProvider {
       return streamOutput
     }
 
-    let hasReasoningContent = false
-    let lastChunk = ''
-    const isReasoningJustDone = (
-      delta: OpenAI.Chat.Completions.ChatCompletionChunk.Choice.Delta & {
-        reasoning_content?: string
-        reasoning?: string
-        thinking?: string
-      }
-    ) => {
-      if (!delta?.content) {
-        return false
-      }
-
-      // Check whether the current chunk combined with the previous chunk forms the ###Response marker
-      const combinedChunks = lastChunk + delta.content
-      lastChunk = delta.content
-
-      // Detect the end of thinking
-      if (combinedChunks.includes('###Response') || delta.content === '</think>') {
-        return true
-      }
-
-      // If reasoning_content or reasoning is present, the model is still thinking
-      if (delta?.reasoning_content || delta?.reasoning || delta?.thinking) {
-        hasReasoningContent = true
-      }
-
-      // If there was reasoning_content or reasoning before and there is now normal content, thinking has finished
-      return !!(hasReasoningContent && delta.content)
-    }
-
-    let time_first_token_millsec = 0
-    let time_first_token_millsec_delta = 0
-    let time_first_content_millsec = 0
     const start_time_millsec = new Date().getTime()
-    console.log(
-      `completions start_time_millsec ${new Date(start_time_millsec).toLocaleString(undefined, {
-        year: 'numeric',
-        month: 'numeric',
-        day: 'numeric',
-        hour: 'numeric',
-        minute: 'numeric',
-        second: 'numeric',
-        fractionalSecondDigits: 3
-      })}`
-    )
     const lastUserMessage = _messages.findLast((m) => m.role === 'user')
     const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
     const { signal } = abortController
@@ -436,9 +402,7 @@ export default class OpenAICompatibleProvider extends OpenAIProvider {
        } as ChatCompletionMessageParam)
        toolResults.forEach((ts) => reqMessages.push(ts as ChatCompletionMessageParam))
 
-       console.debug('[tool] reqMessages before processing', model.id, reqMessages)
        reqMessages = processReqMessages(model, reqMessages)
-       console.debug('[tool] reqMessages', model.id, reqMessages)
        const newStream = await this.sdk.chat.completions
          // @ts-ignore key is not typed
          .create(
@@ -499,193 +463,192 @@ export default class OpenAICompatibleProvider extends OpenAIProvider {
     // let isThinkingInContent: ThoughtProcessor | undefined = undefined
     // const processThinkingChunk = this.handleThinkingTags()
     let isFirstChunk = true
-    let isFirstThinkingChunk = true
-    for await (const chunk of stream) {
-      if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
-        break
-      }
-
-      const delta = chunk.choices[0]?.delta
-      const finishReason = chunk.choices[0]?.finish_reason
-
-      // --- Incremental onChunk calls ---
-
-      // 1. Reasoning Content
-      const reasoningContent = delta?.reasoning_content || delta?.reasoning
-      const currentTime = new Date().getTime() // Get current time for each chunk
-
-      if (time_first_token_millsec === 0 && isFirstThinkingChunk && reasoningContent) {
-        // Record the time of the first token
-        time_first_token_millsec = currentTime
-        // Record the time delta of the first token
-        time_first_token_millsec_delta = currentTime - start_time_millsec
-        console.log(
-          `completions time_first_token_millsec ${new Date(currentTime).toLocaleString(undefined, {
-            year: 'numeric',
-            month: 'numeric',
-            day: 'numeric',
-            hour: 'numeric',
-            minute: 'numeric',
-            second: 'numeric',
-            fractionalSecondDigits: 3
-          })}`
-        )
-        isFirstThinkingChunk = false
-      }
-      if (reasoningContent) {
-        thinkingContent += reasoningContent
-        hasReasoningContent = true // Keep track if reasoning occurred
-
-        // Calculate thinking time as time elapsed since start until this chunk
-        const thinking_time = currentTime - time_first_token_millsec
-        onChunk({ type: ChunkType.THINKING_DELTA, text: reasoningContent, thinking_millsec: thinking_time })
-      }
-      if (isReasoningJustDone(delta) && time_first_content_millsec === 0) {
-        time_first_content_millsec = currentTime
-        final_time_thinking_millsec_delta = time_first_content_millsec - time_first_token_millsec
-        onChunk({
-          type: ChunkType.THINKING_COMPLETE,
-          text: thinkingContent,
-          thinking_millsec: final_time_thinking_millsec_delta
-        })
-
-        thinkingContent = ''
-        isFirstThinkingChunk = true
-        hasReasoningContent = false
-      }
-
-      // 2. Text Content
-      if (delta?.content) {
-        if (isEnabledWebSearch) {
-          if (delta?.annotations) {
-            delta.content = convertLinks(delta.content || '', isFirstChunk)
-          } else if (assistant.model?.provider === 'openrouter') {
-            delta.content = convertLinksToOpenRouter(delta.content || '', isFirstChunk)
-          } else if (isZhipuModel(assistant.model)) {
-            delta.content = convertLinksToZhipu(delta.content || '', isFirstChunk)
-          } else if (isHunyuanSearchModel(assistant.model)) {
-            delta.content = convertLinksToHunyuan(
-              delta.content || '',
-              chunk.search_info.search_results || [],
-              isFirstChunk
-            )
-          }
-        }
-        // This means there was no preceding thinking content
-        if (isFirstChunk && time_first_token_millsec === 0 && time_first_token_millsec_delta === 0) {
-          isFirstChunk = false
-          time_first_token_millsec = currentTime
-          time_first_token_millsec_delta = time_first_token_millsec - start_time_millsec
-        }
-        content += delta.content // Still accumulate for processToolUses
-
-        // isThinkingInContent = this.findThinkingProcessor(content, model)
-        // if (isThinkingInContent) {
-        //   processThinkingChunk(content, isThinkingInContent, onChunk)
-        onChunk({ type: ChunkType.TEXT_DELTA, text: delta.content })
-        // } else {
-        // }
-      }
-      // console.log('delta?.finish_reason', delta?.finish_reason)
-      if (!isEmpty(finishReason) || delta?.annotations) {
-        onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
-        final_time_completion_millsec_delta = currentTime - start_time_millsec
-        console.log(
-          `completions final_time_completion_millsec ${new Date(currentTime).toLocaleString(undefined, {
-            year: 'numeric',
-            month: 'numeric',
-            day: 'numeric',
-            hour: 'numeric',
-            minute: 'numeric',
-            second: 'numeric',
-            fractionalSecondDigits: 3
-          })}`
-        )
-        // 6. Usage (If provided per chunk) - Capture the last known usage
-        if (chunk.usage) {
-          // console.log('chunk.usage', chunk.usage)
-          lastUsage = chunk.usage // Update with the latest usage info
-          // Send incremental usage update if needed by UI (optional, keep if useful)
-          // onChunk({ type: 'block_in_progress', response: { usage: chunk.usage } })
-        }
-
-        // 3. Web Search
-        if (delta?.annotations) {
-          onChunk({
-            type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
-            llm_web_search: {
-              results: delta.annotations,
-              source: WebSearchSource.OPENAI_COMPATIBLE
-            }
-          } as LLMWebSearchCompleteChunk)
-        }
-
-        if (assistant.model?.provider === 'perplexity') {
-          const citations = chunk.citations
-          if (citations) {
-            onChunk({
-              type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
-              llm_web_search: {
-                results: citations,
-                source: WebSearchSource.PERPLEXITY
-              }
-            } as LLMWebSearchCompleteChunk)
-          }
-        }
-        if (isEnabledWebSearch && isZhipuModel(model) && finishReason === 'stop' && chunk?.web_search) {
-          onChunk({
-            type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
-            llm_web_search: {
-              results: chunk.web_search,
-              source: WebSearchSource.ZHIPU
-            }
-          } as LLMWebSearchCompleteChunk)
-        }
-        if (isEnabledWebSearch && isHunyuanSearchModel(model) && chunk?.search_info?.search_results) {
-          onChunk({
-            type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
-            llm_web_search: {
-              results: chunk.search_info.search_results,
-              source: WebSearchSource.HUNYUAN
-            }
-          } as LLMWebSearchCompleteChunk)
-        }
-      }
-
-      // --- End of Incremental onChunk calls ---
-    } // End of for await loop
-
-    // Call processToolUses AFTER the loop finishes processing the main stream content
-    // Note: parseAndCallTools inside processToolUses should handle its own onChunk for tool responses
-    await processToolUses(content, idx)
-
-    // Send the final block_complete chunk with accumulated data
-    onChunk({
-      type: ChunkType.BLOCK_COMPLETE,
-      response: {
-        // Use the enhanced usage object
-        usage: lastUsage,
-        metrics: {
-          // Get completion tokens from the last usage object if available
-          completion_tokens: lastUsage?.completion_tokens,
-          time_completion_millsec: final_time_completion_millsec_delta,
-          time_first_token_millsec: time_first_token_millsec_delta,
-          time_thinking_millsec: final_time_thinking_millsec_delta
-        }
-      }
-    })
-
-    // FIXME: temporary workaround, reset the timestamps and thinking content
-    time_first_token_millsec = 0
-    time_first_content_millsec = 0
-
-    // OpenAI stream typically doesn't provide a final summary chunk easily.
-    // We are sending per-chunk usage if available.
+    let time_first_token_millsec = 0
+    let time_first_token_millsec_delta = 0
+    let time_first_content_millsec = 0
+    let time_thinking_start = 0
+
+    // 1. Initialize the middleware
+    const reasoningTags = [
+      { openingTag: '<think>', closingTag: '</think>', separator: '\n' },
+      { openingTag: '###Thinking', closingTag: '###Response', separator: '\n' }
+    ]
+    const getAppropriateTag = (model: Model) => {
+      if (model.id.includes('qwen3')) return reasoningTags[0]
+      return reasoningTags[0]
+    }
+    const reasoningTag = getAppropriateTag(model)
+    async function* openAIChunkToTextDelta(stream: any): AsyncGenerator<OpenAIStreamChunk> {
+      for await (const chunk of stream) {
+        if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
+          break
+        }
+
+        const delta = chunk.choices[0]?.delta
+        if (delta?.reasoning_content || delta?.reasoning) {
+          yield { type: 'reasoning', textDelta: delta.reasoning_content || delta.reasoning }
+        }
+        if (delta?.content) {
+          yield { type: 'text-delta', textDelta: delta.content }
+        }
+
+        const finishReason = chunk.choices[0]?.finish_reason
+        if (!isEmpty(finishReason)) {
+          yield { type: 'finish', finishReason, usage: chunk.usage, delta, chunk }
+          break
+        }
+      }
+    }
+
+    // 2. Apply the middleware
+    const { stream: processedStream } = await extractReasoningMiddleware<OpenAIStreamChunk>({
+      openingTag: reasoningTag?.openingTag,
+      closingTag: reasoningTag?.closingTag,
+      separator: reasoningTag?.separator,
+      enableReasoning
+    }).wrapStream({
+      doStream: async () => ({
+        stream: asyncGeneratorToReadableStream(openAIChunkToTextDelta(stream))
+      })
+    })
+
+    // 3. Consume processedStream and dispatch onChunk
+    for await (const chunk of readableStreamAsyncIterable(processedStream)) {
+      const currentTime = new Date().getTime()
+      const delta = chunk.type === 'finish' ? chunk.delta : chunk
+      const rawChunk = chunk.type === 'finish' ? chunk.chunk : chunk
+
+      switch (chunk.type) {
+        case 'reasoning': {
+          if (time_thinking_start === 0) {
+            time_thinking_start = currentTime
+            time_first_token_millsec = currentTime
+            time_first_token_millsec_delta = currentTime - start_time_millsec
+          }
+          thinkingContent += chunk.textDelta
+          const thinking_time = currentTime - time_thinking_start
+          onChunk({ type: ChunkType.THINKING_DELTA, text: chunk.textDelta, thinking_millsec: thinking_time })
+          break
+        }
+        case 'text-delta': {
+          let textDelta = chunk.textDelta
+
+          if (assistant.enableWebSearch && delta) {
+            const originalDelta = rawChunk?.choices?.[0]?.delta
+
+            if (originalDelta?.annotations) {
+              textDelta = convertLinks(textDelta, isFirstChunk)
+            } else if (assistant.model?.provider === 'openrouter') {
+              textDelta = convertLinksToOpenRouter(textDelta, isFirstChunk)
+            } else if (isZhipuModel(assistant.model)) {
+              textDelta = convertLinksToZhipu(textDelta, isFirstChunk)
+            } else if (isHunyuanSearchModel(assistant.model)) {
+              const searchResults = rawChunk?.search_info?.search_results || []
+              textDelta = convertLinksToHunyuan(textDelta, searchResults, isFirstChunk)
+            }
+          }
+          if (isFirstChunk) {
+            isFirstChunk = false
+            if (time_first_token_millsec === 0) {
+              time_first_token_millsec = currentTime
+              time_first_token_millsec_delta = currentTime - start_time_millsec
+            }
+          }
+          content += textDelta
+          if (time_thinking_start > 0 && time_first_content_millsec === 0) {
+            time_first_content_millsec = currentTime
+            final_time_thinking_millsec_delta = time_first_content_millsec - time_thinking_start
+
+            onChunk({
+              type: ChunkType.THINKING_COMPLETE,
+              text: thinkingContent,
+              thinking_millsec: final_time_thinking_millsec_delta
+            })
+          }
+          onChunk({ type: ChunkType.TEXT_DELTA, text: textDelta })
+          break
+        }
+        case 'finish': {
+          const finishReason = chunk.finishReason
+          const usage = chunk.usage
+          const originalFinishDelta = chunk.delta
+          const originalFinishRawChunk = chunk.chunk
+
+          if (!isEmpty(finishReason)) {
+            onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
+            final_time_completion_millsec_delta = currentTime - start_time_millsec
+            if (usage) {
+              lastUsage = usage
+            }
+            if (originalFinishDelta?.annotations) {
+              onChunk({
+                type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
+                llm_web_search: {
+                  results: originalFinishDelta.annotations,
+                  source: WebSearchSource.OPENAI
+                }
+              } as LLMWebSearchCompleteChunk)
+            }
+            if (assistant.model?.provider === 'perplexity') {
+              const citations = originalFinishRawChunk.citations
+              if (citations) {
+                onChunk({
+                  type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
+                  llm_web_search: {
+                    results: citations,
+                    source: WebSearchSource.PERPLEXITY
+                  }
+                } as LLMWebSearchCompleteChunk)
+              }
+            }
+            if (
+              isEnabledWebSearch &&
+              isZhipuModel(model) &&
+              finishReason === 'stop' &&
+              originalFinishRawChunk?.web_search
+            ) {
+              onChunk({
+                type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
+                llm_web_search: {
+                  results: originalFinishRawChunk.web_search,
+                  source: WebSearchSource.ZHIPU
+                }
+              } as LLMWebSearchCompleteChunk)
+            }
+            if (
+              isEnabledWebSearch &&
+              isHunyuanSearchModel(model) &&
+              originalFinishRawChunk?.search_info?.search_results
+            ) {
+              onChunk({
+                type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
+                llm_web_search: {
+                  results: originalFinishRawChunk.search_info.search_results,
+                  source: WebSearchSource.HUNYUAN
+                }
+              } as LLMWebSearchCompleteChunk)
+            }
+          }
+          await processToolUses(content, idx)
+          onChunk({
+            type: ChunkType.BLOCK_COMPLETE,
+            response: {
+              usage: lastUsage,
+              metrics: {
+                completion_tokens: lastUsage?.completion_tokens,
+                time_completion_millsec: final_time_completion_millsec_delta,
+                time_first_token_millsec: time_first_token_millsec_delta,
+                time_thinking_millsec: final_time_thinking_millsec_delta
+              }
+            }
+          })
+          break
+        }
+      }
+    }
     }
 
-    console.debug('[completions] reqMessages before processing', model.id, reqMessages)
     reqMessages = processReqMessages(model, reqMessages)
-    console.debug('[completions] reqMessages', model.id, reqMessages)
     // Wait for the API to return the stream
     onChunk({ type: ChunkType.LLM_RESPONSE_CREATED })
     const stream = await this.sdk.chat.completions
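Note (not part of the diff): the streaming path above reduces to a three-stage pipeline: adapt the SDK stream to { type, textDelta } chunks, let extractReasoningMiddleware split the reasoning tags out, then consume the processed stream. A stripped-down sketch with fabricated chunks (the fakeChunks generator and the sample strings are illustrative only):

import { extractReasoningMiddleware } from '@renderer/middlewares/extractReasoningMiddleware'
import { asyncGeneratorToReadableStream, readableStreamAsyncIterable } from '@renderer/utils/stream'

type Chunk = { type: 'reasoning' | 'text-delta'; textDelta: string }

// 1. Adapt: a stand-in for openAIChunkToTextDelta with hard-coded deltas.
async function* fakeChunks(): AsyncGenerator<Chunk> {
  yield { type: 'text-delta', textDelta: '<think>check the ' }
  yield { type: 'text-delta', textDelta: 'units</think>42 km' }
}

// 2. Wrap: the middleware re-labels anything between <think> and </think> as 'reasoning'.
const { stream } = await extractReasoningMiddleware<Chunk>({
  openingTag: '<think>',
  closingTag: '</think>',
  enableReasoning: true
}).wrapStream({
  doStream: async () => ({ stream: asyncGeneratorToReadableStream(fakeChunks()) })
})

// 3. Consume: 'reasoning' chunks carry "check the units", 'text-delta' chunks carry "42 km".
for await (const chunk of readableStreamAsyncIterable(stream)) {
  console.log(chunk.type, JSON.stringify(chunk.textDelta))
}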
@@ -835,7 +798,6 @@ export default class OpenAICompatibleProvider extends OpenAIProvider {
 
     await this.checkIsCopilot()
 
-    console.debug('[summaries] reqMessages', model.id, [systemMessage, userMessage])
     // @ts-ignore key is not typed
     const response = await this.sdk.chat.completions.create({
       model: model.id,
@@ -877,10 +839,8 @@ export default class OpenAICompatibleProvider extends OpenAIProvider {
       role: 'user',
       content: userMessageContent
     }
-    console.debug('[summaryForSearch] reqMessages', model.id, [systemMessage, userMessage])
 
     const lastUserMessage = messages[messages.length - 1]
-    console.log('lastUserMessage?.id', lastUserMessage?.id)
     const { abortController, cleanup } = this.createAbortController(lastUserMessage?.id)
     const { signal } = abortController
 
@@ -989,7 +949,6 @@ export default class OpenAICompatibleProvider extends OpenAIProvider {
 
     try {
       await this.checkIsCopilot()
-      console.debug('[checkModel] body', model.id, body)
       if (!stream) {
        const response = await this.sdk.chat.completions.create(body as ChatCompletionCreateParamsNonStreaming)
        if (!response?.choices[0].message) {

@@ -1,4 +1,3 @@
-import { Model } from '@renderer/types'
 import type { Message } from '@renderer/types/newMessage'
 
 import { findImageBlocks, getMainTextContent } from './messageUtils/find'
@@ -99,75 +98,6 @@ export function removeSvgEmptyLines(text: string): string {
 //   return content
 // }
 
-export interface ThoughtProcessor {
-  canProcess: (content: string, model?: Model) => boolean
-  process: (content: string) => { reasoning: string; content: string }
-}
-
-export const glmZeroPreviewProcessor: ThoughtProcessor = {
-  canProcess: (content: string, model?: Model) => {
-    if (!model) return false
-
-    const modelId = model.id || ''
-    const modelName = model.name || ''
-    const isGLMZeroPreview =
-      modelId.toLowerCase().includes('glm-zero-preview') || modelName.toLowerCase().includes('glm-zero-preview')
-
-    return isGLMZeroPreview && content.includes('###Thinking')
-  },
-  process: (content: string) => {
-    const parts = content.split('###')
-    const thinkingMatch = parts.find((part) => part.trim().startsWith('Thinking'))
-    const responseMatch = parts.find((part) => part.trim().startsWith('Response'))
-
-    return {
-      reasoning: thinkingMatch ? thinkingMatch.replace('Thinking', '').trim() : '',
-      content: responseMatch ? responseMatch.replace('Response', '').trim() : ''
-    }
-  }
-}
-
-export const thinkTagProcessor: ThoughtProcessor = {
-  canProcess: (content: string, model?: Model) => {
-    if (!model) return false
-
-    return content.startsWith('<think>') || content.includes('</think>')
-  },
-  process: (content: string) => {
-    // Handle a normally closed think tag
-    const thinkPattern = /^<think>(.*?)<\/think>/s
-    const matches = content.match(thinkPattern)
-    if (matches) {
-      return {
-        reasoning: matches[1].trim(),
-        content: content.replace(thinkPattern, '').trim()
-      }
-    }
-
-    // Handle the case where only the closing tag is present
-    if (content.includes('</think>') && !content.startsWith('<think>')) {
-      const parts = content.split('</think>')
-      return {
-        reasoning: parts[0].trim(),
-        content: parts.slice(1).join('</think>').trim()
-      }
-    }
-
-    // Handle the case where only the opening tag is present
-    if (content.startsWith('<think>')) {
-      return {
-        reasoning: content.slice(7).trim(), // skip the '<think>' tag
-        content: ''
-      }
-    }
-
-    return {
-      reasoning: '',
-      content
-    }
-  }
-}
-
 export function withGenerateImage(message: Message): { content: string; images?: string[] } {
   const originalContent = getMainTextContent(message)
   const imagePattern = new RegExp(`!\\[[^\\]]*\\]\\((.*?)\\s*("(?:.*[^"])")?\\s*\\)`)
src/renderer/src/utils/getPotentialIndex.ts (new file, 29 lines)

@@ -0,0 +1,29 @@
// Copied from https://github.com/vercel/ai/blob/main/packages/ai/core/util/get-potential-start-index.ts

/**
 * Returns the index of the start of the searchedText in the text, or null if it
 * is not found.
 */
export function getPotentialStartIndex(text: string, searchedText: string): number | null {
  // Return null immediately if searchedText is empty.
  if (searchedText.length === 0) {
    return null
  }

  // Check if the searchedText exists as a direct substring of text.
  const directIndex = text.indexOf(searchedText)
  if (directIndex !== -1) {
    return directIndex
  }

  // Otherwise, look for the largest suffix of "text" that matches
  // a prefix of "searchedText". We go from the end of text inward.
  for (let i = text.length - 1; i >= 0; i--) {
    const suffix = text.substring(i)
    if (searchedText.startsWith(suffix)) {
      return i
    }
  }

  return null
}
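Note (not part of the diff): getPotentialStartIndex is what lets the middleware hold back a tag that has only partially arrived in the current buffer. A few illustrative calls (the '@renderer/utils/getPotentialIndex' alias and the sample values are assumptions):

import { getPotentialStartIndex } from '@renderer/utils/getPotentialIndex'

getPotentialStartIndex('answer: <think>', '<think>') // 8: the full tag is present
getPotentialStartIndex('answer: <thi', '<think>')    // 8: '<thi' could still grow into the tag, so buffer from here
getPotentialStartIndex('plain text', '<think>')      // null: nothing needs to be held back
getPotentialStartIndex('anything', '')               // null: empty search text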
src/renderer/src/utils/stream.ts (new file, 25 lines)

@@ -0,0 +1,25 @@
export function readableStreamAsyncIterable<T>(stream: ReadableStream<T>): AsyncIterable<T> {
  const reader = stream.getReader()
  return {
    [Symbol.asyncIterator](): AsyncIterator<T> {
      return {
        async next(): Promise<IteratorResult<T>> {
          return reader.read() as Promise<IteratorResult<T>>
        }
      }
    }
  }
}

export function asyncGeneratorToReadableStream<T>(gen: AsyncGenerator<T>): ReadableStream<T> {
  return new ReadableStream<T>({
    async pull(controller) {
      const { value, done } = await gen.next()
      if (done) {
        controller.close()
      } else {
        controller.enqueue(value)
      }
    }
  })
}
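Note (not part of the diff): a quick roundtrip sketch showing how the two helpers complement each other; the count generator is made up for illustration.

import { asyncGeneratorToReadableStream, readableStreamAsyncIterable } from '@renderer/utils/stream'

async function* count(): AsyncGenerator<number> {
  yield 1
  yield 2
  yield 3
}

// Async generator -> ReadableStream (what the provider hands to wrapStream) ...
const stream = asyncGeneratorToReadableStream(count())

// ... and ReadableStream -> async iterable (what the provider's for-await loop consumes).
for await (const n of readableStreamAsyncIterable(stream)) {
  console.log(n) // 1, 2, 3
}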