fix: enhance error handling and metrics tracking across AI providers and message processing

suyao 2025-05-11 13:13:20 +08:00
parent 9b9a395451
commit 4c50dfbd19
10 changed files with 283 additions and 348 deletions
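
At a high level, every provider in this commit stops deriving usage and metrics from the last chunk of a single round and instead accumulates one Usage/Metrics pair across all streamed rounds, including the recursive rounds triggered by tool calls, then reports the totals once in the final BLOCK_COMPLETE chunk. A condensed sketch of that shared pattern, using the Usage and Metrics types imported from @renderer/types in the diffs below; the accumulateRound helper is illustrative only and is not part of the commit:

import type { Metrics, Usage } from '@renderer/types'

// One pair of totals per request, shared by every round of the stream.
const finalUsage: Usage = { completion_tokens: 0, prompt_tokens: 0, total_tokens: 0 }
const finalMetrics: Metrics = { completion_tokens: 0, time_completion_millsec: 0, time_first_token_millsec: 0 }

// Illustrative helper: fold one streamed round (including tool-call follow-ups) into the totals.
function accumulateRound(roundUsage: Usage, start_time_millsec: number, time_first_token_millsec: number): void {
  finalUsage.prompt_tokens += roundUsage.prompt_tokens
  finalUsage.completion_tokens += roundUsage.completion_tokens
  finalUsage.total_tokens += roundUsage.total_tokens
  finalMetrics.completion_tokens = finalUsage.completion_tokens
  finalMetrics.time_completion_millsec += Date.now() - start_time_millsec
  finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec
}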

View File

@@ -16,7 +16,7 @@ const MessageErrorInfo: React.FC<{ block: ErrorMessageBlock }> = ({ block }) =>
   const HTTP_ERROR_CODES = [400, 401, 403, 404, 429, 500, 502, 503, 504]
   if (block.error && HTTP_ERROR_CODES.includes(block.error?.status)) {
-    return <Alert description={t(`error.http.${block.error.status}`)} type="error" />
+    return <Alert description={t(`error.http.${block.error.status}`)} message={block.error?.message} type="error" />
   }
   if (block?.error?.message) {
     const errorKey = `error.${block.error.message}`

View File

@@ -56,10 +56,12 @@ const ThinkingBlock: React.FC<Props> = ({ block }) => {
   useEffect(() => {
     if (isThinking) {
       intervalId.current = setInterval(() => {
-        setThinkingTime((prev) => prev + 200)
-      }, 200)
-    } else {
-      return
+        setThinkingTime((prev) => prev + 100)
+      }, 100)
+    } else if (intervalId.current) {
+      // clear the timer immediately
+      clearInterval(intervalId.current)
+      intervalId.current = null
     }
     return () => {
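
The hunk above changes two things: the tick interval drops from 200 ms to 100 ms, and the interval is now cleared as soon as isThinking turns false instead of taking the early-return path. A minimal sketch of the resulting effect; the cleanup body and the dependency array are not shown in the diff and are assumptions here:

useEffect(() => {
  if (isThinking) {
    intervalId.current = setInterval(() => {
      setThinkingTime((prev) => prev + 100)
    }, 100)
  } else if (intervalId.current) {
    // clear the timer immediately once thinking stops
    clearInterval(intervalId.current)
    intervalId.current = null
  }
  return () => {
    // assumed cleanup on re-run/unmount; the diff truncates before this body
    if (intervalId.current) {
      clearInterval(intervalId.current)
      intervalId.current = null
    }
  }
}, [isThinking]) // dependency list assumed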

View File

@@ -10,35 +10,6 @@ interface Props {
 }
 const MessageContent: React.FC<Props> = ({ message }) => {
-  // const { t } = useTranslation()
-  // if (message.status === 'pending') {
-  //   return (
-  //   )
-  // }
-  // if (message.status === 'searching') {
-  //   return (
-  //     <SearchingContainer>
-  //       <Search size={24} />
-  //       <SearchingText>{t('message.searching')}</SearchingText>
-  //       <BarLoader color="#1677ff" />
-  //     </SearchingContainer>
-  //   )
-  // }
-  // if (message.status === 'error') {
-  //   return <MessageError message={message} />
-  // }
-  // if (message.type === '@' && model) {
-  //   const content = `[@${model.name}](#) ${getBriefInfo(message.content)}`
-  //   return <Markdown message={{ ...message, content }} />
-  // }
-  // const toolUseRegex = /<tool_use>([\s\S]*?)<\/tool_use>/g
-  // console.log('message', message)
   return (
     <>
       <Flex gap="8px" wrap style={{ marginBottom: 10 }}>

View File

@@ -1,45 +0,0 @@
-import type { ErrorMessageBlock } from '@renderer/types/newMessage'
-import { Alert as AntdAlert } from 'antd'
-import { FC } from 'react'
-import { useTranslation } from 'react-i18next'
-import styled from 'styled-components'
-const MessageError: FC<{ block: ErrorMessageBlock }> = ({ block }) => {
-  return (
-    <>
-      {/* <Markdown block={block} role={role} />
-      {block.error && (
-        <Markdown
-          message={{
-            ...block,
-            content: formatErrorMessage(block.error)
-          }}
-        />
-      )} */}
-      <MessageErrorInfo block={block} />
-    </>
-  )
-}
-const MessageErrorInfo: FC<{ block: ErrorMessageBlock }> = ({ block }) => {
-  const { t } = useTranslation()
-  const HTTP_ERROR_CODES = [400, 401, 403, 404, 429, 500, 502, 503, 504]
-  console.log('block', block)
-  if (block.error && HTTP_ERROR_CODES.includes(block.error?.status)) {
-    return <Alert description={t(`error.http.${block.error.status}`)} type="error" />
-  }
-  if (block?.error?.message) {
-    return <Alert description={block.error.message} type="error" />
-  }
-  return <Alert description={t('error.chat.response')} type="error" />
-}
-const Alert = styled(AntdAlert)`
-  margin: 15px 0 8px;
-  padding: 10px;
-  font-size: 12px;
-`
-export default MessageError

View File

@@ -30,10 +30,12 @@ import {
   MCPCallToolResponse,
   MCPTool,
   MCPToolResponse,
+  Metrics,
   Model,
   Provider,
   Suggestion,
   ToolCallResponse,
+  Usage,
   WebSearchSource
 } from '@renderer/types'
 import { ChunkType } from '@renderer/types/chunk'
@@ -47,7 +49,7 @@ import {
 } from '@renderer/utils/mcp-tools'
 import { findFileBlocks, findImageBlocks, getMainTextContent } from '@renderer/utils/messageUtils/find'
 import { buildSystemPrompt } from '@renderer/utils/prompt'
-import { first, flatten, sum, takeRight } from 'lodash'
+import { first, flatten, takeRight } from 'lodash'
 import OpenAI from 'openai'
 import { CompletionsParams } from '.'
@@ -270,10 +272,24 @@ export default class AnthropicProvider extends BaseProvider {
       ...this.getCustomParameters(assistant)
     }
+    const { abortController, cleanup } = this.createAbortController(lastUserMessage?.id)
+    const { signal } = abortController
+    const finalUsage: Usage = {
+      completion_tokens: 0,
+      prompt_tokens: 0,
+      total_tokens: 0
+    }
+    const finalMetrics: Metrics = {
+      completion_tokens: 0,
+      time_completion_millsec: 0,
+      time_first_token_millsec: 0
+    }
+    const toolResponses: MCPToolResponse[] = []
+    const processStream = async (body: MessageCreateParamsNonStreaming, idx: number) => {
       let time_first_token_millsec = 0
-      let time_first_content_millsec = 0
-      let checkThinkingContent = false
-      let thinking_content = ''
       const start_time_millsec = new Date().getTime()
       if (!streamOutput) {
@@ -311,36 +327,27 @@
           })
         }
-    const { abortController, cleanup } = this.createAbortController(lastUserMessage?.id)
-    const { signal } = abortController
-    const toolResponses: MCPToolResponse[] = []
-    const processStream = (body: MessageCreateParamsNonStreaming, idx: number) => {
+      let thinking_content = ''
+      let isFirstChunk = true
       return new Promise<void>((resolve, reject) => {
         // wait for the API to return the stream
         const toolCalls: ToolUseBlock[] = []
-        let hasThinkingContent = false
         this.sdk.messages
           .stream({ ...body, stream: true }, { signal, timeout: 5 * 60 * 1000 })
           .on('text', (text) => {
-            if (hasThinkingContent && !checkThinkingContent) {
-              checkThinkingContent = true
+            if (isFirstChunk) {
+              isFirstChunk = false
+              if (time_first_token_millsec == 0) {
+                time_first_token_millsec = new Date().getTime()
+              } else {
                 onChunk({
                   type: ChunkType.THINKING_COMPLETE,
                   text: thinking_content,
-                  thinking_millsec: new Date().getTime() - time_first_content_millsec
+                  thinking_millsec: new Date().getTime() - time_first_token_millsec
                 })
+              }
             }
-            if (time_first_token_millsec == 0) {
-              time_first_token_millsec = new Date().getTime()
-            }
-            thinking_content = ''
-            checkThinkingContent = false
-            hasThinkingContent = false
-            if (!hasThinkingContent && time_first_content_millsec === 0) {
-              time_first_content_millsec = new Date().getTime()
-            }
             onChunk({ type: ChunkType.TEXT_DELTA, text })
@@ -372,34 +379,22 @@
               })
             }
           }
+          if (block.type === 'tool_use') {
+            toolCalls.push(block)
+          }
         })
         .on('thinking', (thinking) => {
-          hasThinkingContent = true
-          const currentTime = new Date().getTime() // Get current time for each chunk
           if (time_first_token_millsec == 0) {
-            time_first_token_millsec = currentTime
+            time_first_token_millsec = new Date().getTime()
           }
-          // Set time_first_content_millsec ONLY when the first content (thinking or text) arrives
-          if (time_first_content_millsec === 0) {
-            time_first_content_millsec = currentTime
-          }
-          // Calculate thinking time as time elapsed since start until this chunk
-          const thinking_time = currentTime - time_first_content_millsec
           onChunk({
             type: ChunkType.THINKING_DELTA,
             text: thinking,
-            thinking_millsec: thinking_time
+            thinking_millsec: new Date().getTime() - time_first_token_millsec
           })
           thinking_content += thinking
         })
-        .on('contentBlock', (content) => {
-          if (content.type === 'tool_use') {
-            toolCalls.push(content)
-          }
-        })
         .on('finalMessage', async (message) => {
           const toolResults: Awaited<ReturnType<typeof parseAndCallTools>> = []
           // tool call
@@ -458,29 +453,28 @@
               newBody.messages = userMessages
               onChunk({ type: ChunkType.LLM_RESPONSE_CREATED })
+              try {
                 await processStream(newBody, idx + 1)
+              } catch (error) {
+                console.error('Error processing stream:', error)
+                reject(error)
+              }
             }
-            const time_completion_millsec = new Date().getTime() - start_time_millsec
+            finalUsage.prompt_tokens += message.usage.input_tokens
+            finalUsage.completion_tokens += message.usage.output_tokens
+            finalUsage.total_tokens += finalUsage.prompt_tokens + finalUsage.completion_tokens
+            finalMetrics.completion_tokens = finalUsage.completion_tokens
+            finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+            finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec
             onChunk({
               type: ChunkType.BLOCK_COMPLETE,
               response: {
-                usage: {
-                  prompt_tokens: message.usage.input_tokens,
-                  completion_tokens: message.usage.output_tokens,
-                  total_tokens: sum(Object.values(message.usage))
-                },
-                metrics: {
-                  completion_tokens: message.usage.output_tokens,
-                  time_completion_millsec,
-                  time_first_token_millsec: time_first_token_millsec - start_time_millsec
-                }
+                usage: finalUsage,
+                metrics: finalMetrics
               }
             })
-            // FIXME: temporary workaround, reset the timestamps and thinking content
-            time_first_token_millsec = 0
-            time_first_content_millsec = 0
             resolve()
           })
           .on('error', (error) => reject(error))

View File

@@ -40,6 +40,7 @@ import {
   MCPCallToolResponse,
   MCPTool,
   MCPToolResponse,
+  Metrics,
   Model,
   Provider,
   Suggestion,
@@ -370,8 +371,17 @@ export default class GeminiProvider extends BaseProvider {
       }
     }
-    const start_time_millsec = new Date().getTime()
-    let time_first_token_millsec = 0
+    const finalUsage: Usage = {
+      completion_tokens: 0,
+      prompt_tokens: 0,
+      total_tokens: 0
+    }
+    const finalMetrics: Metrics = {
+      completion_tokens: 0,
+      time_completion_millsec: 0,
+      time_first_token_millsec: 0
+    }
     const { cleanup, abortController } = this.createAbortController(userLastMessage?.id, true)
@@ -445,6 +455,8 @@
       history.push(messageContents)
       let functionCalls: FunctionCall[] = []
+      let time_first_token_millsec = 0
+      const start_time_millsec = new Date().getTime()
       if (stream instanceof GenerateContentResponse) {
         let content = ''
@@ -504,34 +516,18 @@
             } as BlockCompleteChunk)
           } else {
             let content = ''
-            let final_time_completion_millsec = 0
-            let lastUsage: Usage | undefined = undefined
             for await (const chunk of stream) {
               if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) break
-              // --- Calculate Metrics ---
-              if (time_first_token_millsec == 0 && chunk.text !== undefined) {
-                // Update based on text arrival
-                time_first_token_millsec = new Date().getTime() - start_time_millsec
+              if (time_first_token_millsec == 0) {
+                time_first_token_millsec = new Date().getTime()
               }
-              // 1. Text Content
               if (chunk.text !== undefined) {
                 content += chunk.text
                 onChunk({ type: ChunkType.TEXT_DELTA, text: chunk.text })
               }
-              // 2. Usage Data
-              if (chunk.usageMetadata) {
-                lastUsage = {
-                  prompt_tokens: chunk.usageMetadata.promptTokenCount || 0,
-                  completion_tokens: chunk.usageMetadata.candidatesTokenCount || 0,
-                  total_tokens: chunk.usageMetadata.totalTokenCount || 0
-                }
-                final_time_completion_millsec = new Date().getTime() - start_time_millsec
-              }
-              // 4. Image Generation
               const generateImage = this.processGeminiImageResponse(chunk, onChunk)
               if (generateImage?.images?.length) {
                 onChunk({ type: ChunkType.IMAGE_COMPLETE, image: generateImage })
@@ -541,8 +537,12 @@
               if (chunk.text) {
                 onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
               }
+              if (chunk.usageMetadata) {
+                finalUsage.prompt_tokens += chunk.usageMetadata.promptTokenCount || 0
+                finalUsage.completion_tokens += chunk.usageMetadata.candidatesTokenCount || 0
+                finalUsage.total_tokens += chunk.usageMetadata.totalTokenCount || 0
+              }
               if (chunk.candidates?.[0]?.groundingMetadata) {
-                // 3. Grounding/Search Metadata
                 const groundingMetadata = chunk.candidates?.[0]?.groundingMetadata
                 onChunk({
                   type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
@@ -561,17 +561,11 @@
               functionCalls = functionCalls.concat(chunk.functionCalls)
             }
-            onChunk({
-              type: ChunkType.BLOCK_COMPLETE,
-              response: {
-                metrics: {
-                  completion_tokens: lastUsage?.completion_tokens,
-                  time_completion_millsec: final_time_completion_millsec,
-                  time_first_token_millsec
-                },
-                usage: lastUsage
+            finalMetrics.completion_tokens = finalUsage.completion_tokens
+            finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+            finalMetrics.time_first_token_millsec =
+              (finalMetrics.time_first_token_millsec || 0) + (time_first_token_millsec - start_time_millsec)
               }
-            })
           }
       // --- End Incremental onChunk calls ---
@@ -589,7 +583,15 @@
       if (toolResults.length) {
         await processToolResults(toolResults, idx)
       }
+      // FIXME: because of the recursion this gets sent n times
+      onChunk({
+        type: ChunkType.BLOCK_COMPLETE,
+        response: {
+          usage: finalUsage,
+          metrics: finalMetrics
         }
+      })
     }
   }
@@ -615,17 +617,6 @@
     })
     await processStream(userMessagesStream, 0).finally(cleanup)
-
-    const final_time_completion_millsec = new Date().getTime() - start_time_millsec
-    onChunk({
-      type: ChunkType.BLOCK_COMPLETE,
-      response: {
-        metrics: {
-          time_completion_millsec: final_time_completion_millsec,
-          time_first_token_millsec
-        }
-      }
-    })
   }
   /**

View File

@@ -34,6 +34,7 @@ import {
   MCPCallToolResponse,
   MCPTool,
   MCPToolResponse,
+  Metrics,
   Model,
   Provider,
   Suggestion,
@@ -395,7 +396,6 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
       return streamOutput
     }
-    const start_time_millsec = new Date().getTime()
     const lastUserMessage = _messages.findLast((m) => m.role === 'user')
     const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
     const { signal } = abortController
@@ -423,6 +423,18 @@
       reqMessages = [systemMessage, ...userMessages].filter(Boolean) as ChatCompletionMessageParam[]
     }
+    let finalUsage: Usage = {
+      completion_tokens: 0,
+      prompt_tokens: 0,
+      total_tokens: 0
+    }
+    const finalMetrics: Metrics = {
+      completion_tokens: 0,
+      time_completion_millsec: 0,
+      time_first_token_millsec: 0
+    }
     const toolResponses: MCPToolResponse[] = []
     const processToolResults = async (toolResults: Awaited<ReturnType<typeof parseAndCallTools>>, idx: number) => {
@@ -505,18 +517,17 @@
     const processStream = async (stream: any, idx: number) => {
       const toolCalls: ChatCompletionMessageToolCall[] = []
+      let time_first_token_millsec = 0
+      const start_time_millsec = new Date().getTime()
       // Handle non-streaming case (already returns early, no change needed here)
       if (!isSupportStreamOutput()) {
-        const time_completion_millsec = new Date().getTime() - start_time_millsec
         // Calculate final metrics once
-        const finalMetrics = {
-          completion_tokens: stream.usage?.completion_tokens,
-          time_completion_millsec,
-          time_first_token_millsec: 0 // Non-streaming, first token time is not relevant
-        }
+        finalMetrics.completion_tokens = stream.usage?.completion_tokens
+        finalMetrics.time_completion_millsec = new Date().getTime() - start_time_millsec
         // Create a synthetic usage object if stream.usage is undefined
-        const finalUsage = stream.usage
+        finalUsage = { ...stream.usage }
         // Separate onChunk calls for text and usage/metrics
         let content = ''
         stream.choices.forEach((choice) => {
@@ -526,7 +537,7 @@
             onChunk({
               type: ChunkType.THINKING_COMPLETE,
               text: choice.message.reasoning,
-              thinking_millsec: time_completion_millsec
+              thinking_millsec: new Date().getTime() - start_time_millsec
             })
           }
           // text
@@ -576,20 +587,9 @@
         return
       }
-      let content = '' // Accumulate content for tool processing if needed
+      let content = ''
       let thinkingContent = ''
-      // track the final completion time delta
-      let final_time_completion_millsec_delta = 0
-      let final_time_thinking_millsec_delta = 0
-      // Variable to store the last received usage object
-      let lastUsage: Usage | undefined = undefined
-      // let isThinkingInContent: ThoughtProcessor | undefined = undefined
-      // const processThinkingChunk = this.handleThinkingTags()
       let isFirstChunk = true
-      let time_first_token_millsec = 0
-      let time_first_token_millsec_delta = 0
-      let time_first_content_millsec = 0
-      let time_thinking_start = 0
       // 1. initialize the middleware
       const reasoningTags = [
@@ -640,25 +640,24 @@
       // 3. consume processedStream and dispatch onChunk
       for await (const chunk of readableStreamAsyncIterable(processedStream)) {
-        const currentTime = new Date().getTime()
         const delta = chunk.type === 'finish' ? chunk.delta : chunk
         const rawChunk = chunk.type === 'finish' ? chunk.chunk : chunk
         switch (chunk.type) {
           case 'reasoning': {
-            if (time_thinking_start === 0) {
-              time_thinking_start = currentTime
-              time_first_token_millsec = currentTime
-              time_first_token_millsec_delta = currentTime - start_time_millsec
+            if (time_first_token_millsec === 0) {
+              time_first_token_millsec = new Date().getTime()
             }
             thinkingContent += chunk.textDelta
-            const thinking_time = currentTime - time_thinking_start
-            onChunk({ type: ChunkType.THINKING_DELTA, text: chunk.textDelta, thinking_millsec: thinking_time })
+            onChunk({
+              type: ChunkType.THINKING_DELTA,
+              text: chunk.textDelta,
+              thinking_millsec: new Date().getTime() - time_first_token_millsec
+            })
             break
           }
           case 'text-delta': {
             let textDelta = chunk.textDelta
             if (assistant.enableWebSearch && delta) {
               const originalDelta = rawChunk?.choices?.[0]?.delta
@@ -676,25 +675,32 @@
             if (isFirstChunk) {
               isFirstChunk = false
               if (time_first_token_millsec === 0) {
-                time_first_token_millsec = currentTime
-                time_first_token_millsec_delta = currentTime - start_time_millsec
-              }
-            }
-            content += textDelta
-            if (time_thinking_start > 0 && time_first_content_millsec === 0) {
-              time_first_content_millsec = currentTime
-              final_time_thinking_millsec_delta = time_first_content_millsec - time_thinking_start
+                time_first_token_millsec = new Date().getTime()
+              } else {
                 onChunk({
                   type: ChunkType.THINKING_COMPLETE,
                   text: thinkingContent,
-                  thinking_millsec: final_time_thinking_millsec_delta
+                  thinking_millsec: new Date().getTime() - time_first_token_millsec
                 })
               }
+            }
+            content += textDelta
             onChunk({ type: ChunkType.TEXT_DELTA, text: textDelta })
             break
           }
           case 'tool-calls': {
+            if (isFirstChunk) {
+              isFirstChunk = false
+              if (time_first_token_millsec === 0) {
+                time_first_token_millsec = new Date().getTime()
+              } else {
+                onChunk({
+                  type: ChunkType.THINKING_COMPLETE,
+                  text: thinkingContent,
+                  thinking_millsec: new Date().getTime() - time_first_token_millsec
+                })
+              }
+            }
             chunk.delta.tool_calls.forEach((toolCall) => {
               const { id, index, type, function: fun } = toolCall
               if (id && type === 'function' && fun) {
@@ -721,10 +727,14 @@
           if (!isEmpty(finishReason)) {
             onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
-            final_time_completion_millsec_delta = currentTime - start_time_millsec
             if (usage) {
-              lastUsage = usage
+              finalUsage.completion_tokens += usage.completion_tokens || 0
+              finalUsage.prompt_tokens += usage.prompt_tokens || 0
+              finalUsage.total_tokens += usage.total_tokens || 0
+              finalMetrics.completion_tokens += usage.completion_tokens || 0
             }
+            finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+            finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec
             if (originalFinishDelta?.annotations) {
               onChunk({
                 type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
@@ -774,6 +784,11 @@
               } as LLMWebSearchCompleteChunk)
             }
           }
+            break
+          }
+        }
+      }
       reqMessages.push({
         role: 'assistant',
         content: content,
@@ -801,22 +816,14 @@
       if (toolResults.length) {
         await processToolResults(toolResults, idx)
       }
       onChunk({
         type: ChunkType.BLOCK_COMPLETE,
         response: {
-          usage: lastUsage,
-          metrics: {
-            completion_tokens: lastUsage?.completion_tokens,
-            time_completion_millsec: final_time_completion_millsec_delta,
-            time_first_token_millsec: time_first_token_millsec_delta,
-            time_thinking_millsec: final_time_thinking_millsec_delta
-          }
+          usage: finalUsage,
+          metrics: finalMetrics
         }
       })
-          break
-        }
-      }
-    }
     }
     reqMessages = processReqMessages(model, reqMessages)
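
Both the text-delta and the tool-calls branches above now share the same first-chunk handling: time_first_token_millsec is stamped by whichever chunk arrives first, and if reasoning chunks already stamped it, the accumulated reasoning is flushed as a THINKING_COMPLETE before normal output continues. A reduced sketch of that logic; the state object and emit callback are simplified stand-ins, not the provider's real interfaces:

type Emit = (chunk: { type: 'THINKING_COMPLETE'; text: string; thinking_millsec: number }) => void

interface RoundState {
  isFirstChunk: boolean
  time_first_token_millsec: number
  thinkingContent: string
}

// Shared first-chunk handling for 'text-delta' and 'tool-calls' chunks.
function onFirstNonReasoningChunk(state: RoundState, emit: Emit): void {
  if (!state.isFirstChunk) return
  state.isFirstChunk = false
  if (state.time_first_token_millsec === 0) {
    // No reasoning arrived before this chunk: just record the first-token time.
    state.time_first_token_millsec = Date.now()
  } else {
    // Reasoning chunks already set the timestamp: close out the thinking block first.
    emit({
      type: 'THINKING_COMPLETE',
      text: state.thinkingContent,
      thinking_millsec: Date.now() - state.time_first_token_millsec
    })
  }
}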

View File

@@ -24,6 +24,7 @@ import {
   MCPCallToolResponse,
   MCPTool,
   MCPToolResponse,
+  Metrics,
   Model,
   Provider,
   Suggestion,
@@ -332,7 +333,6 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
     const lastUserMessage = _messages.findLast((m) => m.role === 'user')
     const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
     const { signal } = abortController
-    let time_first_token_millsec_delta = 0
     const start_time_millsec = new Date().getTime()
     const response = await this.sdk.chat.completions
       // @ts-ignore key is not typed
@@ -354,8 +354,17 @@
     const processStream = async (stream: any) => {
       let content = ''
       let isFirstChunk = true
-      let final_time_completion_millsec_delta = 0
-      let lastUsage: Usage | undefined = undefined
+      const finalUsage: Usage = {
+        completion_tokens: 0,
+        prompt_tokens: 0,
+        total_tokens: 0
+      }
+      const finalMetrics: Metrics = {
+        completion_tokens: 0,
+        time_completion_millsec: 0,
+        time_first_token_millsec: 0
+      }
       for await (const chunk of stream as any) {
         if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
           break
@@ -368,17 +377,21 @@
         }
         if (isFirstChunk) {
           isFirstChunk = false
-          time_first_token_millsec_delta = new Date().getTime() - start_time_millsec
+          finalMetrics.time_first_token_millsec = new Date().getTime() - start_time_millsec
         }
         content += delta.content
         onChunk({ type: ChunkType.TEXT_DELTA, text: delta.content })
       }
       if (!isEmpty(finishReason) || chunk?.annotations) {
         onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
-        final_time_completion_millsec_delta = new Date().getTime() - start_time_millsec
+        finalMetrics.time_completion_millsec = new Date().getTime() - start_time_millsec
         if (chunk.usage) {
-          lastUsage = chunk.usage
+          const usage = chunk.usage as OpenAI.Completions.CompletionUsage
+          finalUsage.completion_tokens = usage.completion_tokens
+          finalUsage.prompt_tokens = usage.prompt_tokens
+          finalUsage.total_tokens = usage.total_tokens
         }
+        finalMetrics.completion_tokens = finalUsage.completion_tokens
       }
       if (delta?.annotations) {
         onChunk({
@@ -393,12 +406,8 @@
       onChunk({
         type: ChunkType.BLOCK_COMPLETE,
         response: {
-          usage: lastUsage,
-          metrics: {
-            completion_tokens: lastUsage?.completion_tokens,
-            time_completion_millsec: final_time_completion_millsec_delta,
-            time_first_token_millsec: time_first_token_millsec_delta
-          }
+          usage: finalUsage,
+          metrics: finalMetrics
         }
       })
     }
@@ -454,9 +463,6 @@
       userMessage.push(await this.getReponseMessageParam(message, model))
     }
-    let time_first_token_millsec = 0
-    const start_time_millsec = new Date().getTime()
     const lastUserMessage = _messages.findLast((m) => m.role === 'user')
     const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
     const { signal } = abortController
@@ -469,6 +475,18 @@
       reqMessages = [systemMessage, ...userMessage].filter(Boolean) as OpenAI.Responses.EasyInputMessage[]
     }
+    const finalUsage: Usage = {
+      completion_tokens: 0,
+      prompt_tokens: 0,
+      total_tokens: 0
+    }
+    const finalMetrics: Metrics = {
+      completion_tokens: 0,
+      time_completion_millsec: 0,
+      time_first_token_millsec: 0
+    }
     const toolResponses: MCPToolResponse[] = []
     const processToolResults = async (toolResults: Awaited<ReturnType<typeof parseAndCallTools>>, idx: number) => {
@@ -548,6 +566,8 @@
       idx: number
     ) => {
       const toolCalls: OpenAI.Responses.ResponseFunctionToolCall[] = []
+      let time_first_token_millsec = 0
+      const start_time_millsec = new Date().getTime()
       if (!streamOutput) {
         const nonStream = stream as OpenAI.Responses.Response
@@ -632,8 +652,6 @@
       const outputItems: OpenAI.Responses.ResponseOutputItem[] = []
-      let lastUsage: Usage | undefined = undefined
-      let final_time_completion_millsec_delta = 0
       for await (const chunk of stream as Stream<OpenAI.Responses.ResponseStreamEvent>) {
         if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
           break
@@ -707,18 +725,18 @@
           }
           break
         case 'response.completed': {
-          final_time_completion_millsec_delta = new Date().getTime() - start_time_millsec
           const completion_tokens =
             (chunk.response.usage?.output_tokens || 0) +
             (chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 0)
           const total_tokens =
             (chunk.response.usage?.total_tokens || 0) +
             (chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 0)
-          lastUsage = {
-            completion_tokens,
-            prompt_tokens: chunk.response.usage?.input_tokens || 0,
-            total_tokens
-          }
+          finalUsage.completion_tokens += completion_tokens
+          finalUsage.prompt_tokens += chunk.response.usage?.input_tokens || 0
+          finalUsage.total_tokens += total_tokens
+          finalMetrics.completion_tokens += completion_tokens
+          finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+          finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec
           break
         }
         case 'error':
@@ -760,12 +778,8 @@
       onChunk({
         type: ChunkType.BLOCK_COMPLETE,
         response: {
-          usage: lastUsage,
-          metrics: {
-            completion_tokens: lastUsage?.completion_tokens,
-            time_completion_millsec: final_time_completion_millsec_delta,
-            time_first_token_millsec: time_first_token_millsec - start_time_millsec
-          }
+          usage: finalUsage,
+          metrics: finalMetrics
         }
       })
     }

View File

@@ -565,7 +565,7 @@ const fetchAndProcessAssistantResponseImpl = async (
         message: pauseErrorLanguagePlaceholder || error.message || 'Stream processing error',
         originalMessage: error.message,
         stack: error.stack,
-        status: error.status,
+        status: error.status || error.code,
         requestId: error.request_id
       }
       if (lastBlockId) {
@@ -609,13 +609,14 @@
       // update the topic's name
       autoRenameTopic(assistant, topicId)
-      if (response && !response.usage) {
+      if (response && response.usage?.total_tokens === 0) {
         const usage = await estimateMessagesUsage({ assistant, messages: finalContextWithAssistant })
         response.usage = usage
       }
+      console.log('response', response)
     }
     if (response && response.metrics) {
-      if (!response.metrics.completion_tokens && response.usage) {
+      if (response.metrics.completion_tokens === 0 && response.usage?.completion_tokens) {
         response = {
           ...response,
           metrics: {
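
Because providers now always attach zero-initialized usage and metrics objects, the thunk switches from truthiness checks to explicit zero checks: usage is re-estimated only when total_tokens is still 0, and metrics.completion_tokens is backfilled from usage only when it is still 0. A sketch of that fallback; the spread into metrics is assumed, since the diff truncates inside the object literal:

// Sketch only: response, assistant, finalContextWithAssistant and estimateMessagesUsage
// are the names used in the diff; their definitions are not shown here.
async function fillMissingUsage(response: any, assistant: any, finalContextWithAssistant: any[]) {
  if (response && response.usage?.total_tokens === 0) {
    // A zero total means the provider reported nothing, so estimate from the conversation instead.
    response.usage = await estimateMessagesUsage({ assistant, messages: finalContextWithAssistant })
  }
  if (response?.metrics && response.metrics.completion_tokens === 0 && response.usage?.completion_tokens) {
    response = {
      ...response,
      metrics: {
        ...response.metrics, // assumed: keep the timing fields, only backfill the token count
        completion_tokens: response.usage.completion_tokens
      }
    }
  }
  return response
}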

View File

@@ -121,8 +121,8 @@ export type Usage = OpenAI.Completions.CompletionUsage & {
 }
 export type Metrics = {
-  completion_tokens?: number
-  time_completion_millsec?: number
+  completion_tokens: number
+  time_completion_millsec: number
   time_first_token_millsec?: number
   time_thinking_millsec?: number
 }