diff --git a/src/renderer/src/pages/home/Messages/Blocks/ErrorBlock.tsx b/src/renderer/src/pages/home/Messages/Blocks/ErrorBlock.tsx
index 2c8161397d..f55a3ab6b1 100644
--- a/src/renderer/src/pages/home/Messages/Blocks/ErrorBlock.tsx
+++ b/src/renderer/src/pages/home/Messages/Blocks/ErrorBlock.tsx
@@ -16,7 +16,7 @@ const MessageErrorInfo: React.FC<{ block: ErrorMessageBlock }> = ({ block }) =>
const HTTP_ERROR_CODES = [400, 401, 403, 404, 429, 500, 502, 503, 504]
if (block.error && HTTP_ERROR_CODES.includes(block.error?.status)) {
- return
+ return
}
if (block?.error?.message) {
const errorKey = `error.${block.error.message}`
diff --git a/src/renderer/src/pages/home/Messages/Blocks/ThinkingBlock.tsx b/src/renderer/src/pages/home/Messages/Blocks/ThinkingBlock.tsx
index f1d546bd58..fa3c2b9ff4 100644
--- a/src/renderer/src/pages/home/Messages/Blocks/ThinkingBlock.tsx
+++ b/src/renderer/src/pages/home/Messages/Blocks/ThinkingBlock.tsx
@@ -56,10 +56,12 @@ const ThinkingBlock: React.FC = ({ block }) => {
useEffect(() => {
if (isThinking) {
intervalId.current = setInterval(() => {
- setThinkingTime((prev) => prev + 200)
- }, 200)
- } else {
- return
+ setThinkingTime((prev) => prev + 100)
+ }, 100)
+ } else if (intervalId.current) {
+ // Clear the timer immediately
+ clearInterval(intervalId.current)
+ intervalId.current = null
}
return () => {
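
For reference, a minimal sketch of the interval lifecycle this ThinkingBlock change moves toward: tick every 100 ms while thinking, clear the timer the moment thinking stops, and clear again on effect cleanup. The `ThinkingTimer` component, its props, and the rendered output are simplified stand-ins, not the real component.

```ts
import React, { useEffect, useRef, useState } from 'react'

const ThinkingTimer: React.FC<{ isThinking: boolean }> = ({ isThinking }) => {
  const [thinkingTime, setThinkingTime] = useState(0)
  const intervalId = useRef<ReturnType<typeof setInterval> | null>(null)

  useEffect(() => {
    if (isThinking) {
      // Tick every 100 ms while the model is still thinking.
      intervalId.current = setInterval(() => setThinkingTime((prev) => prev + 100), 100)
    } else if (intervalId.current) {
      // Stop counting as soon as thinking ends instead of waiting for unmount.
      clearInterval(intervalId.current)
      intervalId.current = null
    }
    return () => {
      // Always clear on cleanup so re-renders and unmounts never leak a timer.
      if (intervalId.current) {
        clearInterval(intervalId.current)
        intervalId.current = null
      }
    }
  }, [isThinking])

  return <span>{(thinkingTime / 1000).toFixed(1)}s</span>
}
```
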
diff --git a/src/renderer/src/pages/home/Messages/MessageContent.tsx b/src/renderer/src/pages/home/Messages/MessageContent.tsx
index 60aa2cdf6b..b01feb2be9 100644
--- a/src/renderer/src/pages/home/Messages/MessageContent.tsx
+++ b/src/renderer/src/pages/home/Messages/MessageContent.tsx
@@ -10,35 +10,6 @@ interface Props {
}
const MessageContent: React.FC<Props> = ({ message }) => {
- // const { t } = useTranslation()
- // if (message.status === 'pending') {
- // return (
-
- // )
- // }
-
- // if (message.status === 'searching') {
- // return (
- //
- //
- // {t('message.searching')}
- //
- //
- // )
- // }
-
- // if (message.status === 'error') {
- // return
- // }
-
- // if (message.type === '@' && model) {
- // const content = `[@${model.name}](#) ${getBriefInfo(message.content)}`
- // return
- // }
- // const toolUseRegex = /<tool_use>([\s\S]*?)<\/tool_use>/g
-
- // console.log('message', message)
-
return (
<>
diff --git a/src/renderer/src/pages/home/Messages/MessageError.tsx b/src/renderer/src/pages/home/Messages/MessageError.tsx
deleted file mode 100644
index e0c0ea00bd..0000000000
--- a/src/renderer/src/pages/home/Messages/MessageError.tsx
+++ /dev/null
@@ -1,45 +0,0 @@
-import type { ErrorMessageBlock } from '@renderer/types/newMessage'
-import { Alert as AntdAlert } from 'antd'
-import { FC } from 'react'
-import { useTranslation } from 'react-i18next'
-import styled from 'styled-components'
-
-const MessageError: FC<{ block: ErrorMessageBlock }> = ({ block }) => {
- return (
- <>
- {/*
- {block.error && (
-
- )} */}
-
- >
- )
-}
-
-const MessageErrorInfo: FC<{ block: ErrorMessageBlock }> = ({ block }) => {
- const { t } = useTranslation()
-
- const HTTP_ERROR_CODES = [400, 401, 403, 404, 429, 500, 502, 503, 504]
- console.log('block', block)
- if (block.error && HTTP_ERROR_CODES.includes(block.error?.status)) {
- return
- }
- if (block?.error?.message) {
- return
- }
-
- return
-}
-
-const Alert = styled(AntdAlert)`
- margin: 15px 0 8px;
- padding: 10px;
- font-size: 12px;
-`
-
-export default MessageError
diff --git a/src/renderer/src/providers/AiProvider/AnthropicProvider.ts b/src/renderer/src/providers/AiProvider/AnthropicProvider.ts
index 1bb58c22ab..3f2929bdd0 100644
--- a/src/renderer/src/providers/AiProvider/AnthropicProvider.ts
+++ b/src/renderer/src/providers/AiProvider/AnthropicProvider.ts
@@ -30,10 +30,12 @@ import {
MCPCallToolResponse,
MCPTool,
MCPToolResponse,
+ Metrics,
Model,
Provider,
Suggestion,
ToolCallResponse,
+ Usage,
WebSearchSource
} from '@renderer/types'
import { ChunkType } from '@renderer/types/chunk'
@@ -47,7 +49,7 @@ import {
} from '@renderer/utils/mcp-tools'
import { findFileBlocks, findImageBlocks, getMainTextContent } from '@renderer/utils/messageUtils/find'
import { buildSystemPrompt } from '@renderer/utils/prompt'
-import { first, flatten, sum, takeRight } from 'lodash'
+import { first, flatten, takeRight } from 'lodash'
import OpenAI from 'openai'
import { CompletionsParams } from '.'
@@ -270,77 +272,82 @@ export default class AnthropicProvider extends BaseProvider {
...this.getCustomParameters(assistant)
}
- let time_first_token_millsec = 0
- let time_first_content_millsec = 0
- let checkThinkingContent = false
- let thinking_content = ''
- const start_time_millsec = new Date().getTime()
-
- if (!streamOutput) {
- const message = await this.sdk.messages.create({ ...body, stream: false })
- const time_completion_millsec = new Date().getTime() - start_time_millsec
-
- let text = ''
- let reasoning_content = ''
-
- if (message.content && message.content.length > 0) {
- const thinkingBlock = message.content.find((block) => block.type === 'thinking')
- const textBlock = message.content.find((block) => block.type === 'text')
-
- if (thinkingBlock && 'thinking' in thinkingBlock) {
- reasoning_content = thinkingBlock.thinking
- }
-
- if (textBlock && 'text' in textBlock) {
- text = textBlock.text
- }
- }
-
- return onChunk({
- type: ChunkType.BLOCK_COMPLETE,
- response: {
- text,
- reasoning_content,
- usage: message.usage as any,
- metrics: {
- completion_tokens: message.usage.output_tokens,
- time_completion_millsec,
- time_first_token_millsec: 0
- }
- }
- })
- }
-
const { abortController, cleanup } = this.createAbortController(lastUserMessage?.id)
const { signal } = abortController
+
+ const finalUsage: Usage = {
+ completion_tokens: 0,
+ prompt_tokens: 0,
+ total_tokens: 0
+ }
+
+ const finalMetrics: Metrics = {
+ completion_tokens: 0,
+ time_completion_millsec: 0,
+ time_first_token_millsec: 0
+ }
const toolResponses: MCPToolResponse[] = []
- const processStream = (body: MessageCreateParamsNonStreaming, idx: number) => {
+ const processStream = async (body: MessageCreateParamsNonStreaming, idx: number) => {
+ let time_first_token_millsec = 0
+ const start_time_millsec = new Date().getTime()
+
+ if (!streamOutput) {
+ const message = await this.sdk.messages.create({ ...body, stream: false })
+ const time_completion_millsec = new Date().getTime() - start_time_millsec
+
+ let text = ''
+ let reasoning_content = ''
+
+ if (message.content && message.content.length > 0) {
+ const thinkingBlock = message.content.find((block) => block.type === 'thinking')
+ const textBlock = message.content.find((block) => block.type === 'text')
+
+ if (thinkingBlock && 'thinking' in thinkingBlock) {
+ reasoning_content = thinkingBlock.thinking
+ }
+
+ if (textBlock && 'text' in textBlock) {
+ text = textBlock.text
+ }
+ }
+
+ return onChunk({
+ type: ChunkType.BLOCK_COMPLETE,
+ response: {
+ text,
+ reasoning_content,
+ usage: message.usage as any,
+ metrics: {
+ completion_tokens: message.usage.output_tokens,
+ time_completion_millsec,
+ time_first_token_millsec: 0
+ }
+ }
+ })
+ }
+
+ let thinking_content = ''
+ let isFirstChunk = true
+
return new Promise((resolve, reject) => {
// Wait for the API to return the stream
const toolCalls: ToolUseBlock[] = []
- let hasThinkingContent = false
+
this.sdk.messages
.stream({ ...body, stream: true }, { signal, timeout: 5 * 60 * 1000 })
.on('text', (text) => {
- if (hasThinkingContent && !checkThinkingContent) {
- checkThinkingContent = true
- onChunk({
- type: ChunkType.THINKING_COMPLETE,
- text: thinking_content,
- thinking_millsec: new Date().getTime() - time_first_content_millsec
- })
- }
- if (time_first_token_millsec == 0) {
- time_first_token_millsec = new Date().getTime()
- }
-
- thinking_content = ''
- checkThinkingContent = false
- hasThinkingContent = false
-
- if (!hasThinkingContent && time_first_content_millsec === 0) {
- time_first_content_millsec = new Date().getTime()
+ if (isFirstChunk) {
+ isFirstChunk = false
+ if (time_first_token_millsec == 0) {
+ time_first_token_millsec = new Date().getTime()
+ } else {
+ onChunk({
+ type: ChunkType.THINKING_COMPLETE,
+ text: thinking_content,
+ thinking_millsec: new Date().getTime() - time_first_token_millsec
+ })
+ }
}
onChunk({ type: ChunkType.TEXT_DELTA, text })
@@ -372,34 +379,22 @@ export default class AnthropicProvider extends BaseProvider {
})
}
}
+ if (block.type === 'tool_use') {
+ toolCalls.push(block)
+ }
})
.on('thinking', (thinking) => {
- hasThinkingContent = true
- const currentTime = new Date().getTime() // Get current time for each chunk
-
if (time_first_token_millsec == 0) {
- time_first_token_millsec = currentTime
+ time_first_token_millsec = new Date().getTime()
}
- // Set time_first_content_millsec ONLY when the first content (thinking or text) arrives
- if (time_first_content_millsec === 0) {
- time_first_content_millsec = currentTime
- }
-
- // Calculate thinking time as time elapsed since start until this chunk
- const thinking_time = currentTime - time_first_content_millsec
onChunk({
type: ChunkType.THINKING_DELTA,
text: thinking,
- thinking_millsec: thinking_time
+ thinking_millsec: new Date().getTime() - time_first_token_millsec
})
thinking_content += thinking
})
- .on('contentBlock', (content) => {
- if (content.type === 'tool_use') {
- toolCalls.push(content)
- }
- })
.on('finalMessage', async (message) => {
const toolResults: Awaited> = []
// tool call
@@ -458,29 +453,28 @@ export default class AnthropicProvider extends BaseProvider {
newBody.messages = userMessages
onChunk({ type: ChunkType.LLM_RESPONSE_CREATED })
- await processStream(newBody, idx + 1)
+ try {
+ await processStream(newBody, idx + 1)
+ } catch (error) {
+ console.error('Error processing stream:', error)
+ reject(error)
+ }
}
- const time_completion_millsec = new Date().getTime() - start_time_millsec
+ finalUsage.prompt_tokens += message.usage.input_tokens
+ finalUsage.completion_tokens += message.usage.output_tokens
+ finalUsage.total_tokens += message.usage.input_tokens + message.usage.output_tokens
+ finalMetrics.completion_tokens = finalUsage.completion_tokens
+ finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+ finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec
onChunk({
type: ChunkType.BLOCK_COMPLETE,
response: {
- usage: {
- prompt_tokens: message.usage.input_tokens,
- completion_tokens: message.usage.output_tokens,
- total_tokens: sum(Object.values(message.usage))
- },
- metrics: {
- completion_tokens: message.usage.output_tokens,
- time_completion_millsec,
- time_first_token_millsec: time_first_token_millsec - start_time_millsec
- }
+ usage: finalUsage,
+ metrics: finalMetrics
}
})
- // FIXME: temporary workaround, reset the timestamps and thinking content
- time_first_token_millsec = 0
- time_first_content_millsec = 0
resolve()
})
.on('error', (error) => reject(error))
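
The AnthropicProvider hunk hoists `finalUsage`/`finalMetrics` above the recursive `processStream` so tool-call follow-up rounds fold their token counts and timings into one shared total before each `BLOCK_COMPLETE`. A stripped-down sketch of that accumulation pattern, assuming simplified `Usage`/`Metrics` shapes and a hypothetical `runOneRound` in place of the real SDK stream:

```ts
type Usage = { prompt_tokens: number; completion_tokens: number; total_tokens: number }
type Metrics = { completion_tokens: number; time_completion_millsec: number; time_first_token_millsec: number }
type RoundResult = { input_tokens: number; output_tokens: number; needsFollowUp: boolean }

// Placeholder for one streamed round trip; the real provider reads these numbers from message.usage.
async function runOneRound(idx: number): Promise<RoundResult> {
  return { input_tokens: 100, output_tokens: 50, needsFollowUp: idx === 0 }
}

async function completeWithTools(onBlockComplete: (usage: Usage, metrics: Metrics) => void) {
  // Shared accumulators: every recursive round adds into the same objects.
  const finalUsage: Usage = { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
  const finalMetrics: Metrics = { completion_tokens: 0, time_completion_millsec: 0, time_first_token_millsec: 0 }

  const processRound = async (idx: number): Promise<void> => {
    const start = Date.now()
    const result = await runOneRound(idx)

    if (result.needsFollowUp) {
      // Tool results trigger a follow-up round, which accumulates before this round reports.
      await processRound(idx + 1)
    }

    finalUsage.prompt_tokens += result.input_tokens
    finalUsage.completion_tokens += result.output_tokens
    finalUsage.total_tokens += result.input_tokens + result.output_tokens
    finalMetrics.completion_tokens = finalUsage.completion_tokens
    finalMetrics.time_completion_millsec += Date.now() - start
    onBlockComplete(finalUsage, finalMetrics)
  }

  await processRound(0)
}
```
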
diff --git a/src/renderer/src/providers/AiProvider/GeminiProvider.ts b/src/renderer/src/providers/AiProvider/GeminiProvider.ts
index c04632efd3..52323efc1d 100644
--- a/src/renderer/src/providers/AiProvider/GeminiProvider.ts
+++ b/src/renderer/src/providers/AiProvider/GeminiProvider.ts
@@ -40,6 +40,7 @@ import {
MCPCallToolResponse,
MCPTool,
MCPToolResponse,
+ Metrics,
Model,
Provider,
Suggestion,
@@ -370,8 +371,17 @@ export default class GeminiProvider extends BaseProvider {
}
}
- const start_time_millsec = new Date().getTime()
- let time_first_token_millsec = 0
+ const finalUsage: Usage = {
+ completion_tokens: 0,
+ prompt_tokens: 0,
+ total_tokens: 0
+ }
+
+ const finalMetrics: Metrics = {
+ completion_tokens: 0,
+ time_completion_millsec: 0,
+ time_first_token_millsec: 0
+ }
const { cleanup, abortController } = this.createAbortController(userLastMessage?.id, true)
@@ -445,6 +455,8 @@ export default class GeminiProvider extends BaseProvider {
history.push(messageContents)
let functionCalls: FunctionCall[] = []
+ let time_first_token_millsec = 0
+ const start_time_millsec = new Date().getTime()
if (stream instanceof GenerateContentResponse) {
let content = ''
@@ -504,34 +516,18 @@ export default class GeminiProvider extends BaseProvider {
} as BlockCompleteChunk)
} else {
let content = ''
- let final_time_completion_millsec = 0
- let lastUsage: Usage | undefined = undefined
for await (const chunk of stream) {
if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) break
- // --- Calculate Metrics ---
- if (time_first_token_millsec == 0 && chunk.text !== undefined) {
- // Update based on text arrival
- time_first_token_millsec = new Date().getTime() - start_time_millsec
+ if (time_first_token_millsec == 0) {
+ time_first_token_millsec = new Date().getTime()
}
- // 1. Text Content
if (chunk.text !== undefined) {
content += chunk.text
onChunk({ type: ChunkType.TEXT_DELTA, text: chunk.text })
}
- // 2. Usage Data
- if (chunk.usageMetadata) {
- lastUsage = {
- prompt_tokens: chunk.usageMetadata.promptTokenCount || 0,
- completion_tokens: chunk.usageMetadata.candidatesTokenCount || 0,
- total_tokens: chunk.usageMetadata.totalTokenCount || 0
- }
- final_time_completion_millsec = new Date().getTime() - start_time_millsec
- }
-
- // 4. Image Generation
const generateImage = this.processGeminiImageResponse(chunk, onChunk)
if (generateImage?.images?.length) {
onChunk({ type: ChunkType.IMAGE_COMPLETE, image: generateImage })
@@ -541,8 +537,12 @@ export default class GeminiProvider extends BaseProvider {
if (chunk.text) {
onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
}
+ if (chunk.usageMetadata) {
+ finalUsage.prompt_tokens += chunk.usageMetadata.promptTokenCount || 0
+ finalUsage.completion_tokens += chunk.usageMetadata.candidatesTokenCount || 0
+ finalUsage.total_tokens += chunk.usageMetadata.totalTokenCount || 0
+ }
if (chunk.candidates?.[0]?.groundingMetadata) {
- // 3. Grounding/Search Metadata
const groundingMetadata = chunk.candidates?.[0]?.groundingMetadata
onChunk({
type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
@@ -561,35 +561,37 @@ export default class GeminiProvider extends BaseProvider {
functionCalls = functionCalls.concat(chunk.functionCalls)
}
- onChunk({
- type: ChunkType.BLOCK_COMPLETE,
- response: {
- metrics: {
- completion_tokens: lastUsage?.completion_tokens,
- time_completion_millsec: final_time_completion_millsec,
- time_first_token_millsec
- },
- usage: lastUsage
- }
- })
- }
-
- // --- End Incremental onChunk calls ---
-
- // Call processToolUses AFTER potentially processing text content in this chunk
- // This assumes tools might be specified within the text stream
- // Note: parseAndCallTools inside should handle its own onChunk for tool responses
- let toolResults: Awaited> = []
- if (functionCalls.length) {
- toolResults = await processToolCalls(functionCalls)
- }
- if (content.length) {
- toolResults = toolResults.concat(await processToolUses(content))
- }
- if (toolResults.length) {
- await processToolResults(toolResults, idx)
+ finalMetrics.completion_tokens = finalUsage.completion_tokens
+ finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+ finalMetrics.time_first_token_millsec =
+ (finalMetrics.time_first_token_millsec || 0) + (time_first_token_millsec - start_time_millsec)
}
}
+
+ // --- End Incremental onChunk calls ---
+
+ // Call processToolUses AFTER the streamed text content has been processed
+ // This assumes tools might be specified within the text stream
+ // Note: parseAndCallTools inside should handle its own onChunk for tool responses
+ let toolResults: Awaited> = []
+ if (functionCalls.length) {
+ toolResults = await processToolCalls(functionCalls)
+ }
+ if (content.length) {
+ toolResults = toolResults.concat(await processToolUses(content))
+ }
+ if (toolResults.length) {
+ await processToolResults(toolResults, idx)
+ }
+
+ // FIXME: because of the recursion, this is emitted n times
+ onChunk({
+ type: ChunkType.BLOCK_COMPLETE,
+ response: {
+ usage: finalUsage,
+ metrics: finalMetrics
+ }
+ })
}
}
@@ -615,17 +617,6 @@ export default class GeminiProvider extends BaseProvider {
})
await processStream(userMessagesStream, 0).finally(cleanup)
-
- const final_time_completion_millsec = new Date().getTime() - start_time_millsec
- onChunk({
- type: ChunkType.BLOCK_COMPLETE,
- response: {
- metrics: {
- time_completion_millsec: final_time_completion_millsec,
- time_first_token_millsec
- }
- }
- })
}
/**
diff --git a/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts b/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts
index cfb617d46a..f9a9042074 100644
--- a/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts
+++ b/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts
@@ -34,6 +34,7 @@ import {
MCPCallToolResponse,
MCPTool,
MCPToolResponse,
+ Metrics,
Model,
Provider,
Suggestion,
@@ -395,7 +396,6 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
return streamOutput
}
- const start_time_millsec = new Date().getTime()
const lastUserMessage = _messages.findLast((m) => m.role === 'user')
const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
const { signal } = abortController
@@ -423,6 +423,18 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
reqMessages = [systemMessage, ...userMessages].filter(Boolean) as ChatCompletionMessageParam[]
}
+ let finalUsage: Usage = {
+ completion_tokens: 0,
+ prompt_tokens: 0,
+ total_tokens: 0
+ }
+
+ const finalMetrics: Metrics = {
+ completion_tokens: 0,
+ time_completion_millsec: 0,
+ time_first_token_millsec: 0
+ }
+
const toolResponses: MCPToolResponse[] = []
const processToolResults = async (toolResults: Awaited>, idx: number) => {
@@ -505,18 +517,17 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
const processStream = async (stream: any, idx: number) => {
const toolCalls: ChatCompletionMessageToolCall[] = []
+ let time_first_token_millsec = 0
+ const start_time_millsec = new Date().getTime()
+
// Handle non-streaming case (already returns early, no change needed here)
if (!isSupportStreamOutput()) {
- const time_completion_millsec = new Date().getTime() - start_time_millsec
// Calculate final metrics once
- const finalMetrics = {
- completion_tokens: stream.usage?.completion_tokens,
- time_completion_millsec,
- time_first_token_millsec: 0 // Non-streaming, first token time is not relevant
- }
+ finalMetrics.completion_tokens = stream.usage?.completion_tokens
+ finalMetrics.time_completion_millsec = new Date().getTime() - start_time_millsec
// Create a synthetic usage object if stream.usage is undefined
- const finalUsage = stream.usage
+ finalUsage = { ...stream.usage }
// Separate onChunk calls for text and usage/metrics
let content = ''
stream.choices.forEach((choice) => {
@@ -526,7 +537,7 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
onChunk({
type: ChunkType.THINKING_COMPLETE,
text: choice.message.reasoning,
- thinking_millsec: time_completion_millsec
+ thinking_millsec: new Date().getTime() - start_time_millsec
})
}
// text
@@ -576,20 +587,9 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
return
}
- let content = '' // Accumulate content for tool processing if needed
+ let content = ''
let thinkingContent = ''
- // Track the final completion time delta
- let final_time_completion_millsec_delta = 0
- let final_time_thinking_millsec_delta = 0
- // Variable to store the last received usage object
- let lastUsage: Usage | undefined = undefined
- // let isThinkingInContent: ThoughtProcessor | undefined = undefined
- // const processThinkingChunk = this.handleThinkingTags()
let isFirstChunk = true
- let time_first_token_millsec = 0
- let time_first_token_millsec_delta = 0
- let time_first_content_millsec = 0
- let time_thinking_start = 0
// 1. Initialize the middleware
const reasoningTags = [
@@ -640,25 +640,24 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
// 3. Consume processedStream and dispatch onChunk
for await (const chunk of readableStreamAsyncIterable(processedStream)) {
- const currentTime = new Date().getTime()
const delta = chunk.type === 'finish' ? chunk.delta : chunk
const rawChunk = chunk.type === 'finish' ? chunk.chunk : chunk
switch (chunk.type) {
case 'reasoning': {
- if (time_thinking_start === 0) {
- time_thinking_start = currentTime
- time_first_token_millsec = currentTime
- time_first_token_millsec_delta = currentTime - start_time_millsec
+ if (time_first_token_millsec === 0) {
+ time_first_token_millsec = new Date().getTime()
}
thinkingContent += chunk.textDelta
- const thinking_time = currentTime - time_thinking_start
- onChunk({ type: ChunkType.THINKING_DELTA, text: chunk.textDelta, thinking_millsec: thinking_time })
+ onChunk({
+ type: ChunkType.THINKING_DELTA,
+ text: chunk.textDelta,
+ thinking_millsec: new Date().getTime() - time_first_token_millsec
+ })
break
}
case 'text-delta': {
let textDelta = chunk.textDelta
-
if (assistant.enableWebSearch && delta) {
const originalDelta = rawChunk?.choices?.[0]?.delta
@@ -676,25 +675,32 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
if (isFirstChunk) {
isFirstChunk = false
if (time_first_token_millsec === 0) {
- time_first_token_millsec = currentTime
- time_first_token_millsec_delta = currentTime - start_time_millsec
+ time_first_token_millsec = new Date().getTime()
+ } else {
+ onChunk({
+ type: ChunkType.THINKING_COMPLETE,
+ text: thinkingContent,
+ thinking_millsec: new Date().getTime() - time_first_token_millsec
+ })
}
}
content += textDelta
- if (time_thinking_start > 0 && time_first_content_millsec === 0) {
- time_first_content_millsec = currentTime
- final_time_thinking_millsec_delta = time_first_content_millsec - time_thinking_start
-
- onChunk({
- type: ChunkType.THINKING_COMPLETE,
- text: thinkingContent,
- thinking_millsec: final_time_thinking_millsec_delta
- })
- }
onChunk({ type: ChunkType.TEXT_DELTA, text: textDelta })
break
}
case 'tool-calls': {
+ if (isFirstChunk) {
+ isFirstChunk = false
+ if (time_first_token_millsec === 0) {
+ time_first_token_millsec = new Date().getTime()
+ } else {
+ onChunk({
+ type: ChunkType.THINKING_COMPLETE,
+ text: thinkingContent,
+ thinking_millsec: new Date().getTime() - time_first_token_millsec
+ })
+ }
+ }
chunk.delta.tool_calls.forEach((toolCall) => {
const { id, index, type, function: fun } = toolCall
if (id && type === 'function' && fun) {
@@ -721,10 +727,14 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
if (!isEmpty(finishReason)) {
onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
- final_time_completion_millsec_delta = currentTime - start_time_millsec
if (usage) {
- lastUsage = usage
+ finalUsage.completion_tokens += usage.completion_tokens || 0
+ finalUsage.prompt_tokens += usage.prompt_tokens || 0
+ finalUsage.total_tokens += usage.total_tokens || 0
+ finalMetrics.completion_tokens += usage.completion_tokens || 0
}
+ finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+ finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec
if (originalFinishDelta?.annotations) {
onChunk({
type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
@@ -774,49 +784,46 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
} as LLMWebSearchCompleteChunk)
}
}
- reqMessages.push({
- role: 'assistant',
- content: content,
- tool_calls: toolCalls.length
- ? toolCalls.map((toolCall) => ({
- id: toolCall.id,
- function: {
- ...toolCall.function,
- arguments:
- typeof toolCall.function.arguments === 'string'
- ? toolCall.function.arguments
- : JSON.stringify(toolCall.function.arguments)
- },
- type: 'function'
- }))
- : undefined
- })
- let toolResults: Awaited> = []
- if (toolCalls.length) {
- toolResults = await processToolCalls(mcpTools, toolCalls)
- }
- if (content.length) {
- toolResults = toolResults.concat(await processToolUses(content))
- }
- if (toolResults.length) {
- await processToolResults(toolResults, idx)
- }
- onChunk({
- type: ChunkType.BLOCK_COMPLETE,
- response: {
- usage: lastUsage,
- metrics: {
- completion_tokens: lastUsage?.completion_tokens,
- time_completion_millsec: final_time_completion_millsec_delta,
- time_first_token_millsec: time_first_token_millsec_delta,
- time_thinking_millsec: final_time_thinking_millsec_delta
- }
- }
- })
break
}
}
}
+
+ reqMessages.push({
+ role: 'assistant',
+ content: content,
+ tool_calls: toolCalls.length
+ ? toolCalls.map((toolCall) => ({
+ id: toolCall.id,
+ function: {
+ ...toolCall.function,
+ arguments:
+ typeof toolCall.function.arguments === 'string'
+ ? toolCall.function.arguments
+ : JSON.stringify(toolCall.function.arguments)
+ },
+ type: 'function'
+ }))
+ : undefined
+ })
+ let toolResults: Awaited> = []
+ if (toolCalls.length) {
+ toolResults = await processToolCalls(mcpTools, toolCalls)
+ }
+ if (content.length) {
+ toolResults = toolResults.concat(await processToolUses(content))
+ }
+ if (toolResults.length) {
+ await processToolResults(toolResults, idx)
+ }
+
+ onChunk({
+ type: ChunkType.BLOCK_COMPLETE,
+ response: {
+ usage: finalUsage,
+ metrics: finalMetrics
+ }
+ })
}
reqMessages = processReqMessages(model, reqMessages)
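
Both the Anthropic and OpenAI-compatible handlers now close the thinking block lazily: the first reasoning delta stamps `time_first_token_millsec`, and the first non-reasoning chunk (text or tool call) that sees a non-zero stamp emits a single `THINKING_COMPLETE` with the elapsed time. A self-contained sketch of that switch, with hypothetical `consume`/`emit` names and simplified chunk shapes:

```ts
type StreamChunk =
  | { type: 'reasoning'; textDelta: string }
  | { type: 'text-delta'; textDelta: string }

// Emits one THINKING_COMPLETE when the stream switches from reasoning to text,
// using the timestamp of the first token as the start of the thinking phase.
function consume(chunks: StreamChunk[], emit: (event: string, payload?: unknown) => void) {
  let isFirstChunk = true
  let timeFirstToken = 0
  let thinking = ''

  for (const chunk of chunks) {
    if (chunk.type === 'reasoning') {
      if (timeFirstToken === 0) timeFirstToken = Date.now()
      thinking += chunk.textDelta
      emit('THINKING_DELTA', { text: chunk.textDelta, thinking_millsec: Date.now() - timeFirstToken })
    } else {
      if (isFirstChunk) {
        isFirstChunk = false
        if (timeFirstToken === 0) {
          // No reasoning phase: this text chunk carries the first token.
          timeFirstToken = Date.now()
        } else {
          // Reasoning already produced tokens: close the thinking block exactly once.
          emit('THINKING_COMPLETE', { text: thinking, thinking_millsec: Date.now() - timeFirstToken })
        }
      }
      emit('TEXT_DELTA', { text: chunk.textDelta })
    }
  }
}
```
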
diff --git a/src/renderer/src/providers/AiProvider/OpenAIProvider.ts b/src/renderer/src/providers/AiProvider/OpenAIProvider.ts
index 747dad5986..154b1a7357 100644
--- a/src/renderer/src/providers/AiProvider/OpenAIProvider.ts
+++ b/src/renderer/src/providers/AiProvider/OpenAIProvider.ts
@@ -24,6 +24,7 @@ import {
MCPCallToolResponse,
MCPTool,
MCPToolResponse,
+ Metrics,
Model,
Provider,
Suggestion,
@@ -332,7 +333,6 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
const lastUserMessage = _messages.findLast((m) => m.role === 'user')
const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
const { signal } = abortController
- let time_first_token_millsec_delta = 0
const start_time_millsec = new Date().getTime()
const response = await this.sdk.chat.completions
// @ts-ignore key is not typed
@@ -354,8 +354,17 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
const processStream = async (stream: any) => {
let content = ''
let isFirstChunk = true
- let final_time_completion_millsec_delta = 0
- let lastUsage: Usage | undefined = undefined
+ const finalUsage: Usage = {
+ completion_tokens: 0,
+ prompt_tokens: 0,
+ total_tokens: 0
+ }
+
+ const finalMetrics: Metrics = {
+ completion_tokens: 0,
+ time_completion_millsec: 0,
+ time_first_token_millsec: 0
+ }
for await (const chunk of stream as any) {
if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
break
@@ -368,17 +377,21 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
}
if (isFirstChunk) {
isFirstChunk = false
- time_first_token_millsec_delta = new Date().getTime() - start_time_millsec
+ finalMetrics.time_first_token_millsec = new Date().getTime() - start_time_millsec
}
content += delta.content
onChunk({ type: ChunkType.TEXT_DELTA, text: delta.content })
}
if (!isEmpty(finishReason) || chunk?.annotations) {
onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
- final_time_completion_millsec_delta = new Date().getTime() - start_time_millsec
+ finalMetrics.time_completion_millsec = new Date().getTime() - start_time_millsec
if (chunk.usage) {
- lastUsage = chunk.usage
+ const usage = chunk.usage as OpenAI.Completions.CompletionUsage
+ finalUsage.completion_tokens = usage.completion_tokens
+ finalUsage.prompt_tokens = usage.prompt_tokens
+ finalUsage.total_tokens = usage.total_tokens
}
+ finalMetrics.completion_tokens = finalUsage.completion_tokens
}
if (delta?.annotations) {
onChunk({
@@ -393,12 +406,8 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
onChunk({
type: ChunkType.BLOCK_COMPLETE,
response: {
- usage: lastUsage,
- metrics: {
- completion_tokens: lastUsage?.completion_tokens,
- time_completion_millsec: final_time_completion_millsec_delta,
- time_first_token_millsec: time_first_token_millsec_delta
- }
+ usage: finalUsage,
+ metrics: finalMetrics
}
})
}
@@ -454,9 +463,6 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
userMessage.push(await this.getReponseMessageParam(message, model))
}
- let time_first_token_millsec = 0
- const start_time_millsec = new Date().getTime()
-
const lastUserMessage = _messages.findLast((m) => m.role === 'user')
const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
const { signal } = abortController
@@ -469,6 +475,18 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
reqMessages = [systemMessage, ...userMessage].filter(Boolean) as OpenAI.Responses.EasyInputMessage[]
}
+ const finalUsage: Usage = {
+ completion_tokens: 0,
+ prompt_tokens: 0,
+ total_tokens: 0
+ }
+
+ const finalMetrics: Metrics = {
+ completion_tokens: 0,
+ time_completion_millsec: 0,
+ time_first_token_millsec: 0
+ }
+
const toolResponses: MCPToolResponse[] = []
const processToolResults = async (toolResults: Awaited>, idx: number) => {
@@ -548,6 +566,8 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
idx: number
) => {
const toolCalls: OpenAI.Responses.ResponseFunctionToolCall[] = []
+ let time_first_token_millsec = 0
+ const start_time_millsec = new Date().getTime()
if (!streamOutput) {
const nonStream = stream as OpenAI.Responses.Response
@@ -632,8 +652,6 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
const outputItems: OpenAI.Responses.ResponseOutputItem[] = []
- let lastUsage: Usage | undefined = undefined
- let final_time_completion_millsec_delta = 0
for await (const chunk of stream as Stream) {
if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
break
@@ -707,18 +725,18 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
}
break
case 'response.completed': {
- final_time_completion_millsec_delta = new Date().getTime() - start_time_millsec
const completion_tokens =
(chunk.response.usage?.output_tokens || 0) +
(chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 0)
const total_tokens =
(chunk.response.usage?.total_tokens || 0) +
(chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 0)
- lastUsage = {
- completion_tokens,
- prompt_tokens: chunk.response.usage?.input_tokens || 0,
- total_tokens
- }
+ finalUsage.completion_tokens += completion_tokens
+ finalUsage.prompt_tokens += chunk.response.usage?.input_tokens || 0
+ finalUsage.total_tokens += total_tokens
+ finalMetrics.completion_tokens += completion_tokens
+ finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+ finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec
break
}
case 'error':
@@ -760,12 +778,8 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
onChunk({
type: ChunkType.BLOCK_COMPLETE,
response: {
- usage: lastUsage,
- metrics: {
- completion_tokens: lastUsage?.completion_tokens,
- time_completion_millsec: final_time_completion_millsec_delta,
- time_first_token_millsec: time_first_token_millsec - start_time_millsec
- }
+ usage: finalUsage,
+ metrics: finalMetrics
}
})
}
diff --git a/src/renderer/src/store/thunk/messageThunk.ts b/src/renderer/src/store/thunk/messageThunk.ts
index ef5b24ccaa..4e586a9a66 100644
--- a/src/renderer/src/store/thunk/messageThunk.ts
+++ b/src/renderer/src/store/thunk/messageThunk.ts
@@ -565,7 +565,7 @@ const fetchAndProcessAssistantResponseImpl = async (
message: pauseErrorLanguagePlaceholder || error.message || 'Stream processing error',
originalMessage: error.message,
stack: error.stack,
- status: error.status,
+ status: error.status || error.code,
requestId: error.request_id
}
if (lastBlockId) {
@@ -609,13 +609,14 @@ const fetchAndProcessAssistantResponseImpl = async (
// Update the topic name
autoRenameTopic(assistant, topicId)
- if (response && !response.usage) {
+ if (response && response.usage?.total_tokens === 0) {
const usage = await estimateMessagesUsage({ assistant, messages: finalContextWithAssistant })
response.usage = usage
}
+ console.log('response', response)
}
if (response && response.metrics) {
- if (!response.metrics.completion_tokens && response.usage) {
+ if (response.metrics.completion_tokens === 0 && response.usage?.completion_tokens) {
response = {
...response,
metrics: {
diff --git a/src/renderer/src/types/index.ts b/src/renderer/src/types/index.ts
index e09e826b1b..0873fc0a99 100644
--- a/src/renderer/src/types/index.ts
+++ b/src/renderer/src/types/index.ts
@@ -121,8 +121,8 @@ export type Usage = OpenAI.Completions.CompletionUsage & {
}
export type Metrics = {
- completion_tokens?: number
- time_completion_millsec?: number
+ completion_tokens: number
+ time_completion_millsec: number
time_first_token_millsec?: number
time_thinking_millsec?: number
}