diff --git a/src/renderer/src/pages/home/Messages/Blocks/ErrorBlock.tsx b/src/renderer/src/pages/home/Messages/Blocks/ErrorBlock.tsx
index 2c8161397d..f55a3ab6b1 100644
--- a/src/renderer/src/pages/home/Messages/Blocks/ErrorBlock.tsx
+++ b/src/renderer/src/pages/home/Messages/Blocks/ErrorBlock.tsx
@@ -16,7 +16,7 @@ const MessageErrorInfo: React.FC<{ block: ErrorMessageBlock }> = ({ block }) =>
 
   const HTTP_ERROR_CODES = [400, 401, 403, 404, 429, 500, 502, 503, 504]
   if (block.error && HTTP_ERROR_CODES.includes(block.error?.status)) {
-    return <Alert description={t(`error.http.${block.error.status}`)} type="error" />
+    return <Alert description={t(`error.http.${block.error.status}`)} message={block.error?.message} type="error" />
   }
   if (block?.error?.message) {
     const errorKey = `error.${block.error.message}`
diff --git a/src/renderer/src/pages/home/Messages/Blocks/ThinkingBlock.tsx b/src/renderer/src/pages/home/Messages/Blocks/ThinkingBlock.tsx
index f1d546bd58..fa3c2b9ff4 100644
--- a/src/renderer/src/pages/home/Messages/Blocks/ThinkingBlock.tsx
+++ b/src/renderer/src/pages/home/Messages/Blocks/ThinkingBlock.tsx
@@ -56,10 +56,12 @@ const ThinkingBlock: React.FC<Props> = ({ block }) => {
   useEffect(() => {
     if (isThinking) {
       intervalId.current = setInterval(() => {
-        setThinkingTime((prev) => prev + 200)
-      }, 200)
-    } else {
-      return
+        setThinkingTime((prev) => prev + 100)
+      }, 100)
+    } else if (intervalId.current) {
+      // Clear the timer immediately
+      clearInterval(intervalId.current)
+      intervalId.current = null
     }
 
     return () => {
diff --git a/src/renderer/src/pages/home/Messages/MessageContent.tsx b/src/renderer/src/pages/home/Messages/MessageContent.tsx
index 60aa2cdf6b..b01feb2be9 100644
--- a/src/renderer/src/pages/home/Messages/MessageContent.tsx
+++ b/src/renderer/src/pages/home/Messages/MessageContent.tsx
@@ -10,35 +10,6 @@ interface Props {
 }
 
 const MessageContent: React.FC<Props> = ({ message }) => {
-  // const { t } = useTranslation()
-  // if (message.status === 'pending') {
-  //   return (
-
-  //   )
-  // }
-
-  // if (message.status === 'searching') {
-  //   return (
-  //     <SearchingContainer>
-  //       <Search size={24} />
-  //       <SearchingText>{t('message.searching')}</SearchingText>
-  //       <BarLoader color="#1677ff" />
-  //     </SearchingContainer>
-  //   )
-  // }
-
-  // if (message.status === 'error') {
-  //   return <MessageError message={message} />
-  // }
-
-  // if (message.type === '@' && model) {
-  //   const content = `[@${model.name}](#)  ${getBriefInfo(message.content)}`
-  //   return <Markdown message={{ ...message, content }} />
-  // }
-  // const toolUseRegex = /<tool_use>([\s\S]*?)<\/tool_use>/g
-
-  // console.log('message', message)
-
   return (
     <>
       <Flex gap="8px" wrap style={{ marginBottom: 10 }}>
diff --git a/src/renderer/src/pages/home/Messages/MessageError.tsx b/src/renderer/src/pages/home/Messages/MessageError.tsx
deleted file mode 100644
index e0c0ea00bd..0000000000
--- a/src/renderer/src/pages/home/Messages/MessageError.tsx
+++ /dev/null
@@ -1,45 +0,0 @@
-import type { ErrorMessageBlock } from '@renderer/types/newMessage'
-import { Alert as AntdAlert } from 'antd'
-import { FC } from 'react'
-import { useTranslation } from 'react-i18next'
-import styled from 'styled-components'
-
-const MessageError: FC<{ block: ErrorMessageBlock }> = ({ block }) => {
-  return (
-    <>
-      {/* <Markdown block={block} role={role} />
-      {block.error && (
-        <Markdown
-          message={{
-            ...block,
-            content: formatErrorMessage(block.error)
-          }}
-        />
-      )} */}
-      <MessageErrorInfo block={block} />
-    </>
-  )
-}
-
-const MessageErrorInfo: FC<{ block: ErrorMessageBlock }> = ({ block }) => {
-  const { t } = useTranslation()
-
-  const HTTP_ERROR_CODES = [400, 401, 403, 404, 429, 500, 502, 503, 504]
-  console.log('block', block)
-  if (block.error && HTTP_ERROR_CODES.includes(block.error?.status)) {
-    return <Alert description={t(`error.http.${block.error.status}`)} type="error" />
-  }
-  if (block?.error?.message) {
-    return <Alert description={block.error.message} type="error" />
-  }
-
-  return <Alert description={t('error.chat.response')} type="error" />
-}
-
-const Alert = styled(AntdAlert)`
-  margin: 15px 0 8px;
-  padding: 10px;
-  font-size: 12px;
-`
-
-export default MessageError
diff --git a/src/renderer/src/providers/AiProvider/AnthropicProvider.ts b/src/renderer/src/providers/AiProvider/AnthropicProvider.ts
index 1bb58c22ab..3f2929bdd0 100644
--- a/src/renderer/src/providers/AiProvider/AnthropicProvider.ts
+++ b/src/renderer/src/providers/AiProvider/AnthropicProvider.ts
@@ -30,10 +30,12 @@ import {
   MCPCallToolResponse,
   MCPTool,
   MCPToolResponse,
+  Metrics,
   Model,
   Provider,
   Suggestion,
   ToolCallResponse,
+  Usage,
   WebSearchSource
 } from '@renderer/types'
 import { ChunkType } from '@renderer/types/chunk'
@@ -47,7 +49,7 @@ import {
 } from '@renderer/utils/mcp-tools'
 import { findFileBlocks, findImageBlocks, getMainTextContent } from '@renderer/utils/messageUtils/find'
 import { buildSystemPrompt } from '@renderer/utils/prompt'
-import { first, flatten, sum, takeRight } from 'lodash'
+import { first, flatten, takeRight } from 'lodash'
 import OpenAI from 'openai'
 
 import { CompletionsParams } from '.'
@@ -270,77 +272,82 @@ export default class AnthropicProvider extends BaseProvider {
       ...this.getCustomParameters(assistant)
     }
 
-    let time_first_token_millsec = 0
-    let time_first_content_millsec = 0
-    let checkThinkingContent = false
-    let thinking_content = ''
-    const start_time_millsec = new Date().getTime()
-
-    if (!streamOutput) {
-      const message = await this.sdk.messages.create({ ...body, stream: false })
-      const time_completion_millsec = new Date().getTime() - start_time_millsec
-
-      let text = ''
-      let reasoning_content = ''
-
-      if (message.content && message.content.length > 0) {
-        const thinkingBlock = message.content.find((block) => block.type === 'thinking')
-        const textBlock = message.content.find((block) => block.type === 'text')
-
-        if (thinkingBlock && 'thinking' in thinkingBlock) {
-          reasoning_content = thinkingBlock.thinking
-        }
-
-        if (textBlock && 'text' in textBlock) {
-          text = textBlock.text
-        }
-      }
-
-      return onChunk({
-        type: ChunkType.BLOCK_COMPLETE,
-        response: {
-          text,
-          reasoning_content,
-          usage: message.usage as any,
-          metrics: {
-            completion_tokens: message.usage.output_tokens,
-            time_completion_millsec,
-            time_first_token_millsec: 0
-          }
-        }
-      })
-    }
-
     const { abortController, cleanup } = this.createAbortController(lastUserMessage?.id)
     const { signal } = abortController
+
+    const finalUsage: Usage = {
+      completion_tokens: 0,
+      prompt_tokens: 0,
+      total_tokens: 0
+    }
+
+    const finalMetrics: Metrics = {
+      completion_tokens: 0,
+      time_completion_millsec: 0,
+      time_first_token_millsec: 0
+    }
     const toolResponses: MCPToolResponse[] = []
 
-    const processStream = (body: MessageCreateParamsNonStreaming, idx: number) => {
+    const processStream = async (body: MessageCreateParamsNonStreaming, idx: number) => {
+      let time_first_token_millsec = 0
+      const start_time_millsec = new Date().getTime()
+
+      if (!streamOutput) {
+        const message = await this.sdk.messages.create({ ...body, stream: false })
+        const time_completion_millsec = new Date().getTime() - start_time_millsec
+
+        let text = ''
+        let reasoning_content = ''
+
+        if (message.content && message.content.length > 0) {
+          const thinkingBlock = message.content.find((block) => block.type === 'thinking')
+          const textBlock = message.content.find((block) => block.type === 'text')
+
+          if (thinkingBlock && 'thinking' in thinkingBlock) {
+            reasoning_content = thinkingBlock.thinking
+          }
+
+          if (textBlock && 'text' in textBlock) {
+            text = textBlock.text
+          }
+        }
+
+        return onChunk({
+          type: ChunkType.BLOCK_COMPLETE,
+          response: {
+            text,
+            reasoning_content,
+            usage: message.usage as any,
+            metrics: {
+              completion_tokens: message.usage.output_tokens,
+              time_completion_millsec,
+              time_first_token_millsec: 0
+            }
+          }
+        })
+      }
+
+      let thinking_content = ''
+      let isFirstChunk = true
+
       return new Promise<void>((resolve, reject) => {
         // 等待接口返回流
         const toolCalls: ToolUseBlock[] = []
-        let hasThinkingContent = false
+
         this.sdk.messages
           .stream({ ...body, stream: true }, { signal, timeout: 5 * 60 * 1000 })
           .on('text', (text) => {
-            if (hasThinkingContent && !checkThinkingContent) {
-              checkThinkingContent = true
-              onChunk({
-                type: ChunkType.THINKING_COMPLETE,
-                text: thinking_content,
-                thinking_millsec: new Date().getTime() - time_first_content_millsec
-              })
-            }
-            if (time_first_token_millsec == 0) {
-              time_first_token_millsec = new Date().getTime()
-            }
-
-            thinking_content = ''
-            checkThinkingContent = false
-            hasThinkingContent = false
-
-            if (!hasThinkingContent && time_first_content_millsec === 0) {
-              time_first_content_millsec = new Date().getTime()
+            if (isFirstChunk) {
+              isFirstChunk = false
+              if (time_first_token_millsec == 0) {
+                time_first_token_millsec = new Date().getTime()
+              } else {
+                onChunk({
+                  type: ChunkType.THINKING_COMPLETE,
+                  text: thinking_content,
+                  thinking_millsec: new Date().getTime() - time_first_token_millsec
+                })
+              }
             }
 
             onChunk({ type: ChunkType.TEXT_DELTA, text })
@@ -372,34 +379,22 @@ export default class AnthropicProvider extends BaseProvider {
                 })
               }
             }
+            if (block.type === 'tool_use') {
+              toolCalls.push(block)
+            }
           })
           .on('thinking', (thinking) => {
-            hasThinkingContent = true
-            const currentTime = new Date().getTime() // Get current time for each chunk
-
             if (time_first_token_millsec == 0) {
-              time_first_token_millsec = currentTime
+              time_first_token_millsec = new Date().getTime()
             }
 
-            // Set time_first_content_millsec ONLY when the first content (thinking or text) arrives
-            if (time_first_content_millsec === 0) {
-              time_first_content_millsec = currentTime
-            }
-
-            // Calculate thinking time as time elapsed since start until this chunk
-            const thinking_time = currentTime - time_first_content_millsec
             onChunk({
               type: ChunkType.THINKING_DELTA,
               text: thinking,
-              thinking_millsec: thinking_time
+              thinking_millsec: new Date().getTime() - time_first_token_millsec
             })
             thinking_content += thinking
           })
-          .on('contentBlock', (content) => {
-            if (content.type === 'tool_use') {
-              toolCalls.push(content)
-            }
-          })
           .on('finalMessage', async (message) => {
             const toolResults: Awaited<ReturnType<typeof parseAndCallTools>> = []
             // tool call
@@ -458,29 +453,34 @@ export default class AnthropicProvider extends BaseProvider {
               newBody.messages = userMessages
 
               onChunk({ type: ChunkType.LLM_RESPONSE_CREATED })
-              await processStream(newBody, idx + 1)
+              try {
+                await processStream(newBody, idx + 1)
+              } catch (error) {
+                console.error('Error processing stream:', error)
+                // Bail out after rejecting so a failed round emits no BLOCK_COMPLETE.
+                reject(error)
+                return
+              }
             }
 
-            const time_completion_millsec = new Date().getTime() - start_time_millsec
+            // finalUsage/finalMetrics are shared by every recursive processStream call, so each round adds to them.
+            finalUsage.prompt_tokens += message.usage.input_tokens
+            finalUsage.completion_tokens += message.usage.output_tokens
+            // Derive the total from the running sums; `+=` would re-add earlier rounds on every call.
+            finalUsage.total_tokens = finalUsage.prompt_tokens + finalUsage.completion_tokens
+            finalMetrics.completion_tokens = finalUsage.completion_tokens
+            finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+            // time_first_token_millsec stays 0 when no text/thinking event fired; report 0 instead of a negative value.
+            finalMetrics.time_first_token_millsec =
+              time_first_token_millsec > 0 ? time_first_token_millsec - start_time_millsec : 0
 
             onChunk({
               type: ChunkType.BLOCK_COMPLETE,
               response: {
-                usage: {
-                  prompt_tokens: message.usage.input_tokens,
-                  completion_tokens: message.usage.output_tokens,
-                  total_tokens: sum(Object.values(message.usage))
-                },
-                metrics: {
-                  completion_tokens: message.usage.output_tokens,
-                  time_completion_millsec,
-                  time_first_token_millsec: time_first_token_millsec - start_time_millsec
-                }
+                usage: finalUsage,
+                metrics: finalMetrics
               }
             })
-            // FIXME: 临时方案，重置时间戳和思考内容
-            time_first_token_millsec = 0
-            time_first_content_millsec = 0
             resolve()
           })
           .on('error', (error) => reject(error))
diff --git a/src/renderer/src/providers/AiProvider/GeminiProvider.ts b/src/renderer/src/providers/AiProvider/GeminiProvider.ts
index c04632efd3..52323efc1d 100644
--- a/src/renderer/src/providers/AiProvider/GeminiProvider.ts
+++ b/src/renderer/src/providers/AiProvider/GeminiProvider.ts
@@ -40,6 +40,7 @@ import {
   MCPCallToolResponse,
   MCPTool,
   MCPToolResponse,
+  Metrics,
   Model,
   Provider,
   Suggestion,
@@ -370,8 +371,17 @@ export default class GeminiProvider extends BaseProvider {
       }
     }
 
-    const start_time_millsec = new Date().getTime()
-    let time_first_token_millsec = 0
+    const finalUsage: Usage = {
+      completion_tokens: 0,
+      prompt_tokens: 0,
+      total_tokens: 0
+    }
+
+    const finalMetrics: Metrics = {
+      completion_tokens: 0,
+      time_completion_millsec: 0,
+      time_first_token_millsec: 0
+    }
 
     const { cleanup, abortController } = this.createAbortController(userLastMessage?.id, true)
 
@@ -445,6 +455,8 @@ export default class GeminiProvider extends BaseProvider {
       history.push(messageContents)
 
       let functionCalls: FunctionCall[] = []
+      let time_first_token_millsec = 0
+      const start_time_millsec = new Date().getTime()
 
       if (stream instanceof GenerateContentResponse) {
         let content = ''
@@ -504,34 +516,18 @@ export default class GeminiProvider extends BaseProvider {
         } as BlockCompleteChunk)
       } else {
         let content = ''
-        let final_time_completion_millsec = 0
-        let lastUsage: Usage | undefined = undefined
         for await (const chunk of stream) {
           if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) break
 
-          // --- Calculate Metrics ---
-          if (time_first_token_millsec == 0 && chunk.text !== undefined) {
-            // Update based on text arrival
-            time_first_token_millsec = new Date().getTime() - start_time_millsec
+          if (time_first_token_millsec == 0) {
+            time_first_token_millsec = new Date().getTime()
           }
 
-          // 1. Text Content
           if (chunk.text !== undefined) {
             content += chunk.text
             onChunk({ type: ChunkType.TEXT_DELTA, text: chunk.text })
           }
 
-          // 2. Usage Data
-          if (chunk.usageMetadata) {
-            lastUsage = {
-              prompt_tokens: chunk.usageMetadata.promptTokenCount || 0,
-              completion_tokens: chunk.usageMetadata.candidatesTokenCount || 0,
-              total_tokens: chunk.usageMetadata.totalTokenCount || 0
-            }
-            final_time_completion_millsec = new Date().getTime() - start_time_millsec
-          }
-
-          // 4. Image Generation
           const generateImage = this.processGeminiImageResponse(chunk, onChunk)
           if (generateImage?.images?.length) {
             onChunk({ type: ChunkType.IMAGE_COMPLETE, image: generateImage })
@@ -541,8 +537,12 @@ export default class GeminiProvider extends BaseProvider {
             if (chunk.text) {
               onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
             }
+            if (chunk.usageMetadata) {
+              finalUsage.prompt_tokens += chunk.usageMetadata.promptTokenCount || 0
+              finalUsage.completion_tokens += chunk.usageMetadata.candidatesTokenCount || 0
+              finalUsage.total_tokens += chunk.usageMetadata.totalTokenCount || 0
+            }
             if (chunk.candidates?.[0]?.groundingMetadata) {
-              // 3. Grounding/Search Metadata
               const groundingMetadata = chunk.candidates?.[0]?.groundingMetadata
               onChunk({
                 type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
@@ -561,35 +561,37 @@ export default class GeminiProvider extends BaseProvider {
               functionCalls = functionCalls.concat(chunk.functionCalls)
             }
 
-            onChunk({
-              type: ChunkType.BLOCK_COMPLETE,
-              response: {
-                metrics: {
-                  completion_tokens: lastUsage?.completion_tokens,
-                  time_completion_millsec: final_time_completion_millsec,
-                  time_first_token_millsec
-                },
-                usage: lastUsage
-              }
-            })
-          }
-
-          // --- End Incremental onChunk calls ---
-
-          // Call processToolUses AFTER potentially processing text content in this chunk
-          // This assumes tools might be specified within the text stream
-          // Note: parseAndCallTools inside should handle its own onChunk for tool responses
-          let toolResults: Awaited<ReturnType<typeof parseAndCallTools>> = []
-          if (functionCalls.length) {
-            toolResults = await processToolCalls(functionCalls)
-          }
-          if (content.length) {
-            toolResults = toolResults.concat(await processToolUses(content))
-          }
-          if (toolResults.length) {
-            await processToolResults(toolResults, idx)
+            finalMetrics.completion_tokens = finalUsage.completion_tokens
+            finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+            finalMetrics.time_first_token_millsec =
+              (finalMetrics.time_first_token_millsec || 0) + (time_first_token_millsec - start_time_millsec)
           }
         }
+
+        // --- End Incremental onChunk calls ---
+
+        // Call processToolUses AFTER potentially processing text content in this chunk
+        // This assumes tools might be specified within the text stream
+        // Note: parseAndCallTools inside should handle its own onChunk for tool responses
+        let toolResults: Awaited<ReturnType<typeof parseAndCallTools>> = []
+        if (functionCalls.length) {
+          toolResults = await processToolCalls(functionCalls)
+        }
+        if (content.length) {
+          toolResults = toolResults.concat(await processToolUses(content))
+        }
+        if (toolResults.length) {
+          await processToolResults(toolResults, idx)
+        }
+
+        // FIXME: because of the recursion, this is sent n times
+        onChunk({
+          type: ChunkType.BLOCK_COMPLETE,
+          response: {
+            usage: finalUsage,
+            metrics: finalMetrics
+          }
+        })
       }
     }
 
@@ -615,17 +617,6 @@ export default class GeminiProvider extends BaseProvider {
     })
 
     await processStream(userMessagesStream, 0).finally(cleanup)
-
-    const final_time_completion_millsec = new Date().getTime() - start_time_millsec
-    onChunk({
-      type: ChunkType.BLOCK_COMPLETE,
-      response: {
-        metrics: {
-          time_completion_millsec: final_time_completion_millsec,
-          time_first_token_millsec
-        }
-      }
-    })
   }
 
   /**
diff --git a/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts b/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts
index cfb617d46a..f9a9042074 100644
--- a/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts
+++ b/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts
@@ -34,6 +34,7 @@ import {
   MCPCallToolResponse,
   MCPTool,
   MCPToolResponse,
+  Metrics,
   Model,
   Provider,
   Suggestion,
@@ -395,7 +396,6 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
       return streamOutput
     }
 
-    const start_time_millsec = new Date().getTime()
     const lastUserMessage = _messages.findLast((m) => m.role === 'user')
     const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
     const { signal } = abortController
@@ -423,6 +423,18 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
       reqMessages = [systemMessage, ...userMessages].filter(Boolean) as ChatCompletionMessageParam[]
     }
 
+    let finalUsage: Usage = {
+      completion_tokens: 0,
+      prompt_tokens: 0,
+      total_tokens: 0
+    }
+
+    const finalMetrics: Metrics = {
+      completion_tokens: 0,
+      time_completion_millsec: 0,
+      time_first_token_millsec: 0
+    }
+
     const toolResponses: MCPToolResponse[] = []
 
     const processToolResults = async (toolResults: Awaited<ReturnType<typeof parseAndCallTools>>, idx: number) => {
@@ -505,18 +517,17 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
 
     const processStream = async (stream: any, idx: number) => {
       const toolCalls: ChatCompletionMessageToolCall[] = []
+      let time_first_token_millsec = 0
+      const start_time_millsec = new Date().getTime()
+
       // Handle non-streaming case (already returns early, no change needed here)
       if (!isSupportStreamOutput()) {
-        const time_completion_millsec = new Date().getTime() - start_time_millsec
         // Calculate final metrics once
-        const finalMetrics = {
-          completion_tokens: stream.usage?.completion_tokens,
-          time_completion_millsec,
-          time_first_token_millsec: 0 // Non-streaming, first token time is not relevant
-        }
+        finalMetrics.completion_tokens = stream.usage?.completion_tokens || 0
+        finalMetrics.time_completion_millsec = new Date().getTime() - start_time_millsec
 
         // Create a synthetic usage object if stream.usage is undefined
-        const finalUsage = stream.usage
+        finalUsage = { ...finalUsage, ...stream.usage }
         // Separate onChunk calls for text and usage/metrics
         let content = ''
         stream.choices.forEach((choice) => {
@@ -526,7 +537,7 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
             onChunk({
               type: ChunkType.THINKING_COMPLETE,
               text: choice.message.reasoning,
-              thinking_millsec: time_completion_millsec
+              thinking_millsec: new Date().getTime() - start_time_millsec
             })
           }
           // text
@@ -576,20 +587,9 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
         return
       }
 
-      let content = '' // Accumulate content for tool processing if needed
+      let content = ''
       let thinkingContent = ''
-      // 记录最终的完成时间差
-      let final_time_completion_millsec_delta = 0
-      let final_time_thinking_millsec_delta = 0
-      // Variable to store the last received usage object
-      let lastUsage: Usage | undefined = undefined
-      // let isThinkingInContent: ThoughtProcessor | undefined = undefined
-      // const processThinkingChunk = this.handleThinkingTags()
       let isFirstChunk = true
-      let time_first_token_millsec = 0
-      let time_first_token_millsec_delta = 0
-      let time_first_content_millsec = 0
-      let time_thinking_start = 0
 
       // 1. 初始化中间件
       const reasoningTags = [
@@ -640,25 +640,24 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
 
       // 3. 消费 processedStream，分发 onChunk
       for await (const chunk of readableStreamAsyncIterable(processedStream)) {
-        const currentTime = new Date().getTime()
         const delta = chunk.type === 'finish' ? chunk.delta : chunk
         const rawChunk = chunk.type === 'finish' ? chunk.chunk : chunk
 
         switch (chunk.type) {
           case 'reasoning': {
-            if (time_thinking_start === 0) {
-              time_thinking_start = currentTime
-              time_first_token_millsec = currentTime
-              time_first_token_millsec_delta = currentTime - start_time_millsec
+            if (time_first_token_millsec === 0) {
+              time_first_token_millsec = new Date().getTime()
             }
             thinkingContent += chunk.textDelta
-            const thinking_time = currentTime - time_thinking_start
-            onChunk({ type: ChunkType.THINKING_DELTA, text: chunk.textDelta, thinking_millsec: thinking_time })
+            onChunk({
+              type: ChunkType.THINKING_DELTA,
+              text: chunk.textDelta,
+              thinking_millsec: new Date().getTime() - time_first_token_millsec
+            })
             break
           }
           case 'text-delta': {
             let textDelta = chunk.textDelta
-
             if (assistant.enableWebSearch && delta) {
               const originalDelta = rawChunk?.choices?.[0]?.delta
 
@@ -676,25 +675,32 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
             if (isFirstChunk) {
               isFirstChunk = false
               if (time_first_token_millsec === 0) {
-                time_first_token_millsec = currentTime
-                time_first_token_millsec_delta = currentTime - start_time_millsec
+                time_first_token_millsec = new Date().getTime()
+              } else {
+                onChunk({
+                  type: ChunkType.THINKING_COMPLETE,
+                  text: thinkingContent,
+                  thinking_millsec: new Date().getTime() - time_first_token_millsec
+                })
               }
             }
             content += textDelta
-            if (time_thinking_start > 0 && time_first_content_millsec === 0) {
-              time_first_content_millsec = currentTime
-              final_time_thinking_millsec_delta = time_first_content_millsec - time_thinking_start
-
-              onChunk({
-                type: ChunkType.THINKING_COMPLETE,
-                text: thinkingContent,
-                thinking_millsec: final_time_thinking_millsec_delta
-              })
-            }
             onChunk({ type: ChunkType.TEXT_DELTA, text: textDelta })
             break
           }
           case 'tool-calls': {
+            if (isFirstChunk) {
+              isFirstChunk = false
+              if (time_first_token_millsec === 0) {
+                time_first_token_millsec = new Date().getTime()
+              } else {
+                onChunk({
+                  type: ChunkType.THINKING_COMPLETE,
+                  text: thinkingContent,
+                  thinking_millsec: new Date().getTime() - time_first_token_millsec
+                })
+              }
+            }
             chunk.delta.tool_calls.forEach((toolCall) => {
               const { id, index, type, function: fun } = toolCall
               if (id && type === 'function' && fun) {
@@ -721,10 +727,14 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
 
             if (!isEmpty(finishReason)) {
               onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
-              final_time_completion_millsec_delta = currentTime - start_time_millsec
               if (usage) {
-                lastUsage = usage
+                finalUsage.completion_tokens += usage.completion_tokens || 0
+                finalUsage.prompt_tokens += usage.prompt_tokens || 0
+                finalUsage.total_tokens += usage.total_tokens || 0
+                finalMetrics.completion_tokens += usage.completion_tokens || 0
               }
+              finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+              finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec
               if (originalFinishDelta?.annotations) {
                 onChunk({
                   type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
@@ -774,49 +784,46 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
                 } as LLMWebSearchCompleteChunk)
               }
             }
-            reqMessages.push({
-              role: 'assistant',
-              content: content,
-              tool_calls: toolCalls.length
-                ? toolCalls.map((toolCall) => ({
-                    id: toolCall.id,
-                    function: {
-                      ...toolCall.function,
-                      arguments:
-                        typeof toolCall.function.arguments === 'string'
-                          ? toolCall.function.arguments
-                          : JSON.stringify(toolCall.function.arguments)
-                    },
-                    type: 'function'
-                  }))
-                : undefined
-            })
-            let toolResults: Awaited<ReturnType<typeof parseAndCallTools>> = []
-            if (toolCalls.length) {
-              toolResults = await processToolCalls(mcpTools, toolCalls)
-            }
-            if (content.length) {
-              toolResults = toolResults.concat(await processToolUses(content))
-            }
-            if (toolResults.length) {
-              await processToolResults(toolResults, idx)
-            }
-            onChunk({
-              type: ChunkType.BLOCK_COMPLETE,
-              response: {
-                usage: lastUsage,
-                metrics: {
-                  completion_tokens: lastUsage?.completion_tokens,
-                  time_completion_millsec: final_time_completion_millsec_delta,
-                  time_first_token_millsec: time_first_token_millsec_delta,
-                  time_thinking_millsec: final_time_thinking_millsec_delta
-                }
-              }
-            })
             break
           }
         }
       }
+
+      reqMessages.push({
+        role: 'assistant',
+        content: content,
+        tool_calls: toolCalls.length
+          ? toolCalls.map((toolCall) => ({
+              id: toolCall.id,
+              function: {
+                ...toolCall.function,
+                arguments:
+                  typeof toolCall.function.arguments === 'string'
+                    ? toolCall.function.arguments
+                    : JSON.stringify(toolCall.function.arguments)
+              },
+              type: 'function'
+            }))
+          : undefined
+      })
+      let toolResults: Awaited<ReturnType<typeof parseAndCallTools>> = []
+      if (toolCalls.length) {
+        toolResults = await processToolCalls(mcpTools, toolCalls)
+      }
+      if (content.length) {
+        toolResults = toolResults.concat(await processToolUses(content))
+      }
+      if (toolResults.length) {
+        await processToolResults(toolResults, idx)
+      }
+
+      onChunk({
+        type: ChunkType.BLOCK_COMPLETE,
+        response: {
+          usage: finalUsage,
+          metrics: finalMetrics
+        }
+      })
     }
 
     reqMessages = processReqMessages(model, reqMessages)
diff --git a/src/renderer/src/providers/AiProvider/OpenAIProvider.ts b/src/renderer/src/providers/AiProvider/OpenAIProvider.ts
index 747dad5986..154b1a7357 100644
--- a/src/renderer/src/providers/AiProvider/OpenAIProvider.ts
+++ b/src/renderer/src/providers/AiProvider/OpenAIProvider.ts
@@ -24,6 +24,7 @@ import {
   MCPCallToolResponse,
   MCPTool,
   MCPToolResponse,
+  Metrics,
   Model,
   Provider,
   Suggestion,
@@ -332,7 +333,6 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
       const lastUserMessage = _messages.findLast((m) => m.role === 'user')
       const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
       const { signal } = abortController
-      let time_first_token_millsec_delta = 0
       const start_time_millsec = new Date().getTime()
       const response = await this.sdk.chat.completions
         // @ts-ignore key is not typed
@@ -354,8 +354,17 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
       const processStream = async (stream: any) => {
         let content = ''
         let isFirstChunk = true
-        let final_time_completion_millsec_delta = 0
-        let lastUsage: Usage | undefined = undefined
+        const finalUsage: Usage = {
+          completion_tokens: 0,
+          prompt_tokens: 0,
+          total_tokens: 0
+        }
+
+        const finalMetrics: Metrics = {
+          completion_tokens: 0,
+          time_completion_millsec: 0,
+          time_first_token_millsec: 0
+        }
         for await (const chunk of stream as any) {
           if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
             break
@@ -368,17 +377,21 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
             }
             if (isFirstChunk) {
               isFirstChunk = false
-              time_first_token_millsec_delta = new Date().getTime() - start_time_millsec
+              finalMetrics.time_first_token_millsec = new Date().getTime() - start_time_millsec
             }
             content += delta.content
             onChunk({ type: ChunkType.TEXT_DELTA, text: delta.content })
           }
           if (!isEmpty(finishReason) || chunk?.annotations) {
             onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
-            final_time_completion_millsec_delta = new Date().getTime() - start_time_millsec
+            finalMetrics.time_completion_millsec = new Date().getTime() - start_time_millsec
             if (chunk.usage) {
-              lastUsage = chunk.usage
+              const usage = chunk.usage as OpenAI.Completions.CompletionUsage
+              finalUsage.completion_tokens = usage.completion_tokens
+              finalUsage.prompt_tokens = usage.prompt_tokens
+              finalUsage.total_tokens = usage.total_tokens
             }
+            finalMetrics.completion_tokens = finalUsage.completion_tokens
           }
           if (delta?.annotations) {
             onChunk({
@@ -393,12 +406,8 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
         onChunk({
           type: ChunkType.BLOCK_COMPLETE,
           response: {
-            usage: lastUsage,
-            metrics: {
-              completion_tokens: lastUsage?.completion_tokens,
-              time_completion_millsec: final_time_completion_millsec_delta,
-              time_first_token_millsec: time_first_token_millsec_delta
-            }
+            usage: finalUsage,
+            metrics: finalMetrics
           }
         })
       }
@@ -454,9 +463,6 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
       userMessage.push(await this.getReponseMessageParam(message, model))
     }
 
-    let time_first_token_millsec = 0
-    const start_time_millsec = new Date().getTime()
-
     const lastUserMessage = _messages.findLast((m) => m.role === 'user')
     const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
     const { signal } = abortController
@@ -469,6 +475,18 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
       reqMessages = [systemMessage, ...userMessage].filter(Boolean) as OpenAI.Responses.EasyInputMessage[]
     }
 
+    const finalUsage: Usage = {
+      completion_tokens: 0,
+      prompt_tokens: 0,
+      total_tokens: 0
+    }
+
+    const finalMetrics: Metrics = {
+      completion_tokens: 0,
+      time_completion_millsec: 0,
+      time_first_token_millsec: 0
+    }
+
     const toolResponses: MCPToolResponse[] = []
 
     const processToolResults = async (toolResults: Awaited<ReturnType<typeof parseAndCallTools>>, idx: number) => {
@@ -548,6 +566,8 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
       idx: number
     ) => {
       const toolCalls: OpenAI.Responses.ResponseFunctionToolCall[] = []
+      let time_first_token_millsec = 0
+      const start_time_millsec = new Date().getTime()
 
       if (!streamOutput) {
         const nonStream = stream as OpenAI.Responses.Response
@@ -632,8 +652,6 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
 
       const outputItems: OpenAI.Responses.ResponseOutputItem[] = []
 
-      let lastUsage: Usage | undefined = undefined
-      let final_time_completion_millsec_delta = 0
       for await (const chunk of stream as Stream<OpenAI.Responses.ResponseStreamEvent>) {
         if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
           break
@@ -707,18 +725,18 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
             }
             break
           case 'response.completed': {
-            final_time_completion_millsec_delta = new Date().getTime() - start_time_millsec
             const completion_tokens =
               (chunk.response.usage?.output_tokens || 0) +
               (chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 0)
             const total_tokens =
               (chunk.response.usage?.total_tokens || 0) +
               (chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 0)
-            lastUsage = {
-              completion_tokens,
-              prompt_tokens: chunk.response.usage?.input_tokens || 0,
-              total_tokens
-            }
+            finalUsage.completion_tokens += completion_tokens
+            finalUsage.prompt_tokens += chunk.response.usage?.input_tokens || 0
+            finalUsage.total_tokens += total_tokens
+            finalMetrics.completion_tokens += completion_tokens
+            finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+            finalMetrics.time_first_token_millsec = Math.max(0, time_first_token_millsec - start_time_millsec)
             break
           }
           case 'error':
@@ -760,12 +778,8 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
       onChunk({
         type: ChunkType.BLOCK_COMPLETE,
         response: {
-          usage: lastUsage,
-          metrics: {
-            completion_tokens: lastUsage?.completion_tokens,
-            time_completion_millsec: final_time_completion_millsec_delta,
-            time_first_token_millsec: time_first_token_millsec - start_time_millsec
-          }
+          usage: finalUsage,
+          metrics: finalMetrics
         }
       })
     }
diff --git a/src/renderer/src/store/thunk/messageThunk.ts b/src/renderer/src/store/thunk/messageThunk.ts
index ef5b24ccaa..4e586a9a66 100644
--- a/src/renderer/src/store/thunk/messageThunk.ts
+++ b/src/renderer/src/store/thunk/messageThunk.ts
@@ -565,7 +565,7 @@ const fetchAndProcessAssistantResponseImpl = async (
           message: pauseErrorLanguagePlaceholder || error.message || 'Stream processing error',
           originalMessage: error.message,
           stack: error.stack,
-          status: error.status,
+          status: error.status || error.code,
           requestId: error.request_id
         }
         if (lastBlockId) {
@@ -609,13 +609,14 @@ const fetchAndProcessAssistantResponseImpl = async (
           // 更新topic的name
           autoRenameTopic(assistant, topicId)
 
-          if (response && !response.usage) {
+          // Estimate usage locally when the provider reported none (usage missing or zero total).
+          if (response && !response.usage?.total_tokens) {
             const usage = await estimateMessagesUsage({ assistant, messages: finalContextWithAssistant })
             response.usage = usage
           }
         }
         if (response && response.metrics) {
-          if (!response.metrics.completion_tokens && response.usage) {
+          if (!response.metrics.completion_tokens && response.usage?.completion_tokens) {
             response = {
               ...response,
               metrics: {
diff --git a/src/renderer/src/types/index.ts b/src/renderer/src/types/index.ts
index e09e826b1b..0873fc0a99 100644
--- a/src/renderer/src/types/index.ts
+++ b/src/renderer/src/types/index.ts
@@ -121,8 +121,8 @@ export type Usage = OpenAI.Completions.CompletionUsage & {
 }
 
 export type Metrics = {
-  completion_tokens?: number
-  time_completion_millsec?: number
+  completion_tokens: number
+  time_completion_millsec: number
   time_first_token_millsec?: number
   time_thinking_millsec?: number
 }