fix: message and rerank errors

commit 1c90e23d76a3e1008408bf29add122ccab7dbe6d Merge: 4e792033 1fde0999 Author: kangfenmao <kangfenmao@qq.com> Date: Sun May 11 18:43:56 2025 +0800 Merge branch 'main' into fix/next-release-bugs commit 4e7920336d838501b900a18f1c254f0d17fb54b8 Author: suyao <sy20010504@gmail.com> Date: Sun May 11 18:09:58 2025 +0800 refactor(GeminiProvider): implement image generation handling in chat responses commit cd1ce4c0c65bc2a111ddb3112722cb36e66b2515 Merge: 968de188 235122c8 Author: suyao <sy20010504@gmail.com> Date: Sun May 11 16:45:29 2025 +0800 Merge branch 'main' into fix/next-release-bugs commit 968de18866abacc55fd9bd74c8d618871e64ade6 Author: suyao <sy20010504@gmail.com> Date: Sun May 11 16:41:38 2025 +0800 fix: add new image generation models to the configuration commit 1eaf5801b4c0e2c3fa1aa2ed829b20d97ea57d3f Merge: cb76588d e6655fff Author: suyao <sy20010504@gmail.com> Date: Sun May 11 13:14:17 2025 +0800 Merge branch 'main' into fix/next-release-bugs commit cb76588dc692f35da1f3d4fdbc9217c6a1a36501 Author: suyao <sy20010504@gmail.com> Date: Sun May 11 13:13:20 2025 +0800 fix: enhance error handling and metrics tracking across AI providers and message processing commit c2d6bdabc00b48419773d08c7d6630803d6310c8 Author: suyao <sy20010504@gmail.com> Date: Sun May 11 05:36:52 2025 +0800 fix: update regex for function calling models and improve time tracking logic commit 95340b87d0bba3cdcd173a181953afa42b26da9b Author: suyao <sy20010504@gmail.com> Date: Sun May 11 05:05:05 2025 +0800 fix: adjust thinking millisecond handling in message thunk commit f4d4d3901603f14df616582fa537f3d9c1a694eb Author: suyao <sy20010504@gmail.com> Date: Sun May 11 03:52:26 2025 +0800 fix: remove 'auto' option from qwen model supported options commit e26f603dfe1a9146b0575142363d5ceab30e32df Author: suyao <sy20010504@gmail.com> Date: Sun May 11 03:50:05 2025 +0800 fix: add support for inline base64 image data in image block commit bb0093c656b2b72158db1bf7bfef6aae46b8096c Merge: f9d1339b d39584fc 
Author: suyao <sy20010504@gmail.com> Date: Sun May 11 03:00:57 2025 +0800 Merge branch 'main' into fix/next-release-bugs commit f9d1339bd3367a4f80da23aac1fdc73b4cd2a075 Author: suyao <sy20010504@gmail.com> Date: Sun May 11 03:00:11 2025 +0800 fix: set default zoomFactor in settings commit 7cf6fd685662a012e2460e722edcbe5ed12f1a1c Merge: ba9c4482 3bebfe27 Author: suyao <sy20010504@gmail.com> Date: Sun May 11 01:31:05 2025 +0800 Merge branch 'main' into fix/next-release-bugs commit ba9c44828578a3b5cc9fd1aaba80158615921785 Merge: 97dffe71 3bf0b6b3 Author: kangfenmao <kangfenmao@qq.com> Date: Sat May 10 20:18:44 2025 +0800 Merge branch 'main' into fix/next-release-bugs commit 97dffe719ee00ace2325c80022a48c44f03d6e26 Author: lizhixuan <daoquqiexing@gmail.com> Date: Sat May 10 11:38:01 2025 +0800 feat(MessageMenubar): add edit option to dropdown for single message editing commit 70157439a1d9778cacc87b1781a84d159a8d3f33 Author: lizhixuan <daoquqiexing@gmail.com> Date: Sat May 10 10:53:10 2025 +0800 refactor(StreamProcessingService): comment out console.log for cleaner code commit fa33ba77a9306ad316f34da4149858192079f7a2 Author: lizhixuan <daoquqiexing@gmail.com> Date: Sat May 10 10:52:08 2025 +0800 refactor(messageThunk): remove console.log statements for cleaner code commit 6544c5d2990adf1943195e1d4d11383859a05488 Author: kangfenmao <kangfenmao@qq.com> Date: Sat May 10 10:17:44 2025 +0800 feat(i18n): add download success and failure messages in multiple languages commit e23bb6744a4a99b2062012691340f78fad4e1952 Merge: 55c5c553 60cc1dee Author: kangfenmao <kangfenmao@qq.com> Date: Sat May 10 09:54:38 2025 +0800 Merge branch 'main' into fix/next-release-bugs commit 55c5c5533eab46a1de9c5511433ed1b1b9e90512 Author: suyao <sy20010504@gmail.com> Date: Fri May 9 22:19:35 2025 +0800 fix: update styled component props to use dollar sign prefix for consistency commit 7a5839e0efdf3eb648b3d26cdf7ce131a2821f6a Author: suyao <sy20010504@gmail.com> Date: Fri May 9 22:02:06 2025 +0800 
fix: prevent default action in handleLinkClick for better link handling commit ecb075fddfc2bc5796a804ccde29aaa762d85da6 Merge: df149608 963f04f7 Author: suyao <sy20010504@gmail.com> Date: Fri May 9 21:55:54 2025 +0800 Merge branch 'main' into fix/next-release-bugs commit df149608904039903d74d7a72a722b6c1a567ee3 Merge: 93bd4eb9 e29a9303 Author: suyao <sy20010504@gmail.com> Date: Fri May 9 21:48:13 2025 +0800 Merge branch 'main' into fix/next-release-bugs commit 93bd4eb907816414af5c27efbaa2d3d014c707fc Merge: c4d1deb6 38ff9b90 Author: suyao <sy20010504@gmail.com> Date: Fri May 9 21:27:08 2025 +0800 Merge branch 'fix/next-release-bugs' of github.com:CherryHQ/cherry-studio into fix/next-release-bugs commit c4d1deb6911977a23cf731db6bed80b8352557ff Author: suyao <sy20010504@gmail.com> Date: Fri May 9 21:24:48 2025 +0800 feat: enhance citation handling and add metadata support in citation blocks commit 38ff9b90b8fee91ed1fba7b83c9470bc40bd3429 Author: MyPrototypeWhat <daoquqiexing@gmail.com> Date: Fri May 9 19:47:24 2025 +0800 fix: enhance logging and update async handling in StreamProcessingService and messageThunk - Enabled logging in `createStreamProcessor` for better debugging. - Added logging for updated messages in `updateExistingMessageAndBlocksInDB` and `saveUpdatesToDB`. - Updated `onTextComplete` and `onLLMWebSearchComplete` to handle asynchronous operations correctly. - Commented out unused `saveUpdatedBlockToDB` calls to prevent unnecessary database updates. commit cda0215c9c4e007c2c7240c3c9c8521fb7111774 Author: MyPrototypeWhat <daoquqiexing@gmail.com> Date: Fri May 9 18:47:55 2025 +0800 refactor: optimize block update logic and remove unused code - Updated `throttledBlockUpdate` to handle asynchronous updates directly. - Removed the unused `throttledBlockDbUpdate` function and its related logic. - Added cancellation for throttled updates on error and completion to improve performance and reliability. - Cleaned up commented-out code for better readability. 
commit de2f5b09c8384eabd4df7253047b838a2759671a Author: MyPrototypeWhat <daoquqiexing@gmail.com> Date: Fri May 9 18:42:00 2025 +0800 refactor: update message handling and state management - Simplified message editing logic by removing unnecessary success/error logging. - Added `updatedAt` timestamp to message updates for better tracking. - Refactored `editMessageBlocks` to accept message ID and updates directly. - Removed unused `getTopicLimit` function from `TopicManager`. - Updated message rendering to use `updatedAt` when available. - Enhanced type definitions to include `updatedAt` in message structure. commit 700fa13971cafb04314817d2d8732c8fbf33c9d7 Author: suyao <sy20010504@gmail.com> Date: Fri May 9 16:19:55 2025 +0800 Remove Zhipu mode and text-only link handling commit 06bd1338cd671b255e477cec76b12663ea759f4c Author: kangfenmao <kangfenmao@qq.com> Date: Fri May 9 15:49:02 2025 +0800 fix: update citation rendering logic in MainTextBlock component - Added a check to determine if the citation URL is a valid link. - Updated citation tag formatting to conditionally include the link based on the URL validity. commit e96c9a569f7708816f57505975d30667929eeb19 Author: kangfenmao <kangfenmao@qq.com> Date: Thu May 8 18:31:14 2025 +0800 style: update ChatNavigation and CitationsList components for improved UI consistency - Added header style to remove borders in ChatNavigation. - Enhanced CitationsList with new Skeleton loading state and improved layout for citation cards. - Refactored CitationLink to a div for better styling control and adjusted padding in OpenButton for a more polished appearance.
2025-12-24 18:50:56 +08:00 · 2025-05-11 18:44:28 +08:00 · 2025-05-11 18:44:28 +08:00 · a64c8ded73
commit a64c8ded73
parent 1fde0999f8
15 changed files with 479 additions and 379 deletions
--- a/src/renderer/src/config/models.ts
+++ b/src/renderer/src/config/models.ts
@@ -189,7 +189,7 @@ export const TEXT_TO_IMAGE_REGEX = /flux|diffusion|stabilityai|sd-|dall|cogview|

 // Reasoning models
 export const REASONING_REGEX =
-  /^(o\d+(?:-[\w-]+)?|.*\b(?:reasoner|thinking)\b.*|.*-[rR]\d+.*|.*\bqwq(?:-[\w-]+)?\b.*|.*\bhunyuan-t1(?:-[\w-]+)?\b.*|.*\bglm-zero-preview\b.*|.*\bgrok-3-mini(?:-[\w-]+)?\b.*)$/i
+  /^(o\d+(?:-[\w-]+)?|.*\b(?:reasoning|reasoner|thinking)\b.*|.*-[rR]\d+.*|.*\bqwq(?:-[\w-]+)?\b.*|.*\bhunyuan-t1(?:-[\w-]+)?\b.*|.*\bglm-zero-preview\b.*|.*\bgrok-3-mini(?:-[\w-]+)?\b.*)$/i

 // Embedding models
 export const EMBEDDING_REGEX =
@@ -206,7 +206,7 @@ export const FUNCTION_CALLING_MODELS = [
  'gpt-4o-mini',
  'gpt-4',
  'gpt-4.5',
-  'o1(?:-[\\w-]+)?',
+  'o(1|3|4)(?:-[\\w-]+)?',
  'claude',
  'qwen',
  'qwen3',
@@ -2153,11 +2153,11 @@ export const TEXT_TO_IMAGES_MODELS_SUPPORT_IMAGE_ENHANCEMENT = [

 export const GENERATE_IMAGE_MODELS = [
  'gemini-2.0-flash-exp-image-generation',
+  'gemini-2.0-flash-preview-image-generation',
  'gemini-2.0-flash-exp',
  'grok-2-image-1212',
  'grok-2-image',
  'grok-2-image-latest',
-  'gpt-4o-image',
  'gpt-image-1'
 ]

@@ -2172,6 +2172,7 @@ export const GEMINI_SEARCH_MODELS = [
  'gemini-2.5-pro-exp-03-25',
  'gemini-2.5-pro-preview',
  'gemini-2.5-pro-preview-03-25',
+  'gemini-2.5-pro-preview-05-06',
  'gemini-2.5-flash-preview',
  'gemini-2.5-flash-preview-04-17'
 ]
--- a/src/renderer/src/pages/home/Inputbar/ThinkingButton.tsx
+++ b/src/renderer/src/pages/home/Inputbar/ThinkingButton.tsx
@@ -35,7 +35,7 @@ const MODEL_SUPPORTED_OPTIONS: Record<string, ThinkingOption[]> = {
  default: ['off', 'low', 'medium', 'high'],
  grok: ['off', 'low', 'high'],
  gemini: ['off', 'low', 'medium', 'high', 'auto'],
-  qwen: ['off', 'low', 'medium', 'high', 'auto']
+  qwen: ['off', 'low', 'medium', 'high']
 }

 // 选项转换映射表：当选项不支持时使用的替代选项
--- a/src/renderer/src/pages/home/Messages/Blocks/ErrorBlock.tsx
+++ b/src/renderer/src/pages/home/Messages/Blocks/ErrorBlock.tsx
@@ -16,7 +16,7 @@ const MessageErrorInfo: React.FC<{ block: ErrorMessageBlock }> = ({ block }) =>

  const HTTP_ERROR_CODES = [400, 401, 403, 404, 429, 500, 502, 503, 504]
  if (block.error && HTTP_ERROR_CODES.includes(block.error?.status)) {
-    return <Alert description={t(`error.http.${block.error.status}`)} type="error" />
+    return <Alert description={t(`error.http.${block.error.status}`)} message={block.error?.message} type="error" />
  }
  if (block?.error?.message) {
    const errorKey = `error.${block.error.message}`
--- a/src/renderer/src/pages/home/Messages/Blocks/ThinkingBlock.tsx
+++ b/src/renderer/src/pages/home/Messages/Blocks/ThinkingBlock.tsx
@@ -4,7 +4,7 @@ import { MessageBlockStatus, type ThinkingMessageBlock } from '@renderer/types/n
 import { Collapse, message as antdMessage, Tooltip } from 'antd'
 import { Lightbulb } from 'lucide-react'
 import { motion } from 'motion/react'
-import { memo, useCallback, useEffect, useMemo, useState } from 'react'
+import { memo, useCallback, useEffect, useMemo, useRef, useState } from 'react'
 import { useTranslation } from 'react-i18next'
 import styled from 'styled-components'

@@ -40,6 +40,8 @@ const ThinkingBlock: React.FC<Props> = ({ block }) => {
  const { t } = useTranslation()
  const { messageFont, fontSize, thoughtAutoCollapse } = useSettings()
  const [activeKey, setActiveKey] = useState<'thought' | ''>(thoughtAutoCollapse ? '' : 'thought')
+  const [thinkingTime, setThinkingTime] = useState(block.thinking_millsec || 0)
+  const intervalId = useRef<NodeJS.Timeout>(null)

  const isThinking = useMemo(() => block.status === MessageBlockStatus.STREAMING, [block.status])

@@ -73,13 +75,31 @@ const ThinkingBlock: React.FC<Props> = ({ block }) => {
    }
  }, [block.content, t])

+  // FIXME: 这里统计的和请求处统计的有一定误差
+  useEffect(() => {
+    if (isThinking) {
+      intervalId.current = setInterval(() => {
+        setThinkingTime((prev) => prev + 100)
+      }, 100)
+    } else if (intervalId.current) {
+      // 立即清除计时器
+      clearInterval(intervalId.current)
+      intervalId.current = null
+    }
+
+    return () => {
+      if (intervalId.current) {
+        window.clearInterval(intervalId.current)
+      }
+    }
+  }, [isThinking])
+
+  const thinkingTimeSeconds = useMemo(() => (thinkingTime / 1000).toFixed(1), [thinkingTime])
+
  if (!block.content) {
    return null
  }

-  const thinkingTime = block.thinking_millsec || 0
-  const thinkingTimeSeconds = (thinkingTime / 1000).toFixed(1)
-
  return (
    <CollapseContainer
      activeKey={activeKey}
--- a/src/renderer/src/pages/home/Messages/MessageContent.tsx
+++ b/src/renderer/src/pages/home/Messages/MessageContent.tsx
@@ -10,35 +10,6 @@ interface Props {
 }

 const MessageContent: React.FC<Props> = ({ message }) => {
-  // const { t } = useTranslation()
-  // if (message.status === 'pending') {
-  //   return (
-
-  //   )
-  // }
-
-  // if (message.status === 'searching') {
-  //   return (
-  //     <SearchingContainer>
-  //       <Search size={24} />
-  //       <SearchingText>{t('message.searching')}</SearchingText>
-  //       <BarLoader color="#1677ff" />
-  //     </SearchingContainer>
-  //   )
-  // }
-
-  // if (message.status === 'error') {
-  //   return <MessageError message={message} />
-  // }
-
-  // if (message.type === '@' && model) {
-  //   const content = `[@${model.name}](#)  ${getBriefInfo(message.content)}`
-  //   return <Markdown message={{ ...message, content }} />
-  // }
-  // const toolUseRegex = /<tool_use>([\s\S]*?)<\/tool_use>/g
-
-  // console.log('message', message)
-
  return (
    <>
      <Flex gap="8px" wrap style={{ marginBottom: 10 }}>
--- a/src/renderer/src/pages/home/Messages/MessageError.tsx
+++ b/src/renderer/src/pages/home/Messages/MessageError.tsx
@@ -1,45 +0,0 @@
-import type { ErrorMessageBlock } from '@renderer/types/newMessage'
-import { Alert as AntdAlert } from 'antd'
-import { FC } from 'react'
-import { useTranslation } from 'react-i18next'
-import styled from 'styled-components'
-
-const MessageError: FC<{ block: ErrorMessageBlock }> = ({ block }) => {
-  return (
-    <>
-      {/* <Markdown block={block} role={role} />
-      {block.error && (
-        <Markdown
-          message={{
-            ...block,
-            content: formatErrorMessage(block.error)
-          }}
-        />
-      )} */}
-      <MessageErrorInfo block={block} />
-    </>
-  )
-}
-
-const MessageErrorInfo: FC<{ block: ErrorMessageBlock }> = ({ block }) => {
-  const { t } = useTranslation()
-
-  const HTTP_ERROR_CODES = [400, 401, 403, 404, 429, 500, 502, 503, 504]
-  console.log('block', block)
-  if (block.error && HTTP_ERROR_CODES.includes(block.error?.status)) {
-    return <Alert description={t(`error.http.${block.error.status}`)} type="error" />
-  }
-  if (block?.error?.message) {
-    return <Alert description={block.error.message} type="error" />
-  }
-
-  return <Alert description={t('error.chat.response')} type="error" />
-}
-
-const Alert = styled(AntdAlert)`
-  margin: 15px 0 8px;
-  padding: 10px;
-  font-size: 12px;
-`
-
-export default MessageError
--- a/src/renderer/src/providers/AiProvider/AnthropicProvider.ts
+++ b/src/renderer/src/providers/AiProvider/AnthropicProvider.ts
@@ -30,10 +30,12 @@ import {
  MCPCallToolResponse,
  MCPTool,
  MCPToolResponse,
+  Metrics,
  Model,
  Provider,
  Suggestion,
  ToolCallResponse,
+  Usage,
  WebSearchSource
 } from '@renderer/types'
 import { ChunkType } from '@renderer/types/chunk'
@@ -47,7 +49,7 @@ import {
 } from '@renderer/utils/mcp-tools'
 import { findFileBlocks, findImageBlocks, getMainTextContent } from '@renderer/utils/messageUtils/find'
 import { buildSystemPrompt } from '@renderer/utils/prompt'
-import { first, flatten, sum, takeRight } from 'lodash'
+import { first, flatten, takeRight } from 'lodash'
 import OpenAI from 'openai'

 import { CompletionsParams } from '.'
@@ -270,77 +272,82 @@ export default class AnthropicProvider extends BaseProvider {
      ...this.getCustomParameters(assistant)
    }

-    let time_first_token_millsec = 0
-    let time_first_content_millsec = 0
-    let checkThinkingContent = false
-    let thinking_content = ''
-    const start_time_millsec = new Date().getTime()
-
-    if (!streamOutput) {
-      const message = await this.sdk.messages.create({ ...body, stream: false })
-      const time_completion_millsec = new Date().getTime() - start_time_millsec
-
-      let text = ''
-      let reasoning_content = ''
-
-      if (message.content && message.content.length > 0) {
-        const thinkingBlock = message.content.find((block) => block.type === 'thinking')
-        const textBlock = message.content.find((block) => block.type === 'text')
-
-        if (thinkingBlock && 'thinking' in thinkingBlock) {
-          reasoning_content = thinkingBlock.thinking
-        }
-
-        if (textBlock && 'text' in textBlock) {
-          text = textBlock.text
-        }
-      }
-
-      return onChunk({
-        type: ChunkType.BLOCK_COMPLETE,
-        response: {
-          text,
-          reasoning_content,
-          usage: message.usage as any,
-          metrics: {
-            completion_tokens: message.usage.output_tokens,
-            time_completion_millsec,
-            time_first_token_millsec: 0
-          }
-        }
-      })
-    }
-
    const { abortController, cleanup } = this.createAbortController(lastUserMessage?.id)
    const { signal } = abortController
+
+    const finalUsage: Usage = {
+      completion_tokens: 0,
+      prompt_tokens: 0,
+      total_tokens: 0
+    }
+
+    const finalMetrics: Metrics = {
+      completion_tokens: 0,
+      time_completion_millsec: 0,
+      time_first_token_millsec: 0
+    }
    const toolResponses: MCPToolResponse[] = []

-    const processStream = (body: MessageCreateParamsNonStreaming, idx: number) => {
+    const processStream = async (body: MessageCreateParamsNonStreaming, idx: number) => {
+      let time_first_token_millsec = 0
+      const start_time_millsec = new Date().getTime()
+
+      if (!streamOutput) {
+        const message = await this.sdk.messages.create({ ...body, stream: false })
+        const time_completion_millsec = new Date().getTime() - start_time_millsec
+
+        let text = ''
+        let reasoning_content = ''
+
+        if (message.content && message.content.length > 0) {
+          const thinkingBlock = message.content.find((block) => block.type === 'thinking')
+          const textBlock = message.content.find((block) => block.type === 'text')
+
+          if (thinkingBlock && 'thinking' in thinkingBlock) {
+            reasoning_content = thinkingBlock.thinking
+          }
+
+          if (textBlock && 'text' in textBlock) {
+            text = textBlock.text
+          }
+        }
+
+        return onChunk({
+          type: ChunkType.BLOCK_COMPLETE,
+          response: {
+            text,
+            reasoning_content,
+            usage: message.usage as any,
+            metrics: {
+              completion_tokens: message.usage.output_tokens,
+              time_completion_millsec,
+              time_first_token_millsec: 0
+            }
+          }
+        })
+      }
+
+      let thinking_content = ''
+      let isFirstChunk = true
+
      return new Promise<void>((resolve, reject) => {
        // 等待接口返回流
        const toolCalls: ToolUseBlock[] = []
-        let hasThinkingContent = false
+
        this.sdk.messages
          .stream({ ...body, stream: true }, { signal, timeout: 5 * 60 * 1000 })
          .on('text', (text) => {
-            if (hasThinkingContent && !checkThinkingContent) {
-              checkThinkingContent = true
-              onChunk({
-                type: ChunkType.THINKING_COMPLETE,
-                text: thinking_content,
-                thinking_millsec: new Date().getTime() - time_first_content_millsec
-              })
-            }
-            if (time_first_token_millsec == 0) {
-              time_first_token_millsec = new Date().getTime()
-            }
-
-            thinking_content = ''
-            checkThinkingContent = false
-            hasThinkingContent = false
-
-            if (!hasThinkingContent && time_first_content_millsec === 0) {
-              time_first_content_millsec = new Date().getTime()
+            if (isFirstChunk) {
+              isFirstChunk = false
+              if (time_first_token_millsec == 0) {
+                time_first_token_millsec = new Date().getTime()
+              } else {
+                onChunk({
+                  type: ChunkType.THINKING_COMPLETE,
+                  text: thinking_content,
+                  thinking_millsec: new Date().getTime() - time_first_token_millsec
+                })
+              }
            }

            onChunk({ type: ChunkType.TEXT_DELTA, text })
@@ -372,34 +379,22 @@ export default class AnthropicProvider extends BaseProvider {
                })
              }
            }
+            if (block.type === 'tool_use') {
+              toolCalls.push(block)
+            }
          })
          .on('thinking', (thinking) => {
-            hasThinkingContent = true
-            const currentTime = new Date().getTime() // Get current time for each chunk
-
            if (time_first_token_millsec == 0) {
-              time_first_token_millsec = currentTime
+              time_first_token_millsec = new Date().getTime()
            }

-            // Set time_first_content_millsec ONLY when the first content (thinking or text) arrives
-            if (time_first_content_millsec === 0) {
-              time_first_content_millsec = currentTime
-            }
-
-            // Calculate thinking time as time elapsed since start until this chunk
-            const thinking_time = currentTime - time_first_content_millsec
            onChunk({
              type: ChunkType.THINKING_DELTA,
              text: thinking,
-              thinking_millsec: thinking_time
+              thinking_millsec: new Date().getTime() - time_first_token_millsec
            })
            thinking_content += thinking
          })
-          .on('contentBlock', (content) => {
-            if (content.type === 'tool_use') {
-              toolCalls.push(content)
-            }
-          })
          .on('finalMessage', async (message) => {
            const toolResults: Awaited<ReturnType<typeof parseAndCallTools>> = []
            // tool call
@@ -458,29 +453,28 @@ export default class AnthropicProvider extends BaseProvider {
              newBody.messages = userMessages

              onChunk({ type: ChunkType.LLM_RESPONSE_CREATED })
-              await processStream(newBody, idx + 1)
+              try {
+                await processStream(newBody, idx + 1)
+              } catch (error) {
+                console.error('Error processing stream:', error)
+                reject(error)
+              }
            }

-            const time_completion_millsec = new Date().getTime() - start_time_millsec
+            finalUsage.prompt_tokens += message.usage.input_tokens
+            finalUsage.completion_tokens += message.usage.output_tokens
+            finalUsage.total_tokens += finalUsage.prompt_tokens + finalUsage.completion_tokens
+            finalMetrics.completion_tokens = finalUsage.completion_tokens
+            finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+            finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec

            onChunk({
              type: ChunkType.BLOCK_COMPLETE,
              response: {
-                usage: {
-                  prompt_tokens: message.usage.input_tokens,
-                  completion_tokens: message.usage.output_tokens,
-                  total_tokens: sum(Object.values(message.usage))
-                },
-                metrics: {
-                  completion_tokens: message.usage.output_tokens,
-                  time_completion_millsec,
-                  time_first_token_millsec: time_first_token_millsec - start_time_millsec
-                }
+                usage: finalUsage,
+                metrics: finalMetrics
              }
            })
-            // FIXME: 临时方案，重置时间戳和思考内容
-            time_first_token_millsec = 0
-            time_first_content_millsec = 0
            resolve()
          })
          .on('error', (error) => reject(error))
--- a/src/renderer/src/providers/AiProvider/GeminiProvider.ts
+++ b/src/renderer/src/providers/AiProvider/GeminiProvider.ts
@@ -40,6 +40,7 @@ import {
  MCPCallToolResponse,
  MCPTool,
  MCPToolResponse,
+  Metrics,
  Model,
  Provider,
  Suggestion,
@@ -126,7 +127,6 @@ export default class GeminiProvider extends BaseProvider {
   * @returns The message contents
   */
  private async getMessageContents(message: Message): Promise<Content> {
-    console.log('getMessageContents', message)
    const role = message.role === 'user' ? 'user' : 'model'
    const parts: Part[] = [{ text: await this.getMessageContent(message) }]
    // Add any generated images from previous responses
@@ -153,6 +153,16 @@ export default class GeminiProvider extends BaseProvider {
          }
        }
      }
+      const file = imageBlock.file
+      if (file) {
+        const base64Data = await window.api.file.base64Image(file.id + file.ext)
+        parts.push({
+          inlineData: {
+            data: base64Data.base64,
+            mimeType: base64Data.mime
+          } as Part['inlineData']
+        })
+      }
    }

    const fileBlocks = findFileBlocks(message)
@@ -186,6 +196,50 @@ export default class GeminiProvider extends BaseProvider {
    }
  }

+  private async getImageFileContents(message: Message): Promise<Content> {
+    const role = message.role === 'user' ? 'user' : 'model'
+    const content = getMainTextContent(message)
+    const parts: Part[] = [{ text: content }]
+    const imageBlocks = findImageBlocks(message)
+    for (const imageBlock of imageBlocks) {
+      if (
+        imageBlock.metadata?.generateImageResponse?.images &&
+        imageBlock.metadata.generateImageResponse.images.length > 0
+      ) {
+        for (const imageUrl of imageBlock.metadata.generateImageResponse.images) {
+          if (imageUrl && imageUrl.startsWith('data:')) {
+            // Extract base64 data and mime type from the data URL
+            const matches = imageUrl.match(/^data:(.+);base64,(.*)$/)
+            if (matches && matches.length === 3) {
+              const mimeType = matches[1]
+              const base64Data = matches[2]
+              parts.push({
+                inlineData: {
+                  data: base64Data,
+                  mimeType: mimeType
+                } as Part['inlineData']
+              })
+            }
+          }
+        }
+      }
+      const file = imageBlock.file
+      if (file) {
+        const base64Data = await window.api.file.base64Image(file.id + file.ext)
+        parts.push({
+          inlineData: {
+            data: base64Data.base64,
+            mimeType: base64Data.mime
+          } as Part['inlineData']
+        })
+      }
+    }
+    return {
+      role,
+      parts: parts
+    }
+  }
+
  /**
   * Get the safety settings
   * @returns The safety settings
@@ -273,6 +327,18 @@ export default class GeminiProvider extends BaseProvider {
  }: CompletionsParams): Promise<void> {
    const defaultModel = getDefaultModel()
    const model = assistant.model || defaultModel
+    let canGenerateImage = false
+    if (isGenerateImageModel(model)) {
+      if (model.id === 'gemini-2.0-flash-exp') {
+        canGenerateImage = assistant.enableGenerateImage!
+      } else {
+        canGenerateImage = true
+      }
+    }
+    if (canGenerateImage) {
+      await this.generateImageByChat({ messages, assistant, onChunk })
+      return
+    }
    const { contextCount, maxTokens, streamOutput, enableToolUse } = getAssistantSettings(assistant)

    const userMessages = filterUserRoleStartMessages(
@@ -309,21 +375,10 @@ export default class GeminiProvider extends BaseProvider {
      })
    }

-    let canGenerateImage = false
-    if (isGenerateImageModel(model)) {
-      if (model.id === 'gemini-2.0-flash-exp') {
-        canGenerateImage = assistant.enableGenerateImage!
-      } else {
-        canGenerateImage = true
-      }
-    }
-
    const generateContentConfig: GenerateContentConfig = {
-      responseModalities: canGenerateImage ? [Modality.TEXT, Modality.IMAGE] : undefined,
-      responseMimeType: canGenerateImage ? 'text/plain' : undefined,
      safetySettings: this.getSafetySettings(),
      // generate image don't need system instruction
-      systemInstruction: isGemmaModel(model) || canGenerateImage ? undefined : systemInstruction,
+      systemInstruction: isGemmaModel(model) ? undefined : systemInstruction,
      temperature: assistant?.settings?.temperature,
      topP: assistant?.settings?.topP,
      maxOutputTokens: maxTokens,
@@ -360,8 +415,17 @@ export default class GeminiProvider extends BaseProvider {
      }
    }

-    const start_time_millsec = new Date().getTime()
-    let time_first_token_millsec = 0
+    const finalUsage: Usage = {
+      completion_tokens: 0,
+      prompt_tokens: 0,
+      total_tokens: 0
+    }
+
+    const finalMetrics: Metrics = {
+      completion_tokens: 0,
+      time_completion_millsec: 0,
+      time_first_token_millsec: 0
+    }

    const { cleanup, abortController } = this.createAbortController(userLastMessage?.id, true)

@@ -435,6 +499,8 @@ export default class GeminiProvider extends BaseProvider {
      history.push(messageContents)

      let functionCalls: FunctionCall[] = []
+      let time_first_token_millsec = 0
+      const start_time_millsec = new Date().getTime()

      if (stream instanceof GenerateContentResponse) {
        let content = ''
@@ -494,45 +560,28 @@ export default class GeminiProvider extends BaseProvider {
        } as BlockCompleteChunk)
      } else {
        let content = ''
-        let final_time_completion_millsec = 0
-        let lastUsage: Usage | undefined = undefined
        for await (const chunk of stream) {
          if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) break

-          // --- Calculate Metrics ---
-          if (time_first_token_millsec == 0 && chunk.text !== undefined) {
-            // Update based on text arrival
-            time_first_token_millsec = new Date().getTime() - start_time_millsec
+          if (time_first_token_millsec == 0) {
+            time_first_token_millsec = new Date().getTime()
          }

-          // 1. Text Content
          if (chunk.text !== undefined) {
            content += chunk.text
            onChunk({ type: ChunkType.TEXT_DELTA, text: chunk.text })
          }

-          // 2. Usage Data
-          if (chunk.usageMetadata) {
-            lastUsage = {
-              prompt_tokens: chunk.usageMetadata.promptTokenCount || 0,
-              completion_tokens: chunk.usageMetadata.candidatesTokenCount || 0,
-              total_tokens: chunk.usageMetadata.totalTokenCount || 0
-            }
-            final_time_completion_millsec = new Date().getTime() - start_time_millsec
-          }
-
-          // 4. Image Generation
-          const generateImage = this.processGeminiImageResponse(chunk, onChunk)
-          if (generateImage?.images?.length) {
-            onChunk({ type: ChunkType.IMAGE_COMPLETE, image: generateImage })
-          }
-
          if (chunk.candidates?.[0]?.finishReason) {
            if (chunk.text) {
              onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
            }
+            if (chunk.usageMetadata) {
+              finalUsage.prompt_tokens += chunk.usageMetadata.promptTokenCount || 0
+              finalUsage.completion_tokens += chunk.usageMetadata.candidatesTokenCount || 0
+              finalUsage.total_tokens += chunk.usageMetadata.totalTokenCount || 0
+            }
            if (chunk.candidates?.[0]?.groundingMetadata) {
-              // 3. Grounding/Search Metadata
              const groundingMetadata = chunk.candidates?.[0]?.groundingMetadata
              onChunk({
                type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
@ -551,35 +600,37 @@ export default class GeminiProvider extends BaseProvider {
              functionCalls = functionCalls.concat(chunk.functionCalls)
            }

-            onChunk({
-              type: ChunkType.BLOCK_COMPLETE,
-              response: {
-                metrics: {
-                  completion_tokens: lastUsage?.completion_tokens,
-                  time_completion_millsec: final_time_completion_millsec,
-                  time_first_token_millsec
-                },
-                usage: lastUsage
-              }
-            })
-          }
-
-          // --- End Incremental onChunk calls ---
-
-          // Call processToolUses AFTER potentially processing text content in this chunk
-          // This assumes tools might be specified within the text stream
-          // Note: parseAndCallTools inside should handle its own onChunk for tool responses
-          let toolResults: Awaited<ReturnType<typeof parseAndCallTools>> = []
-          if (functionCalls.length) {
-            toolResults = await processToolCalls(functionCalls)
-          }
-          if (content.length) {
-            toolResults = toolResults.concat(await processToolUses(content))
-          }
-          if (toolResults.length) {
-            await processToolResults(toolResults, idx)
+            finalMetrics.completion_tokens = finalUsage.completion_tokens
+            finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+            finalMetrics.time_first_token_millsec =
+              (finalMetrics.time_first_token_millsec || 0) + (time_first_token_millsec - start_time_millsec)
          }
        }
+
+        // --- End Incremental onChunk calls ---
+
+        // Call processToolUses AFTER potentially processing text content in this chunk
+        // This assumes tools might be specified within the text stream
+        // Note: parseAndCallTools inside should handle its own onChunk for tool responses
+        let toolResults: Awaited<ReturnType<typeof parseAndCallTools>> = []
+        if (functionCalls.length) {
+          toolResults = await processToolCalls(functionCalls)
+        }
+        if (content.length) {
+          toolResults = toolResults.concat(await processToolUses(content))
+        }
+        if (toolResults.length) {
+          await processToolResults(toolResults, idx)
+        }
+
+        // FIXME: because processStream recurses, this is sent n times
+        onChunk({
+          type: ChunkType.BLOCK_COMPLETE,
+          response: {
+            usage: finalUsage,
+            metrics: finalMetrics
+          }
+        })
      }
    }

@ -605,17 +656,6 @@ export default class GeminiProvider extends BaseProvider {
    })

    await processStream(userMessagesStream, 0).finally(cleanup)
-
-    const final_time_completion_millsec = new Date().getTime() - start_time_millsec
-    onChunk({
-      type: ChunkType.BLOCK_COMPLETE,
-      response: {
-        metrics: {
-          time_completion_millsec: final_time_completion_millsec,
-          time_first_token_millsec
-        }
-      }
-    })
  }

  /**
@ -949,8 +989,112 @@ export default class GeminiProvider extends BaseProvider {
    return data.embeddings?.[0]?.values?.length || 0
  }

-  public generateImageByChat(): Promise<void> {
-    throw new Error('Method not implemented.')
+  /**
+   * Streams one chat turn that may return both text and images and reports the result via onChunk.
+   *
+   * Earlier context messages are replayed as chat history and the most recent one is sent as the
+   * prompt. Errors raised while building or streaming the request are logged and emitted as an
+   * ERROR chunk instead of being rethrown.
+   *
+   * @param messages conversation messages; the last contextCount + 2 are taken before filtering
+   * @param assistant assistant whose model and settings configure the request
+   * @param onChunk callback that receives every chunk emitted for this response
+   */
+  public async generateImageByChat({ messages, assistant, onChunk }): Promise<void> {
+    const defaultModel = getDefaultModel()
+    const model = assistant.model || defaultModel
+    const { contextCount, maxTokens } = getAssistantSettings(assistant)
+    const userMessages = filterUserRoleStartMessages(
+      filterEmptyMessages(filterContextMessages(takeRight(messages, contextCount + 2)))
+    )
+
+    const userLastMessage = userMessages.pop()
+    // Keep the controller's cleanup handle so it can be run once the request settles.
+    const { abortController, cleanup } = this.createAbortController(userLastMessage?.id, true)
+    const { signal } = abortController
+    const generateContentConfig: GenerateContentConfig = {
+      responseModalities: [Modality.TEXT, Modality.IMAGE],
+      responseMimeType: 'text/plain',
+      safetySettings: this.getSafetySettings(),
+      temperature: assistant?.settings?.temperature,
+      topP: assistant?.settings?.top_p,
+      maxOutputTokens: maxTokens,
+      abortSignal: signal,
+      ...this.getCustomParameters(assistant)
+    }
+    const history: Content[] = []
+    try {
+      for (const message of userMessages) {
+        history.push(await this.getImageFileContents(message))
+      }
+
+      let time_first_token_millsec = 0
+      const start_time_millsec = new Date().getTime()
+      onChunk({ type: ChunkType.LLM_RESPONSE_CREATED })
+      const chat = this.sdk.chats.create({
+        model: model.id,
+        config: generateContentConfig,
+        history: history
+      })
+      let content = ''
+      const finalUsage: Usage = {
+        prompt_tokens: 0,
+        completion_tokens: 0,
+        total_tokens: 0
+      }
+      const userMessage: Content = await this.getImageFileContents(userLastMessage!)
+      const response = await chat.sendMessageStream({
+        message: userMessage.parts!,
+        config: {
+          ...generateContentConfig,
+          abortSignal: signal
+        }
+      })
+      for await (const chunk of response as AsyncGenerator<GenerateContentResponse>) {
+        if (time_first_token_millsec == 0) {
+          time_first_token_millsec = new Date().getTime()
+        }
+
+        if (chunk.text !== undefined) {
+          content += chunk.text
+          onChunk({ type: ChunkType.TEXT_DELTA, text: chunk.text })
+        }
+        const generateImage = this.processGeminiImageResponse(chunk, onChunk)
+        if (generateImage?.images?.length) {
+          onChunk({ type: ChunkType.IMAGE_COMPLETE, image: generateImage })
+        }
+        if (chunk.candidates?.[0]?.finishReason) {
+          if (chunk.text) {
+            onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
+          }
+          if (chunk.usageMetadata) {
+            finalUsage.prompt_tokens = chunk.usageMetadata.promptTokenCount || 0
+            finalUsage.completion_tokens = chunk.usageMetadata.candidatesTokenCount || 0
+            finalUsage.total_tokens = chunk.usageMetadata.totalTokenCount || 0
+          }
+        }
+      }
+      onChunk({
+        type: ChunkType.BLOCK_COMPLETE,
+        response: {
+          usage: finalUsage,
+          metrics: {
+            completion_tokens: finalUsage.completion_tokens,
+            time_completion_millsec: new Date().getTime() - start_time_millsec,
+            // Stays 0 when the stream produced no chunk, rather than going negative.
+            time_first_token_millsec: time_first_token_millsec ? time_first_token_millsec - start_time_millsec : 0
+          }
+        }
+      })
+    } catch (error) {
+      console.error('[generateImageByChat] error', error)
+      onChunk({
+        type: ChunkType.ERROR,
+        error
+      })
+    } finally {
+      cleanup()
+    }
  }

  public convertMcpTools<T>(mcpTools: MCPTool[]): T[] {
--- a/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts
+++ b/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts
@ -34,6 +34,7 @@ import {
  MCPCallToolResponse,
  MCPTool,
  MCPToolResponse,
+  Metrics,
  Model,
  Provider,
  Suggestion,
@ -395,7 +396,6 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
      return streamOutput
    }

-    const start_time_millsec = new Date().getTime()
    const lastUserMessage = _messages.findLast((m) => m.role === 'user')
    const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
    const { signal } = abortController
@ -423,6 +423,18 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
      reqMessages = [systemMessage, ...userMessages].filter(Boolean) as ChatCompletionMessageParam[]
    }

+    let finalUsage: Usage = {
+      completion_tokens: 0,
+      prompt_tokens: 0,
+      total_tokens: 0
+    }
+
+    const finalMetrics: Metrics = {
+      completion_tokens: 0,
+      time_completion_millsec: 0,
+      time_first_token_millsec: 0
+    }
+
    const toolResponses: MCPToolResponse[] = []

    const processToolResults = async (toolResults: Awaited<ReturnType<typeof parseAndCallTools>>, idx: number) => {
@ -505,18 +517,17 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {

    const processStream = async (stream: any, idx: number) => {
      const toolCalls: ChatCompletionMessageToolCall[] = []
+      let time_first_token_millsec = 0
+      const start_time_millsec = new Date().getTime()
+
      // Handle non-streaming case (already returns early, no change needed here)
      if (!isSupportStreamOutput()) {
-        const time_completion_millsec = new Date().getTime() - start_time_millsec
        // Calculate final metrics once
-        const finalMetrics = {
-          completion_tokens: stream.usage?.completion_tokens,
-          time_completion_millsec,
-          time_first_token_millsec: 0 // Non-streaming, first token time is not relevant
-        }
+        finalMetrics.completion_tokens = stream.usage?.completion_tokens ?? 0
+        finalMetrics.time_completion_millsec = new Date().getTime() - start_time_millsec

        // Create a synthetic usage object if stream.usage is undefined
-        const finalUsage = stream.usage
+        finalUsage = { completion_tokens: 0, prompt_tokens: 0, total_tokens: 0, ...stream.usage }
        // Separate onChunk calls for text and usage/metrics
        let content = ''
        stream.choices.forEach((choice) => {
@ -526,7 +537,7 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
            onChunk({
              type: ChunkType.THINKING_COMPLETE,
              text: choice.message.reasoning,
-              thinking_millsec: time_completion_millsec
+              thinking_millsec: new Date().getTime() - start_time_millsec
            })
          }
          // text
@ -576,20 +587,9 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
        return
      }

-      let content = '' // Accumulate content for tool processing if needed
+      let content = ''
      let thinkingContent = ''
-      // 记录最终的完成时间差
-      let final_time_completion_millsec_delta = 0
-      let final_time_thinking_millsec_delta = 0
-      // Variable to store the last received usage object
-      let lastUsage: Usage | undefined = undefined
-      // let isThinkingInContent: ThoughtProcessor | undefined = undefined
-      // const processThinkingChunk = this.handleThinkingTags()
      let isFirstChunk = true
-      let time_first_token_millsec = 0
-      let time_first_token_millsec_delta = 0
-      let time_first_content_millsec = 0
-      let time_thinking_start = 0

      // 1. 初始化中间件
      const reasoningTags = [
@ -640,25 +640,24 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {

      // 3. 消费 processedStream，分发 onChunk
      for await (const chunk of readableStreamAsyncIterable(processedStream)) {
-        const currentTime = new Date().getTime()
        const delta = chunk.type === 'finish' ? chunk.delta : chunk
        const rawChunk = chunk.type === 'finish' ? chunk.chunk : chunk

        switch (chunk.type) {
          case 'reasoning': {
-            if (time_thinking_start === 0) {
-              time_thinking_start = currentTime
-              time_first_token_millsec = currentTime
-              time_first_token_millsec_delta = currentTime - start_time_millsec
+            if (time_first_token_millsec === 0) {
+              time_first_token_millsec = new Date().getTime()
            }
            thinkingContent += chunk.textDelta
-            const thinking_time = currentTime - time_thinking_start
-            onChunk({ type: ChunkType.THINKING_DELTA, text: chunk.textDelta, thinking_millsec: thinking_time })
+            onChunk({
+              type: ChunkType.THINKING_DELTA,
+              text: chunk.textDelta,
+              thinking_millsec: new Date().getTime() - time_first_token_millsec
+            })
            break
          }
          case 'text-delta': {
            let textDelta = chunk.textDelta
-
            if (assistant.enableWebSearch && delta) {
              const originalDelta = rawChunk?.choices?.[0]?.delta

@ -676,25 +675,32 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
            if (isFirstChunk) {
              isFirstChunk = false
              if (time_first_token_millsec === 0) {
-                time_first_token_millsec = currentTime
-                time_first_token_millsec_delta = currentTime - start_time_millsec
+                time_first_token_millsec = new Date().getTime()
+              } else {
+                onChunk({
+                  type: ChunkType.THINKING_COMPLETE,
+                  text: thinkingContent,
+                  thinking_millsec: new Date().getTime() - time_first_token_millsec
+                })
              }
            }
            content += textDelta
-            if (time_thinking_start > 0 && time_first_content_millsec === 0) {
-              time_first_content_millsec = currentTime
-              final_time_thinking_millsec_delta = time_first_content_millsec - time_thinking_start
-
-              onChunk({
-                type: ChunkType.THINKING_COMPLETE,
-                text: thinkingContent,
-                thinking_millsec: final_time_thinking_millsec_delta
-              })
-            }
            onChunk({ type: ChunkType.TEXT_DELTA, text: textDelta })
            break
          }
          case 'tool-calls': {
+            if (isFirstChunk) {
+              isFirstChunk = false
+              if (time_first_token_millsec === 0) {
+                time_first_token_millsec = new Date().getTime()
+              } else {
+                onChunk({
+                  type: ChunkType.THINKING_COMPLETE,
+                  text: thinkingContent,
+                  thinking_millsec: new Date().getTime() - time_first_token_millsec
+                })
+              }
+            }
            chunk.delta.tool_calls.forEach((toolCall) => {
              const { id, index, type, function: fun } = toolCall
              if (id && type === 'function' && fun) {
@ -721,10 +727,14 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {

            if (!isEmpty(finishReason)) {
              onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
-              final_time_completion_millsec_delta = currentTime - start_time_millsec
              if (usage) {
-                lastUsage = usage
+                finalUsage.completion_tokens += usage.completion_tokens || 0
+                finalUsage.prompt_tokens += usage.prompt_tokens || 0
+                finalUsage.total_tokens += usage.total_tokens || 0
+                finalMetrics.completion_tokens += usage.completion_tokens || 0
              }
+              finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+              finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec
              if (originalFinishDelta?.annotations) {
                onChunk({
                  type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
@ -774,49 +784,46 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
                } as LLMWebSearchCompleteChunk)
              }
            }
-            reqMessages.push({
-              role: 'assistant',
-              content: content,
-              tool_calls: toolCalls.length
-                ? toolCalls.map((toolCall) => ({
-                    id: toolCall.id,
-                    function: {
-                      ...toolCall.function,
-                      arguments:
-                        typeof toolCall.function.arguments === 'string'
-                          ? toolCall.function.arguments
-                          : JSON.stringify(toolCall.function.arguments)
-                    },
-                    type: 'function'
-                  }))
-                : undefined
-            })
-            let toolResults: Awaited<ReturnType<typeof parseAndCallTools>> = []
-            if (toolCalls.length) {
-              toolResults = await processToolCalls(mcpTools, toolCalls)
-            }
-            if (content.length) {
-              toolResults = toolResults.concat(await processToolUses(content))
-            }
-            if (toolResults.length) {
-              await processToolResults(toolResults, idx)
-            }
-            onChunk({
-              type: ChunkType.BLOCK_COMPLETE,
-              response: {
-                usage: lastUsage,
-                metrics: {
-                  completion_tokens: lastUsage?.completion_tokens,
-                  time_completion_millsec: final_time_completion_millsec_delta,
-                  time_first_token_millsec: time_first_token_millsec_delta,
-                  time_thinking_millsec: final_time_thinking_millsec_delta
-                }
-              }
-            })
            break
          }
        }
      }
+
+      reqMessages.push({
+        role: 'assistant',
+        content: content,
+        tool_calls: toolCalls.length
+          ? toolCalls.map((toolCall) => ({
+              id: toolCall.id,
+              function: {
+                ...toolCall.function,
+                arguments:
+                  typeof toolCall.function.arguments === 'string'
+                    ? toolCall.function.arguments
+                    : JSON.stringify(toolCall.function.arguments)
+              },
+              type: 'function'
+            }))
+          : undefined
+      })
+      let toolResults: Awaited<ReturnType<typeof parseAndCallTools>> = []
+      if (toolCalls.length) {
+        toolResults = await processToolCalls(mcpTools, toolCalls)
+      }
+      if (content.length) {
+        toolResults = toolResults.concat(await processToolUses(content))
+      }
+      if (toolResults.length) {
+        await processToolResults(toolResults, idx)
+      }
+
+      onChunk({
+        type: ChunkType.BLOCK_COMPLETE,
+        response: {
+          usage: finalUsage,
+          metrics: finalMetrics
+        }
+      })
    }

    reqMessages = processReqMessages(model, reqMessages)
--- a/src/renderer/src/providers/AiProvider/OpenAIProvider.ts
+++ b/src/renderer/src/providers/AiProvider/OpenAIProvider.ts
@ -24,6 +24,7 @@ import {
  MCPCallToolResponse,
  MCPTool,
  MCPToolResponse,
+  Metrics,
  Model,
  Provider,
  Suggestion,
@ -332,7 +333,6 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
      const lastUserMessage = _messages.findLast((m) => m.role === 'user')
      const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
      const { signal } = abortController
-      let time_first_token_millsec_delta = 0
      const start_time_millsec = new Date().getTime()
      const response = await this.sdk.chat.completions
        // @ts-ignore key is not typed
@ -354,8 +354,17 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
      const processStream = async (stream: any) => {
        let content = ''
        let isFirstChunk = true
-        let final_time_completion_millsec_delta = 0
-        let lastUsage: Usage | undefined = undefined
+        const finalUsage: Usage = {
+          completion_tokens: 0,
+          prompt_tokens: 0,
+          total_tokens: 0
+        }
+
+        const finalMetrics: Metrics = {
+          completion_tokens: 0,
+          time_completion_millsec: 0,
+          time_first_token_millsec: 0
+        }
        for await (const chunk of stream as any) {
          if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
            break
@ -368,17 +377,21 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
            }
            if (isFirstChunk) {
              isFirstChunk = false
-              time_first_token_millsec_delta = new Date().getTime() - start_time_millsec
+              finalMetrics.time_first_token_millsec = new Date().getTime() - start_time_millsec
            }
            content += delta.content
            onChunk({ type: ChunkType.TEXT_DELTA, text: delta.content })
          }
          if (!isEmpty(finishReason) || chunk?.annotations) {
            onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
-            final_time_completion_millsec_delta = new Date().getTime() - start_time_millsec
+            finalMetrics.time_completion_millsec = new Date().getTime() - start_time_millsec
            if (chunk.usage) {
-              lastUsage = chunk.usage
+              const usage = chunk.usage as OpenAI.Completions.CompletionUsage
+              finalUsage.completion_tokens = usage.completion_tokens
+              finalUsage.prompt_tokens = usage.prompt_tokens
+              finalUsage.total_tokens = usage.total_tokens
            }
+            finalMetrics.completion_tokens = finalUsage.completion_tokens
          }
          if (delta?.annotations) {
            onChunk({
@ -393,12 +406,8 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
        onChunk({
          type: ChunkType.BLOCK_COMPLETE,
          response: {
-            usage: lastUsage,
-            metrics: {
-              completion_tokens: lastUsage?.completion_tokens,
-              time_completion_millsec: final_time_completion_millsec_delta,
-              time_first_token_millsec: time_first_token_millsec_delta
-            }
+            usage: finalUsage,
+            metrics: finalMetrics
          }
        })
      }
@ -428,7 +437,6 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
      type: 'input_text'
    }
    if (isSupportedReasoningEffortOpenAIModel(model)) {
-      systemMessageInput.text = `Formatting re-enabled${systemMessageInput.text ? '\n' + systemMessageInput.text : ''}`
      systemMessage.role = 'developer'
    }

@ -455,9 +463,6 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
      userMessage.push(await this.getReponseMessageParam(message, model))
    }

-    let time_first_token_millsec = 0
-    const start_time_millsec = new Date().getTime()
-
    const lastUserMessage = _messages.findLast((m) => m.role === 'user')
    const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
    const { signal } = abortController
@ -470,6 +475,18 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
      reqMessages = [systemMessage, ...userMessage].filter(Boolean) as OpenAI.Responses.EasyInputMessage[]
    }

+    const finalUsage: Usage = {
+      completion_tokens: 0,
+      prompt_tokens: 0,
+      total_tokens: 0
+    }
+
+    const finalMetrics: Metrics = {
+      completion_tokens: 0,
+      time_completion_millsec: 0,
+      time_first_token_millsec: 0
+    }
+
    const toolResponses: MCPToolResponse[] = []

    const processToolResults = async (toolResults: Awaited<ReturnType<typeof parseAndCallTools>>, idx: number) => {
@ -549,6 +566,8 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
      idx: number
    ) => {
      const toolCalls: OpenAI.Responses.ResponseFunctionToolCall[] = []
+      let time_first_token_millsec = 0
+      const start_time_millsec = new Date().getTime()

      if (!streamOutput) {
        const nonStream = stream as OpenAI.Responses.Response
@ -633,17 +652,15 @@ export abstract class BaseOpenAiProvider extends BaseProvider {

      const outputItems: OpenAI.Responses.ResponseOutputItem[] = []

-      let lastUsage: Usage | undefined = undefined
-      let final_time_completion_millsec_delta = 0
      for await (const chunk of stream as Stream<OpenAI.Responses.ResponseStreamEvent>) {
        if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
          break
        }
        switch (chunk.type) {
-          case 'response.created':
-            time_first_token_millsec = new Date().getTime()
-            break
          case 'response.output_item.added':
+            if (time_first_token_millsec === 0) {
+              time_first_token_millsec = new Date().getTime()
+            }
            if (chunk.item.type === 'function_call') {
              outputItems.push(chunk.item)
            }
@ -708,18 +725,18 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
            }
            break
          case 'response.completed': {
-            final_time_completion_millsec_delta = new Date().getTime() - start_time_millsec
            const completion_tokens =
              (chunk.response.usage?.output_tokens || 0) +
              (chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 0)
            const total_tokens =
              (chunk.response.usage?.total_tokens || 0) +
              (chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 0)
-            lastUsage = {
-              completion_tokens,
-              prompt_tokens: chunk.response.usage?.input_tokens || 0,
-              total_tokens
-            }
+            finalUsage.completion_tokens += completion_tokens
+            finalUsage.prompt_tokens += chunk.response.usage?.input_tokens || 0
+            finalUsage.total_tokens += total_tokens
+            finalMetrics.completion_tokens += completion_tokens
+            finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+            finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec
            break
          }
          case 'error':
@ -761,12 +778,8 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
      onChunk({
        type: ChunkType.BLOCK_COMPLETE,
        response: {
-          usage: lastUsage,
-          metrics: {
-            completion_tokens: lastUsage?.completion_tokens,
-            time_completion_millsec: final_time_completion_millsec_delta,
-            time_first_token_millsec: time_first_token_millsec - start_time_millsec
-          }
+          usage: finalUsage,
+          metrics: finalMetrics
        }
      })
    }
--- a/src/renderer/src/services/StreamProcessingService.ts
+++ b/src/renderer/src/services/StreamProcessingService.ts
@ -59,7 +59,7 @@ export function createStreamProcessor(callbacks: StreamProcessorCallbacks = {})
        callbacks.onTextComplete(data.text)
      }
      if (data.type === ChunkType.THINKING_DELTA && callbacks.onThinkingChunk) {
-        callbacks.onThinkingChunk(data.text, data.thinking_millsec)
+        callbacks.onThinkingChunk(data.text)
      }
      if (data.type === ChunkType.THINKING_COMPLETE && callbacks.onThinkingComplete) {
        callbacks.onThinkingComplete(data.text, data.thinking_millsec)
--- a/src/renderer/src/store/index.ts
+++ b/src/renderer/src/store/index.ts
@ -46,7 +46,7 @@ const persistedReducer = persistReducer(
  {
    key: 'cherry-studio',
    storage,
-    version: 99,
+    version: 100,
    blacklist: ['runtime', 'messages', 'messageBlocks'],
    migrate
  },
--- a/src/renderer/src/store/migrate.ts
+++ b/src/renderer/src/store/migrate.ts
@ -1296,6 +1296,16 @@ const migrateConfig = {
    } catch (error) {
      return state
    }
+  },
+  '100': (state: RootState) => {
+    try {
+      if (!state.settings.zoomFactor) {
+        state.settings.zoomFactor = 1
+      }
+      return state
+    } catch (error) {
+      return state
+    }
  }
 }

--- a/src/renderer/src/store/thunk/messageThunk.ts
+++ b/src/renderer/src/store/thunk/messageThunk.ts
@ -398,7 +398,7 @@ const fetchAndProcessAssistantResponseImpl = async (
          } else {
            const newBlock = createThinkingBlock(assistantMsgId, accumulatedThinking, {
              status: MessageBlockStatus.STREAMING,
-              thinking_millsec: thinking_millsec
+              thinking_millsec: 0
            })
            handleBlockTransition(newBlock, MessageBlockType.THINKING)
          }
@ -565,7 +565,7 @@ const fetchAndProcessAssistantResponseImpl = async (
          message: pauseErrorLanguagePlaceholder || error.message || 'Stream processing error',
          originalMessage: error.message,
          stack: error.stack,
-          status: error.status,
+          status: error.status || error.code,
          requestId: error.request_id
        }
        if (lastBlockId) {
@ -609,13 +609,15 @@ const fetchAndProcessAssistantResponseImpl = async (
          // 更新topic的name
          autoRenameTopic(assistant, topicId)

-          if (response && !response.usage) {
+          // Estimate whenever no total was reported: usage may be absent or lack total_tokens, not only be 0.
+          if (response && !response.usage?.total_tokens) {
            const usage = await estimateMessagesUsage({ assistant, messages: finalContextWithAssistant })
            response.usage = usage
          }
        }
        if (response && response.metrics) {
-          if (!response.metrics.completion_tokens && response.usage) {
+          // A missing completion_tokens is treated like 0 so the usage value can backfill it.
+          if (!response.metrics.completion_tokens && response.usage?.completion_tokens) {
            response = {
              ...response,
              metrics: {
--- a/src/renderer/src/types/index.ts
+++ b/src/renderer/src/types/index.ts
@ -121,8 +121,8 @@ export type Usage = OpenAI.Completions.CompletionUsage & {
 }

 export type Metrics = {
-  completion_tokens?: number
-  time_completion_millsec?: number
+  completion_tokens: number
+  time_completion_millsec: number
  time_first_token_millsec?: number
  time_thinking_millsec?: number
 }