From a64c8ded73feedd80d58533a9c1f63ea70f2937f Mon Sep 17 00:00:00 2001
From: kangfenmao
Date: Sun, 11 May 2025 18:44:28 +0800
Subject: [PATCH] fix: message and rerank errors

commit 1c90e23d76a3e1008408bf29add122ccab7dbe6d
Merge: 4e792033 1fde0999
Author: kangfenmao
Date:   Sun May 11 18:43:56 2025 +0800

    Merge branch 'main' into fix/next-release-bugs

commit 4e7920336d838501b900a18f1c254f0d17fb54b8
Author: suyao
Date:   Sun May 11 18:09:58 2025 +0800

    refactor(GeminiProvider): implement image generation handling in chat responses

commit cd1ce4c0c65bc2a111ddb3112722cb36e66b2515
Merge: 968de188 235122c8
Author: suyao
Date:   Sun May 11 16:45:29 2025 +0800

    Merge branch 'main' into fix/next-release-bugs

commit 968de18866abacc55fd9bd74c8d618871e64ade6
Author: suyao
Date:   Sun May 11 16:41:38 2025 +0800

    fix: add new image generation models to the configuration

commit 1eaf5801b4c0e2c3fa1aa2ed829b20d97ea57d3f
Merge: cb76588d e6655fff
Author: suyao
Date:   Sun May 11 13:14:17 2025 +0800

    Merge branch 'main' into fix/next-release-bugs

commit cb76588dc692f35da1f3d4fdbc9217c6a1a36501
Author: suyao
Date:   Sun May 11 13:13:20 2025 +0800

    fix: enhance error handling and metrics tracking across AI providers and message processing

commit c2d6bdabc00b48419773d08c7d6630803d6310c8
Author: suyao
Date:   Sun May 11 05:36:52 2025 +0800

    fix: update regex for function calling models and improve time tracking logic

commit 95340b87d0bba3cdcd173a181953afa42b26da9b
Author: suyao
Date:   Sun May 11 05:05:05 2025 +0800

    fix: adjust thinking millisecond handling in message thunk

commit f4d4d3901603f14df616582fa537f3d9c1a694eb
Author: suyao
Date:   Sun May 11 03:52:26 2025 +0800

    fix: remove 'auto' option from qwen model supported options

commit e26f603dfe1a9146b0575142363d5ceab30e32df
Author: suyao
Date:   Sun May 11 03:50:05 2025 +0800

    fix: add support for inline base64 image data in image block

commit bb0093c656b2b72158db1bf7bfef6aae46b8096c
Merge: f9d1339b d39584fc
Author: suyao
Date:   Sun May 11 03:00:57 2025 +0800

    Merge branch 'main' into fix/next-release-bugs

commit f9d1339bd3367a4f80da23aac1fdc73b4cd2a075
Author: suyao
Date:   Sun May 11 03:00:11 2025 +0800

    fix: set default zoomFactor in settings

commit 7cf6fd685662a012e2460e722edcbe5ed12f1a1c
Merge: ba9c4482 3bebfe27
Author: suyao
Date:   Sun May 11 01:31:05 2025 +0800

    Merge branch 'main' into fix/next-release-bugs

commit ba9c44828578a3b5cc9fd1aaba80158615921785
Merge: 97dffe71 3bf0b6b3
Author: kangfenmao
Date:   Sat May 10 20:18:44 2025 +0800

    Merge branch 'main' into fix/next-release-bugs

commit 97dffe719ee00ace2325c80022a48c44f03d6e26
Author: lizhixuan
Date:   Sat May 10 11:38:01 2025 +0800

    feat(MessageMenubar): add edit option to dropdown for single message editing

commit 70157439a1d9778cacc87b1781a84d159a8d3f33
Author: lizhixuan
Date:   Sat May 10 10:53:10 2025 +0800

    refactor(StreamProcessingService): comment out console.log for cleaner code

commit fa33ba77a9306ad316f34da4149858192079f7a2
Author: lizhixuan
Date:   Sat May 10 10:52:08 2025 +0800

    refactor(messageThunk): remove console.log statements for cleaner code

commit 6544c5d2990adf1943195e1d4d11383859a05488
Author: kangfenmao
Date:   Sat May 10 10:17:44 2025 +0800

    feat(i18n): add download success and failure messages in multiple languages

commit e23bb6744a4a99b2062012691340f78fad4e1952
Merge: 55c5c553 60cc1dee
Author: kangfenmao
Date:   Sat May 10 09:54:38 2025 +0800

    Merge branch 'main' into fix/next-release-bugs

commit 55c5c5533eab46a1de9c5511433ed1b1b9e90512
Author: suyao
Date:   Fri May 9 22:19:35 2025 +0800

    fix: update styled component props to use dollar sign prefix for consistency

commit 7a5839e0efdf3eb648b3d26cdf7ce131a2821f6a
Author: suyao
Date:   Fri May 9 22:02:06 2025 +0800

    fix: prevent default action in handleLinkClick for better link handling

commit ecb075fddfc2bc5796a804ccde29aaa762d85da6
Merge: df149608 963f04f7
Author: suyao
Date:   Fri May 9 21:55:54 2025 +0800

    Merge branch 'main' into fix/next-release-bugs

commit df149608904039903d74d7a72a722b6c1a567ee3
Merge: 93bd4eb9 e29a9303
Author: suyao
Date:   Fri May 9 21:48:13 2025 +0800

    Merge branch 'main' into fix/next-release-bugs

commit 93bd4eb907816414af5c27efbaa2d3d014c707fc
Merge: c4d1deb6 38ff9b90
Author: suyao
Date:   Fri May 9 21:27:08 2025 +0800

    Merge branch 'fix/next-release-bugs' of github.com:CherryHQ/cherry-studio into fix/next-release-bugs

commit c4d1deb6911977a23cf731db6bed80b8352557ff
Author: suyao
Date:   Fri May 9 21:24:48 2025 +0800

    feat: enhance citation handling and add metadata support in citation blocks

commit 38ff9b90b8fee91ed1fba7b83c9470bc40bd3429
Author: MyPrototypeWhat
Date:   Fri May 9 19:47:24 2025 +0800

    fix: enhance logging and update async handling in StreamProcessingService and messageThunk

    - Enabled logging in `createStreamProcessor` for better debugging.
    - Added logging for updated messages in `updateExistingMessageAndBlocksInDB` and `saveUpdatesToDB`.
    - Updated `onTextComplete` and `onLLMWebSearchComplete` to handle asynchronous operations correctly.
    - Commented out unused `saveUpdatedBlockToDB` calls to prevent unnecessary database updates.

commit cda0215c9c4e007c2c7240c3c9c8521fb7111774
Author: MyPrototypeWhat
Date:   Fri May 9 18:47:55 2025 +0800

    refactor: optimize block update logic and remove unused code

    - Updated `throttledBlockUpdate` to handle asynchronous updates directly.
    - Removed the unused `throttledBlockDbUpdate` function and its related logic.
    - Added cancellation for throttled updates on error and completion to improve performance and reliability.
    - Cleaned up commented-out code for better readability.

commit de2f5b09c8384eabd4df7253047b838a2759671a
Author: MyPrototypeWhat
Date:   Fri May 9 18:42:00 2025 +0800

    refactor: update message handling and state management

    - Simplified message editing logic by removing unnecessary success/error logging.
    - Added `updatedAt` timestamp to message updates for better tracking.
    - Refactored `editMessageBlocks` to accept message ID and updates directly.
    - Removed unused `getTopicLimit` function from `TopicManager`.
    - Updated message rendering to use `updatedAt` when available.
    - Enhanced type definitions to include `updatedAt` in message structure.

commit 700fa13971cafb04314817d2d8732c8fbf33c9d7
Author: suyao
Date:   Fri May 9 16:19:55 2025 +0800

    Remove Zhipu mode and text-only link handling

commit 06bd1338cd671b255e477cec76b12663ea759f4c
Author: kangfenmao
Date:   Fri May 9 15:49:02 2025 +0800

    fix: update citation rendering logic in MainTextBlock component

    - Added a check to determine if the citation URL is a valid link.
    - Updated citation tag formatting to conditionally include the link based on the URL validity.

commit e96c9a569f7708816f57505975d30667929eeb19
Author: kangfenmao
Date:   Thu May 8 18:31:14 2025 +0800

    style: update ChatNavigation and CitationsList components for improved UI consistency

    - Added header style to remove borders in ChatNavigation.
    - Enhanced CitationsList with new Skeleton loading state and improved layout for citation cards.
    - Refactored CitationLink to a div for better styling control and adjusted padding in OpenButton for a more polished appearance.
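
The cda0215c commit above describes collapsing streaming block writes into a single throttled updater and cancelling any pending call once the stream errors out or completes. The following is a minimal TypeScript sketch of that pattern, not the project's actual code: it only assumes lodash's throttle, and the saveBlockToDB helper, the 150 ms interval, and onStreamSettled are illustrative placeholders.

```ts
import { throttle } from 'lodash'

// Hypothetical stand-in for the real persistence helper; the name and shape are assumptions.
type BlockUpdate = { id: string; changes: Record<string, unknown> }

async function saveBlockToDB(update: BlockUpdate): Promise<void> {
  // Placeholder for the real database write.
  console.log('persisting block', update.id)
}

// Collapse rapid streaming deltas into periodic writes instead of one write per chunk.
const throttledBlockUpdate = throttle((update: BlockUpdate) => saveBlockToDB(update), 150, {
  leading: true,
  trailing: true
})

function onStreamSettled(): void {
  // The final block state is persisted explicitly elsewhere, so any pending
  // throttled call is cancelled on both error and completion to keep a stale
  // trailing write from landing after the final state.
  throttledBlockUpdate.cancel()
}
```

Cancelling rather than flushing matters here because the completed or errored block is written explicitly when the stream settles; a trailing throttled call could otherwise overwrite that final state with stale data.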
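
The de2f5b09 commit above changes editMessageBlocks to take a message ID plus an updates object and to stamp edits with updatedAt. A rough sketch of that shape, assuming the thunk ultimately produces a patch for the store; the MessageUpdate type and the returned patch shape are illustrative, not the project's real types.

```ts
// Illustrative type; the real message update carries more fields.
interface MessageUpdate {
  content?: string
  updatedAt?: string
}

function editMessageBlocks(messageId: string, updates: MessageUpdate): { id: string } & MessageUpdate {
  // Stamp the edit time so rendering can prefer updatedAt over the original creation time.
  return { id: messageId, ...updates, updatedAt: new Date().toISOString() }
}

// Usage: apply an edit to a single message by ID.
const patch = editMessageBlocks('message-123', { content: 'edited text' })
console.log(patch.updatedAt)
```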
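
The 06bd1338 commit above only renders a citation as a link when its URL is actually valid. A small sketch of such a check; isValidUrl and the markdown-style tag format are assumptions about the component, not its actual implementation.

```ts
function isValidUrl(url: string): boolean {
  // URL() throws on strings that are not parseable URLs.
  try {
    const parsed = new URL(url)
    return parsed.protocol === 'http:' || parsed.protocol === 'https:'
  } catch {
    return false
  }
}

function formatCitationTag(index: number, url?: string): string {
  // Only emit a clickable link when the citation URL is a valid link;
  // otherwise fall back to a plain numeric tag.
  return url && isValidUrl(url) ? `[${index}](${url})` : `[${index}]`
}

console.log(formatCitationTag(1, 'https://example.com')) // [1](https://example.com)
console.log(formatCitationTag(2, 'not-a-url')) // [2]
```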
--- src/renderer/src/config/models.ts | 7 +- .../pages/home/Inputbar/ThinkingButton.tsx | 2 +- .../pages/home/Messages/Blocks/ErrorBlock.tsx | 2 +- .../home/Messages/Blocks/ThinkingBlock.tsx | 28 +- .../pages/home/Messages/MessageContent.tsx | 29 -- .../src/pages/home/Messages/MessageError.tsx | 45 --- .../providers/AiProvider/AnthropicProvider.ts | 182 ++++++----- .../providers/AiProvider/GeminiProvider.ts | 287 +++++++++++++----- .../AiProvider/OpenAICompatibleProvider.ts | 171 ++++++----- .../providers/AiProvider/OpenAIProvider.ts | 79 +++-- .../src/services/StreamProcessingService.ts | 2 +- src/renderer/src/store/index.ts | 2 +- src/renderer/src/store/migrate.ts | 10 + src/renderer/src/store/thunk/messageThunk.ts | 8 +- src/renderer/src/types/index.ts | 4 +- 15 files changed, 479 insertions(+), 379 deletions(-) delete mode 100644 src/renderer/src/pages/home/Messages/MessageError.tsx diff --git a/src/renderer/src/config/models.ts b/src/renderer/src/config/models.ts index 65ed25a04f..6555211f14 100644 --- a/src/renderer/src/config/models.ts +++ b/src/renderer/src/config/models.ts @@ -189,7 +189,7 @@ export const TEXT_TO_IMAGE_REGEX = /flux|diffusion|stabilityai|sd-|dall|cogview| // Reasoning models export const REASONING_REGEX = - /^(o\d+(?:-[\w-]+)?|.*\b(?:reasoner|thinking)\b.*|.*-[rR]\d+.*|.*\bqwq(?:-[\w-]+)?\b.*|.*\bhunyuan-t1(?:-[\w-]+)?\b.*|.*\bglm-zero-preview\b.*|.*\bgrok-3-mini(?:-[\w-]+)?\b.*)$/i + /^(o\d+(?:-[\w-]+)?|.*\b(?:reasoning|reasoner|thinking)\b.*|.*-[rR]\d+.*|.*\bqwq(?:-[\w-]+)?\b.*|.*\bhunyuan-t1(?:-[\w-]+)?\b.*|.*\bglm-zero-preview\b.*|.*\bgrok-3-mini(?:-[\w-]+)?\b.*)$/i // Embedding models export const EMBEDDING_REGEX = @@ -206,7 +206,7 @@ export const FUNCTION_CALLING_MODELS = [ 'gpt-4o-mini', 'gpt-4', 'gpt-4.5', - 'o1(?:-[\\w-]+)?', + 'o(1|3|4)(?:-[\\w-]+)?', 'claude', 'qwen', 'qwen3', @@ -2153,11 +2153,11 @@ export const TEXT_TO_IMAGES_MODELS_SUPPORT_IMAGE_ENHANCEMENT = [ export const GENERATE_IMAGE_MODELS = [ 'gemini-2.0-flash-exp-image-generation', + 'gemini-2.0-flash-preview-image-generation', 'gemini-2.0-flash-exp', 'grok-2-image-1212', 'grok-2-image', 'grok-2-image-latest', - 'gpt-4o-image', 'gpt-image-1' ] @@ -2172,6 +2172,7 @@ export const GEMINI_SEARCH_MODELS = [ 'gemini-2.5-pro-exp-03-25', 'gemini-2.5-pro-preview', 'gemini-2.5-pro-preview-03-25', + 'gemini-2.5-pro-preview-05-06', 'gemini-2.5-flash-preview', 'gemini-2.5-flash-preview-04-17' ] diff --git a/src/renderer/src/pages/home/Inputbar/ThinkingButton.tsx b/src/renderer/src/pages/home/Inputbar/ThinkingButton.tsx index 702c1d2823..2caef6c158 100644 --- a/src/renderer/src/pages/home/Inputbar/ThinkingButton.tsx +++ b/src/renderer/src/pages/home/Inputbar/ThinkingButton.tsx @@ -35,7 +35,7 @@ const MODEL_SUPPORTED_OPTIONS: Record = { default: ['off', 'low', 'medium', 'high'], grok: ['off', 'low', 'high'], gemini: ['off', 'low', 'medium', 'high', 'auto'], - qwen: ['off', 'low', 'medium', 'high', 'auto'] + qwen: ['off', 'low', 'medium', 'high'] } // 选项转换映射表:当选项不支持时使用的替代选项 diff --git a/src/renderer/src/pages/home/Messages/Blocks/ErrorBlock.tsx b/src/renderer/src/pages/home/Messages/Blocks/ErrorBlock.tsx index 2c8161397d..f55a3ab6b1 100644 --- a/src/renderer/src/pages/home/Messages/Blocks/ErrorBlock.tsx +++ b/src/renderer/src/pages/home/Messages/Blocks/ErrorBlock.tsx @@ -16,7 +16,7 @@ const MessageErrorInfo: React.FC<{ block: ErrorMessageBlock }> = ({ block }) => const HTTP_ERROR_CODES = [400, 401, 403, 404, 429, 500, 502, 503, 504] if (block.error && HTTP_ERROR_CODES.includes(block.error?.status)) { - 
return + return } if (block?.error?.message) { const errorKey = `error.${block.error.message}` diff --git a/src/renderer/src/pages/home/Messages/Blocks/ThinkingBlock.tsx b/src/renderer/src/pages/home/Messages/Blocks/ThinkingBlock.tsx index e95f47a493..caf7d3f764 100644 --- a/src/renderer/src/pages/home/Messages/Blocks/ThinkingBlock.tsx +++ b/src/renderer/src/pages/home/Messages/Blocks/ThinkingBlock.tsx @@ -4,7 +4,7 @@ import { MessageBlockStatus, type ThinkingMessageBlock } from '@renderer/types/n import { Collapse, message as antdMessage, Tooltip } from 'antd' import { Lightbulb } from 'lucide-react' import { motion } from 'motion/react' -import { memo, useCallback, useEffect, useMemo, useState } from 'react' +import { memo, useCallback, useEffect, useMemo, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' import styled from 'styled-components' @@ -40,6 +40,8 @@ const ThinkingBlock: React.FC = ({ block }) => { const { t } = useTranslation() const { messageFont, fontSize, thoughtAutoCollapse } = useSettings() const [activeKey, setActiveKey] = useState<'thought' | ''>(thoughtAutoCollapse ? '' : 'thought') + const [thinkingTime, setThinkingTime] = useState(block.thinking_millsec || 0) + const intervalId = useRef(null) const isThinking = useMemo(() => block.status === MessageBlockStatus.STREAMING, [block.status]) @@ -73,13 +75,31 @@ const ThinkingBlock: React.FC = ({ block }) => { } }, [block.content, t]) + // FIXME: 这里统计的和请求处统计的有一定误差 + useEffect(() => { + if (isThinking) { + intervalId.current = setInterval(() => { + setThinkingTime((prev) => prev + 100) + }, 100) + } else if (intervalId.current) { + // 立即清除计时器 + clearInterval(intervalId.current) + intervalId.current = null + } + + return () => { + if (intervalId.current) { + window.clearInterval(intervalId.current) + } + } + }, [isThinking]) + + const thinkingTimeSeconds = useMemo(() => (thinkingTime / 1000).toFixed(1), [thinkingTime]) + if (!block.content) { return null } - const thinkingTime = block.thinking_millsec || 0 - const thinkingTimeSeconds = (thinkingTime / 1000).toFixed(1) - return ( = ({ message }) => { - // const { t } = useTranslation() - // if (message.status === 'pending') { - // return ( - - // ) - // } - - // if (message.status === 'searching') { - // return ( - // - // - // {t('message.searching')} - // - // - // ) - // } - - // if (message.status === 'error') { - // return - // } - - // if (message.type === '@' && model) { - // const content = `[@${model.name}](#) ${getBriefInfo(message.content)}` - // return - // } - // const toolUseRegex = /([\s\S]*?)<\/tool_use>/g - - // console.log('message', message) - return ( <> diff --git a/src/renderer/src/pages/home/Messages/MessageError.tsx b/src/renderer/src/pages/home/Messages/MessageError.tsx deleted file mode 100644 index e0c0ea00bd..0000000000 --- a/src/renderer/src/pages/home/Messages/MessageError.tsx +++ /dev/null @@ -1,45 +0,0 @@ -import type { ErrorMessageBlock } from '@renderer/types/newMessage' -import { Alert as AntdAlert } from 'antd' -import { FC } from 'react' -import { useTranslation } from 'react-i18next' -import styled from 'styled-components' - -const MessageError: FC<{ block: ErrorMessageBlock }> = ({ block }) => { - return ( - <> - {/* - {block.error && ( - - )} */} - - - ) -} - -const MessageErrorInfo: FC<{ block: ErrorMessageBlock }> = ({ block }) => { - const { t } = useTranslation() - - const HTTP_ERROR_CODES = [400, 401, 403, 404, 429, 500, 502, 503, 504] - console.log('block', block) - if (block.error && 
HTTP_ERROR_CODES.includes(block.error?.status)) { - return - } - if (block?.error?.message) { - return - } - - return -} - -const Alert = styled(AntdAlert)` - margin: 15px 0 8px; - padding: 10px; - font-size: 12px; -` - -export default MessageError diff --git a/src/renderer/src/providers/AiProvider/AnthropicProvider.ts b/src/renderer/src/providers/AiProvider/AnthropicProvider.ts index 1bb58c22ab..3f2929bdd0 100644 --- a/src/renderer/src/providers/AiProvider/AnthropicProvider.ts +++ b/src/renderer/src/providers/AiProvider/AnthropicProvider.ts @@ -30,10 +30,12 @@ import { MCPCallToolResponse, MCPTool, MCPToolResponse, + Metrics, Model, Provider, Suggestion, ToolCallResponse, + Usage, WebSearchSource } from '@renderer/types' import { ChunkType } from '@renderer/types/chunk' @@ -47,7 +49,7 @@ import { } from '@renderer/utils/mcp-tools' import { findFileBlocks, findImageBlocks, getMainTextContent } from '@renderer/utils/messageUtils/find' import { buildSystemPrompt } from '@renderer/utils/prompt' -import { first, flatten, sum, takeRight } from 'lodash' +import { first, flatten, takeRight } from 'lodash' import OpenAI from 'openai' import { CompletionsParams } from '.' @@ -270,77 +272,82 @@ export default class AnthropicProvider extends BaseProvider { ...this.getCustomParameters(assistant) } - let time_first_token_millsec = 0 - let time_first_content_millsec = 0 - let checkThinkingContent = false - let thinking_content = '' - const start_time_millsec = new Date().getTime() - - if (!streamOutput) { - const message = await this.sdk.messages.create({ ...body, stream: false }) - const time_completion_millsec = new Date().getTime() - start_time_millsec - - let text = '' - let reasoning_content = '' - - if (message.content && message.content.length > 0) { - const thinkingBlock = message.content.find((block) => block.type === 'thinking') - const textBlock = message.content.find((block) => block.type === 'text') - - if (thinkingBlock && 'thinking' in thinkingBlock) { - reasoning_content = thinkingBlock.thinking - } - - if (textBlock && 'text' in textBlock) { - text = textBlock.text - } - } - - return onChunk({ - type: ChunkType.BLOCK_COMPLETE, - response: { - text, - reasoning_content, - usage: message.usage as any, - metrics: { - completion_tokens: message.usage.output_tokens, - time_completion_millsec, - time_first_token_millsec: 0 - } - } - }) - } - const { abortController, cleanup } = this.createAbortController(lastUserMessage?.id) const { signal } = abortController + + const finalUsage: Usage = { + completion_tokens: 0, + prompt_tokens: 0, + total_tokens: 0 + } + + const finalMetrics: Metrics = { + completion_tokens: 0, + time_completion_millsec: 0, + time_first_token_millsec: 0 + } const toolResponses: MCPToolResponse[] = [] - const processStream = (body: MessageCreateParamsNonStreaming, idx: number) => { + const processStream = async (body: MessageCreateParamsNonStreaming, idx: number) => { + let time_first_token_millsec = 0 + const start_time_millsec = new Date().getTime() + + if (!streamOutput) { + const message = await this.sdk.messages.create({ ...body, stream: false }) + const time_completion_millsec = new Date().getTime() - start_time_millsec + + let text = '' + let reasoning_content = '' + + if (message.content && message.content.length > 0) { + const thinkingBlock = message.content.find((block) => block.type === 'thinking') + const textBlock = message.content.find((block) => block.type === 'text') + + if (thinkingBlock && 'thinking' in thinkingBlock) { + reasoning_content = 
thinkingBlock.thinking + } + + if (textBlock && 'text' in textBlock) { + text = textBlock.text + } + } + + return onChunk({ + type: ChunkType.BLOCK_COMPLETE, + response: { + text, + reasoning_content, + usage: message.usage as any, + metrics: { + completion_tokens: message.usage.output_tokens, + time_completion_millsec, + time_first_token_millsec: 0 + } + } + }) + } + + let thinking_content = '' + let isFirstChunk = true + return new Promise((resolve, reject) => { // 等待接口返回流 const toolCalls: ToolUseBlock[] = [] - let hasThinkingContent = false + this.sdk.messages .stream({ ...body, stream: true }, { signal, timeout: 5 * 60 * 1000 }) .on('text', (text) => { - if (hasThinkingContent && !checkThinkingContent) { - checkThinkingContent = true - onChunk({ - type: ChunkType.THINKING_COMPLETE, - text: thinking_content, - thinking_millsec: new Date().getTime() - time_first_content_millsec - }) - } - if (time_first_token_millsec == 0) { - time_first_token_millsec = new Date().getTime() - } - - thinking_content = '' - checkThinkingContent = false - hasThinkingContent = false - - if (!hasThinkingContent && time_first_content_millsec === 0) { - time_first_content_millsec = new Date().getTime() + if (isFirstChunk) { + isFirstChunk = false + if (time_first_token_millsec == 0) { + time_first_token_millsec = new Date().getTime() + } else { + onChunk({ + type: ChunkType.THINKING_COMPLETE, + text: thinking_content, + thinking_millsec: new Date().getTime() - time_first_token_millsec + }) + } } onChunk({ type: ChunkType.TEXT_DELTA, text }) @@ -372,34 +379,22 @@ export default class AnthropicProvider extends BaseProvider { }) } } + if (block.type === 'tool_use') { + toolCalls.push(block) + } }) .on('thinking', (thinking) => { - hasThinkingContent = true - const currentTime = new Date().getTime() // Get current time for each chunk - if (time_first_token_millsec == 0) { - time_first_token_millsec = currentTime + time_first_token_millsec = new Date().getTime() } - // Set time_first_content_millsec ONLY when the first content (thinking or text) arrives - if (time_first_content_millsec === 0) { - time_first_content_millsec = currentTime - } - - // Calculate thinking time as time elapsed since start until this chunk - const thinking_time = currentTime - time_first_content_millsec onChunk({ type: ChunkType.THINKING_DELTA, text: thinking, - thinking_millsec: thinking_time + thinking_millsec: new Date().getTime() - time_first_token_millsec }) thinking_content += thinking }) - .on('contentBlock', (content) => { - if (content.type === 'tool_use') { - toolCalls.push(content) - } - }) .on('finalMessage', async (message) => { const toolResults: Awaited> = [] // tool call @@ -458,29 +453,28 @@ export default class AnthropicProvider extends BaseProvider { newBody.messages = userMessages onChunk({ type: ChunkType.LLM_RESPONSE_CREATED }) - await processStream(newBody, idx + 1) + try { + await processStream(newBody, idx + 1) + } catch (error) { + console.error('Error processing stream:', error) + reject(error) + } } - const time_completion_millsec = new Date().getTime() - start_time_millsec + finalUsage.prompt_tokens += message.usage.input_tokens + finalUsage.completion_tokens += message.usage.output_tokens + finalUsage.total_tokens += finalUsage.prompt_tokens + finalUsage.completion_tokens + finalMetrics.completion_tokens = finalUsage.completion_tokens + finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec + finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec 
onChunk({ type: ChunkType.BLOCK_COMPLETE, response: { - usage: { - prompt_tokens: message.usage.input_tokens, - completion_tokens: message.usage.output_tokens, - total_tokens: sum(Object.values(message.usage)) - }, - metrics: { - completion_tokens: message.usage.output_tokens, - time_completion_millsec, - time_first_token_millsec: time_first_token_millsec - start_time_millsec - } + usage: finalUsage, + metrics: finalMetrics } }) - // FIXME: 临时方案,重置时间戳和思考内容 - time_first_token_millsec = 0 - time_first_content_millsec = 0 resolve() }) .on('error', (error) => reject(error)) diff --git a/src/renderer/src/providers/AiProvider/GeminiProvider.ts b/src/renderer/src/providers/AiProvider/GeminiProvider.ts index db30fa6c4b..0a49ebe573 100644 --- a/src/renderer/src/providers/AiProvider/GeminiProvider.ts +++ b/src/renderer/src/providers/AiProvider/GeminiProvider.ts @@ -40,6 +40,7 @@ import { MCPCallToolResponse, MCPTool, MCPToolResponse, + Metrics, Model, Provider, Suggestion, @@ -126,7 +127,6 @@ export default class GeminiProvider extends BaseProvider { * @returns The message contents */ private async getMessageContents(message: Message): Promise { - console.log('getMessageContents', message) const role = message.role === 'user' ? 'user' : 'model' const parts: Part[] = [{ text: await this.getMessageContent(message) }] // Add any generated images from previous responses @@ -153,6 +153,16 @@ export default class GeminiProvider extends BaseProvider { } } } + const file = imageBlock.file + if (file) { + const base64Data = await window.api.file.base64Image(file.id + file.ext) + parts.push({ + inlineData: { + data: base64Data.base64, + mimeType: base64Data.mime + } as Part['inlineData'] + }) + } } const fileBlocks = findFileBlocks(message) @@ -186,6 +196,50 @@ export default class GeminiProvider extends BaseProvider { } } + private async getImageFileContents(message: Message): Promise { + const role = message.role === 'user' ? 'user' : 'model' + const content = getMainTextContent(message) + const parts: Part[] = [{ text: content }] + const imageBlocks = findImageBlocks(message) + for (const imageBlock of imageBlocks) { + if ( + imageBlock.metadata?.generateImageResponse?.images && + imageBlock.metadata.generateImageResponse.images.length > 0 + ) { + for (const imageUrl of imageBlock.metadata.generateImageResponse.images) { + if (imageUrl && imageUrl.startsWith('data:')) { + // Extract base64 data and mime type from the data URL + const matches = imageUrl.match(/^data:(.+);base64,(.*)$/) + if (matches && matches.length === 3) { + const mimeType = matches[1] + const base64Data = matches[2] + parts.push({ + inlineData: { + data: base64Data, + mimeType: mimeType + } as Part['inlineData'] + }) + } + } + } + } + const file = imageBlock.file + if (file) { + const base64Data = await window.api.file.base64Image(file.id + file.ext) + parts.push({ + inlineData: { + data: base64Data.base64, + mimeType: base64Data.mime + } as Part['inlineData'] + }) + } + } + return { + role, + parts: parts + } + } + /** * Get the safety settings * @returns The safety settings @@ -273,6 +327,18 @@ export default class GeminiProvider extends BaseProvider { }: CompletionsParams): Promise { const defaultModel = getDefaultModel() const model = assistant.model || defaultModel + let canGenerateImage = false + if (isGenerateImageModel(model)) { + if (model.id === 'gemini-2.0-flash-exp') { + canGenerateImage = assistant.enableGenerateImage! 
+ } else { + canGenerateImage = true + } + } + if (canGenerateImage) { + await this.generateImageByChat({ messages, assistant, onChunk }) + return + } const { contextCount, maxTokens, streamOutput, enableToolUse } = getAssistantSettings(assistant) const userMessages = filterUserRoleStartMessages( @@ -309,21 +375,10 @@ export default class GeminiProvider extends BaseProvider { }) } - let canGenerateImage = false - if (isGenerateImageModel(model)) { - if (model.id === 'gemini-2.0-flash-exp') { - canGenerateImage = assistant.enableGenerateImage! - } else { - canGenerateImage = true - } - } - const generateContentConfig: GenerateContentConfig = { - responseModalities: canGenerateImage ? [Modality.TEXT, Modality.IMAGE] : undefined, - responseMimeType: canGenerateImage ? 'text/plain' : undefined, safetySettings: this.getSafetySettings(), // generate image don't need system instruction - systemInstruction: isGemmaModel(model) || canGenerateImage ? undefined : systemInstruction, + systemInstruction: isGemmaModel(model) ? undefined : systemInstruction, temperature: assistant?.settings?.temperature, topP: assistant?.settings?.topP, maxOutputTokens: maxTokens, @@ -360,8 +415,17 @@ export default class GeminiProvider extends BaseProvider { } } - const start_time_millsec = new Date().getTime() - let time_first_token_millsec = 0 + const finalUsage: Usage = { + completion_tokens: 0, + prompt_tokens: 0, + total_tokens: 0 + } + + const finalMetrics: Metrics = { + completion_tokens: 0, + time_completion_millsec: 0, + time_first_token_millsec: 0 + } const { cleanup, abortController } = this.createAbortController(userLastMessage?.id, true) @@ -435,6 +499,8 @@ export default class GeminiProvider extends BaseProvider { history.push(messageContents) let functionCalls: FunctionCall[] = [] + let time_first_token_millsec = 0 + const start_time_millsec = new Date().getTime() if (stream instanceof GenerateContentResponse) { let content = '' @@ -494,45 +560,28 @@ export default class GeminiProvider extends BaseProvider { } as BlockCompleteChunk) } else { let content = '' - let final_time_completion_millsec = 0 - let lastUsage: Usage | undefined = undefined for await (const chunk of stream) { if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) break - // --- Calculate Metrics --- - if (time_first_token_millsec == 0 && chunk.text !== undefined) { - // Update based on text arrival - time_first_token_millsec = new Date().getTime() - start_time_millsec + if (time_first_token_millsec == 0) { + time_first_token_millsec = new Date().getTime() } - // 1. Text Content if (chunk.text !== undefined) { content += chunk.text onChunk({ type: ChunkType.TEXT_DELTA, text: chunk.text }) } - // 2. Usage Data - if (chunk.usageMetadata) { - lastUsage = { - prompt_tokens: chunk.usageMetadata.promptTokenCount || 0, - completion_tokens: chunk.usageMetadata.candidatesTokenCount || 0, - total_tokens: chunk.usageMetadata.totalTokenCount || 0 - } - final_time_completion_millsec = new Date().getTime() - start_time_millsec - } - - // 4. 
Image Generation - const generateImage = this.processGeminiImageResponse(chunk, onChunk) - if (generateImage?.images?.length) { - onChunk({ type: ChunkType.IMAGE_COMPLETE, image: generateImage }) - } - if (chunk.candidates?.[0]?.finishReason) { if (chunk.text) { onChunk({ type: ChunkType.TEXT_COMPLETE, text: content }) } + if (chunk.usageMetadata) { + finalUsage.prompt_tokens += chunk.usageMetadata.promptTokenCount || 0 + finalUsage.completion_tokens += chunk.usageMetadata.candidatesTokenCount || 0 + finalUsage.total_tokens += chunk.usageMetadata.totalTokenCount || 0 + } if (chunk.candidates?.[0]?.groundingMetadata) { - // 3. Grounding/Search Metadata const groundingMetadata = chunk.candidates?.[0]?.groundingMetadata onChunk({ type: ChunkType.LLM_WEB_SEARCH_COMPLETE, @@ -551,35 +600,37 @@ export default class GeminiProvider extends BaseProvider { functionCalls = functionCalls.concat(chunk.functionCalls) } - onChunk({ - type: ChunkType.BLOCK_COMPLETE, - response: { - metrics: { - completion_tokens: lastUsage?.completion_tokens, - time_completion_millsec: final_time_completion_millsec, - time_first_token_millsec - }, - usage: lastUsage - } - }) - } - - // --- End Incremental onChunk calls --- - - // Call processToolUses AFTER potentially processing text content in this chunk - // This assumes tools might be specified within the text stream - // Note: parseAndCallTools inside should handle its own onChunk for tool responses - let toolResults: Awaited> = [] - if (functionCalls.length) { - toolResults = await processToolCalls(functionCalls) - } - if (content.length) { - toolResults = toolResults.concat(await processToolUses(content)) - } - if (toolResults.length) { - await processToolResults(toolResults, idx) + finalMetrics.completion_tokens = finalUsage.completion_tokens + finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec + finalMetrics.time_first_token_millsec = + (finalMetrics.time_first_token_millsec || 0) + (time_first_token_millsec - start_time_millsec) } } + + // --- End Incremental onChunk calls --- + + // Call processToolUses AFTER potentially processing text content in this chunk + // This assumes tools might be specified within the text stream + // Note: parseAndCallTools inside should handle its own onChunk for tool responses + let toolResults: Awaited> = [] + if (functionCalls.length) { + toolResults = await processToolCalls(functionCalls) + } + if (content.length) { + toolResults = toolResults.concat(await processToolUses(content)) + } + if (toolResults.length) { + await processToolResults(toolResults, idx) + } + + // FIXME: 由于递归,会发送n次 + onChunk({ + type: ChunkType.BLOCK_COMPLETE, + response: { + usage: finalUsage, + metrics: finalMetrics + } + }) } } @@ -605,17 +656,6 @@ export default class GeminiProvider extends BaseProvider { }) await processStream(userMessagesStream, 0).finally(cleanup) - - const final_time_completion_millsec = new Date().getTime() - start_time_millsec - onChunk({ - type: ChunkType.BLOCK_COMPLETE, - response: { - metrics: { - time_completion_millsec: final_time_completion_millsec, - time_first_token_millsec - } - } - }) } /** @@ -949,8 +989,97 @@ export default class GeminiProvider extends BaseProvider { return data.embeddings?.[0]?.values?.length || 0 } - public generateImageByChat(): Promise { - throw new Error('Method not implemented.') + public async generateImageByChat({ messages, assistant, onChunk }): Promise { + const defaultModel = getDefaultModel() + const model = assistant.model || defaultModel + const { 
contextCount, maxTokens } = getAssistantSettings(assistant) + const userMessages = filterUserRoleStartMessages( + filterEmptyMessages(filterContextMessages(takeRight(messages, contextCount + 2))) + ) + + const userLastMessage = userMessages.pop() + const { abortController } = this.createAbortController(userLastMessage?.id, true) + const { signal } = abortController + const generateContentConfig: GenerateContentConfig = { + responseModalities: [Modality.TEXT, Modality.IMAGE], + responseMimeType: 'text/plain', + safetySettings: this.getSafetySettings(), + temperature: assistant?.settings?.temperature, + topP: assistant?.settings?.top_p, + maxOutputTokens: maxTokens, + abortSignal: signal, + ...this.getCustomParameters(assistant) + } + const history: Content[] = [] + try { + for (const message of userMessages) { + history.push(await this.getImageFileContents(message)) + } + + let time_first_token_millsec = 0 + const start_time_millsec = new Date().getTime() + onChunk({ type: ChunkType.LLM_RESPONSE_CREATED }) + const chat = this.sdk.chats.create({ + model: model.id, + config: generateContentConfig, + history: history + }) + let content = '' + const finalUsage: Usage = { + prompt_tokens: 0, + completion_tokens: 0, + total_tokens: 0 + } + const userMessage: Content = await this.getImageFileContents(userLastMessage!) + const response = await chat.sendMessageStream({ + message: userMessage.parts!, + config: { + ...generateContentConfig, + abortSignal: signal + } + }) + for await (const chunk of response as AsyncGenerator) { + if (time_first_token_millsec == 0) { + time_first_token_millsec = new Date().getTime() + } + + if (chunk.text !== undefined) { + content += chunk.text + onChunk({ type: ChunkType.TEXT_DELTA, text: chunk.text }) + } + const generateImage = this.processGeminiImageResponse(chunk, onChunk) + if (generateImage?.images?.length) { + onChunk({ type: ChunkType.IMAGE_COMPLETE, image: generateImage }) + } + if (chunk.candidates?.[0]?.finishReason) { + if (chunk.text) { + onChunk({ type: ChunkType.TEXT_COMPLETE, text: content }) + } + if (chunk.usageMetadata) { + finalUsage.prompt_tokens = chunk.usageMetadata.promptTokenCount || 0 + finalUsage.completion_tokens = chunk.usageMetadata.candidatesTokenCount || 0 + finalUsage.total_tokens = chunk.usageMetadata.totalTokenCount || 0 + } + } + } + onChunk({ + type: ChunkType.BLOCK_COMPLETE, + response: { + usage: finalUsage, + metrics: { + completion_tokens: finalUsage.completion_tokens, + time_completion_millsec: new Date().getTime() - start_time_millsec, + time_first_token_millsec: time_first_token_millsec - start_time_millsec + } + } + }) + } catch (error) { + console.error('[generateImageByChat] error', error) + onChunk({ + type: ChunkType.ERROR, + error + }) + } } public convertMcpTools(mcpTools: MCPTool[]): T[] { diff --git a/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts b/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts index cfb617d46a..f9a9042074 100644 --- a/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts +++ b/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts @@ -34,6 +34,7 @@ import { MCPCallToolResponse, MCPTool, MCPToolResponse, + Metrics, Model, Provider, Suggestion, @@ -395,7 +396,6 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider { return streamOutput } - const start_time_millsec = new Date().getTime() const lastUserMessage = _messages.findLast((m) => m.role === 'user') const { abortController, cleanup, signalPromise } = 
this.createAbortController(lastUserMessage?.id, true) const { signal } = abortController @@ -423,6 +423,18 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider { reqMessages = [systemMessage, ...userMessages].filter(Boolean) as ChatCompletionMessageParam[] } + let finalUsage: Usage = { + completion_tokens: 0, + prompt_tokens: 0, + total_tokens: 0 + } + + const finalMetrics: Metrics = { + completion_tokens: 0, + time_completion_millsec: 0, + time_first_token_millsec: 0 + } + const toolResponses: MCPToolResponse[] = [] const processToolResults = async (toolResults: Awaited>, idx: number) => { @@ -505,18 +517,17 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider { const processStream = async (stream: any, idx: number) => { const toolCalls: ChatCompletionMessageToolCall[] = [] + let time_first_token_millsec = 0 + const start_time_millsec = new Date().getTime() + // Handle non-streaming case (already returns early, no change needed here) if (!isSupportStreamOutput()) { - const time_completion_millsec = new Date().getTime() - start_time_millsec // Calculate final metrics once - const finalMetrics = { - completion_tokens: stream.usage?.completion_tokens, - time_completion_millsec, - time_first_token_millsec: 0 // Non-streaming, first token time is not relevant - } + finalMetrics.completion_tokens = stream.usage?.completion_tokens + finalMetrics.time_completion_millsec = new Date().getTime() - start_time_millsec // Create a synthetic usage object if stream.usage is undefined - const finalUsage = stream.usage + finalUsage = { ...stream.usage } // Separate onChunk calls for text and usage/metrics let content = '' stream.choices.forEach((choice) => { @@ -526,7 +537,7 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider { onChunk({ type: ChunkType.THINKING_COMPLETE, text: choice.message.reasoning, - thinking_millsec: time_completion_millsec + thinking_millsec: new Date().getTime() - start_time_millsec }) } // text @@ -576,20 +587,9 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider { return } - let content = '' // Accumulate content for tool processing if needed + let content = '' let thinkingContent = '' - // 记录最终的完成时间差 - let final_time_completion_millsec_delta = 0 - let final_time_thinking_millsec_delta = 0 - // Variable to store the last received usage object - let lastUsage: Usage | undefined = undefined - // let isThinkingInContent: ThoughtProcessor | undefined = undefined - // const processThinkingChunk = this.handleThinkingTags() let isFirstChunk = true - let time_first_token_millsec = 0 - let time_first_token_millsec_delta = 0 - let time_first_content_millsec = 0 - let time_thinking_start = 0 // 1. 初始化中间件 const reasoningTags = [ @@ -640,25 +640,24 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider { // 3. 消费 processedStream,分发 onChunk for await (const chunk of readableStreamAsyncIterable(processedStream)) { - const currentTime = new Date().getTime() const delta = chunk.type === 'finish' ? chunk.delta : chunk const rawChunk = chunk.type === 'finish' ? 
chunk.chunk : chunk switch (chunk.type) { case 'reasoning': { - if (time_thinking_start === 0) { - time_thinking_start = currentTime - time_first_token_millsec = currentTime - time_first_token_millsec_delta = currentTime - start_time_millsec + if (time_first_token_millsec === 0) { + time_first_token_millsec = new Date().getTime() } thinkingContent += chunk.textDelta - const thinking_time = currentTime - time_thinking_start - onChunk({ type: ChunkType.THINKING_DELTA, text: chunk.textDelta, thinking_millsec: thinking_time }) + onChunk({ + type: ChunkType.THINKING_DELTA, + text: chunk.textDelta, + thinking_millsec: new Date().getTime() - time_first_token_millsec + }) break } case 'text-delta': { let textDelta = chunk.textDelta - if (assistant.enableWebSearch && delta) { const originalDelta = rawChunk?.choices?.[0]?.delta @@ -676,25 +675,32 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider { if (isFirstChunk) { isFirstChunk = false if (time_first_token_millsec === 0) { - time_first_token_millsec = currentTime - time_first_token_millsec_delta = currentTime - start_time_millsec + time_first_token_millsec = new Date().getTime() + } else { + onChunk({ + type: ChunkType.THINKING_COMPLETE, + text: thinkingContent, + thinking_millsec: new Date().getTime() - time_first_token_millsec + }) } } content += textDelta - if (time_thinking_start > 0 && time_first_content_millsec === 0) { - time_first_content_millsec = currentTime - final_time_thinking_millsec_delta = time_first_content_millsec - time_thinking_start - - onChunk({ - type: ChunkType.THINKING_COMPLETE, - text: thinkingContent, - thinking_millsec: final_time_thinking_millsec_delta - }) - } onChunk({ type: ChunkType.TEXT_DELTA, text: textDelta }) break } case 'tool-calls': { + if (isFirstChunk) { + isFirstChunk = false + if (time_first_token_millsec === 0) { + time_first_token_millsec = new Date().getTime() + } else { + onChunk({ + type: ChunkType.THINKING_COMPLETE, + text: thinkingContent, + thinking_millsec: new Date().getTime() - time_first_token_millsec + }) + } + } chunk.delta.tool_calls.forEach((toolCall) => { const { id, index, type, function: fun } = toolCall if (id && type === 'function' && fun) { @@ -721,10 +727,14 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider { if (!isEmpty(finishReason)) { onChunk({ type: ChunkType.TEXT_COMPLETE, text: content }) - final_time_completion_millsec_delta = currentTime - start_time_millsec if (usage) { - lastUsage = usage + finalUsage.completion_tokens += usage.completion_tokens || 0 + finalUsage.prompt_tokens += usage.prompt_tokens || 0 + finalUsage.total_tokens += usage.total_tokens || 0 + finalMetrics.completion_tokens += usage.completion_tokens || 0 } + finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec + finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec if (originalFinishDelta?.annotations) { onChunk({ type: ChunkType.LLM_WEB_SEARCH_COMPLETE, @@ -774,49 +784,46 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider { } as LLMWebSearchCompleteChunk) } } - reqMessages.push({ - role: 'assistant', - content: content, - tool_calls: toolCalls.length - ? toolCalls.map((toolCall) => ({ - id: toolCall.id, - function: { - ...toolCall.function, - arguments: - typeof toolCall.function.arguments === 'string' - ? 
toolCall.function.arguments - : JSON.stringify(toolCall.function.arguments) - }, - type: 'function' - })) - : undefined - }) - let toolResults: Awaited> = [] - if (toolCalls.length) { - toolResults = await processToolCalls(mcpTools, toolCalls) - } - if (content.length) { - toolResults = toolResults.concat(await processToolUses(content)) - } - if (toolResults.length) { - await processToolResults(toolResults, idx) - } - onChunk({ - type: ChunkType.BLOCK_COMPLETE, - response: { - usage: lastUsage, - metrics: { - completion_tokens: lastUsage?.completion_tokens, - time_completion_millsec: final_time_completion_millsec_delta, - time_first_token_millsec: time_first_token_millsec_delta, - time_thinking_millsec: final_time_thinking_millsec_delta - } - } - }) break } } } + + reqMessages.push({ + role: 'assistant', + content: content, + tool_calls: toolCalls.length + ? toolCalls.map((toolCall) => ({ + id: toolCall.id, + function: { + ...toolCall.function, + arguments: + typeof toolCall.function.arguments === 'string' + ? toolCall.function.arguments + : JSON.stringify(toolCall.function.arguments) + }, + type: 'function' + })) + : undefined + }) + let toolResults: Awaited> = [] + if (toolCalls.length) { + toolResults = await processToolCalls(mcpTools, toolCalls) + } + if (content.length) { + toolResults = toolResults.concat(await processToolUses(content)) + } + if (toolResults.length) { + await processToolResults(toolResults, idx) + } + + onChunk({ + type: ChunkType.BLOCK_COMPLETE, + response: { + usage: finalUsage, + metrics: finalMetrics + } + }) } reqMessages = processReqMessages(model, reqMessages) diff --git a/src/renderer/src/providers/AiProvider/OpenAIProvider.ts b/src/renderer/src/providers/AiProvider/OpenAIProvider.ts index 51c690ee41..154b1a7357 100644 --- a/src/renderer/src/providers/AiProvider/OpenAIProvider.ts +++ b/src/renderer/src/providers/AiProvider/OpenAIProvider.ts @@ -24,6 +24,7 @@ import { MCPCallToolResponse, MCPTool, MCPToolResponse, + Metrics, Model, Provider, Suggestion, @@ -332,7 +333,6 @@ export abstract class BaseOpenAiProvider extends BaseProvider { const lastUserMessage = _messages.findLast((m) => m.role === 'user') const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true) const { signal } = abortController - let time_first_token_millsec_delta = 0 const start_time_millsec = new Date().getTime() const response = await this.sdk.chat.completions // @ts-ignore key is not typed @@ -354,8 +354,17 @@ export abstract class BaseOpenAiProvider extends BaseProvider { const processStream = async (stream: any) => { let content = '' let isFirstChunk = true - let final_time_completion_millsec_delta = 0 - let lastUsage: Usage | undefined = undefined + const finalUsage: Usage = { + completion_tokens: 0, + prompt_tokens: 0, + total_tokens: 0 + } + + const finalMetrics: Metrics = { + completion_tokens: 0, + time_completion_millsec: 0, + time_first_token_millsec: 0 + } for await (const chunk of stream as any) { if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) { break @@ -368,17 +377,21 @@ export abstract class BaseOpenAiProvider extends BaseProvider { } if (isFirstChunk) { isFirstChunk = false - time_first_token_millsec_delta = new Date().getTime() - start_time_millsec + finalMetrics.time_first_token_millsec = new Date().getTime() - start_time_millsec } content += delta.content onChunk({ type: ChunkType.TEXT_DELTA, text: delta.content }) } if (!isEmpty(finishReason) || chunk?.annotations) { onChunk({ type: ChunkType.TEXT_COMPLETE, text: 
content }) - final_time_completion_millsec_delta = new Date().getTime() - start_time_millsec + finalMetrics.time_completion_millsec = new Date().getTime() - start_time_millsec if (chunk.usage) { - lastUsage = chunk.usage + const usage = chunk.usage as OpenAI.Completions.CompletionUsage + finalUsage.completion_tokens = usage.completion_tokens + finalUsage.prompt_tokens = usage.prompt_tokens + finalUsage.total_tokens = usage.total_tokens } + finalMetrics.completion_tokens = finalUsage.completion_tokens } if (delta?.annotations) { onChunk({ @@ -393,12 +406,8 @@ export abstract class BaseOpenAiProvider extends BaseProvider { onChunk({ type: ChunkType.BLOCK_COMPLETE, response: { - usage: lastUsage, - metrics: { - completion_tokens: lastUsage?.completion_tokens, - time_completion_millsec: final_time_completion_millsec_delta, - time_first_token_millsec: time_first_token_millsec_delta - } + usage: finalUsage, + metrics: finalMetrics } }) } @@ -428,7 +437,6 @@ export abstract class BaseOpenAiProvider extends BaseProvider { type: 'input_text' } if (isSupportedReasoningEffortOpenAIModel(model)) { - systemMessageInput.text = `Formatting re-enabled${systemMessageInput.text ? '\n' + systemMessageInput.text : ''}` systemMessage.role = 'developer' } @@ -455,9 +463,6 @@ export abstract class BaseOpenAiProvider extends BaseProvider { userMessage.push(await this.getReponseMessageParam(message, model)) } - let time_first_token_millsec = 0 - const start_time_millsec = new Date().getTime() - const lastUserMessage = _messages.findLast((m) => m.role === 'user') const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true) const { signal } = abortController @@ -470,6 +475,18 @@ export abstract class BaseOpenAiProvider extends BaseProvider { reqMessages = [systemMessage, ...userMessage].filter(Boolean) as OpenAI.Responses.EasyInputMessage[] } + const finalUsage: Usage = { + completion_tokens: 0, + prompt_tokens: 0, + total_tokens: 0 + } + + const finalMetrics: Metrics = { + completion_tokens: 0, + time_completion_millsec: 0, + time_first_token_millsec: 0 + } + const toolResponses: MCPToolResponse[] = [] const processToolResults = async (toolResults: Awaited>, idx: number) => { @@ -549,6 +566,8 @@ export abstract class BaseOpenAiProvider extends BaseProvider { idx: number ) => { const toolCalls: OpenAI.Responses.ResponseFunctionToolCall[] = [] + let time_first_token_millsec = 0 + const start_time_millsec = new Date().getTime() if (!streamOutput) { const nonStream = stream as OpenAI.Responses.Response @@ -633,17 +652,15 @@ export abstract class BaseOpenAiProvider extends BaseProvider { const outputItems: OpenAI.Responses.ResponseOutputItem[] = [] - let lastUsage: Usage | undefined = undefined - let final_time_completion_millsec_delta = 0 for await (const chunk of stream as Stream) { if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) { break } switch (chunk.type) { - case 'response.created': - time_first_token_millsec = new Date().getTime() - break case 'response.output_item.added': + if (time_first_token_millsec === 0) { + time_first_token_millsec = new Date().getTime() + } if (chunk.item.type === 'function_call') { outputItems.push(chunk.item) } @@ -708,18 +725,18 @@ export abstract class BaseOpenAiProvider extends BaseProvider { } break case 'response.completed': { - final_time_completion_millsec_delta = new Date().getTime() - start_time_millsec const completion_tokens = (chunk.response.usage?.output_tokens || 0) + 
(chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 0) const total_tokens = (chunk.response.usage?.total_tokens || 0) + (chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 0) - lastUsage = { - completion_tokens, - prompt_tokens: chunk.response.usage?.input_tokens || 0, - total_tokens - } + finalUsage.completion_tokens += completion_tokens + finalUsage.prompt_tokens += chunk.response.usage?.input_tokens || 0 + finalUsage.total_tokens += total_tokens + finalMetrics.completion_tokens += completion_tokens + finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec + finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec break } case 'error': @@ -761,12 +778,8 @@ export abstract class BaseOpenAiProvider extends BaseProvider { onChunk({ type: ChunkType.BLOCK_COMPLETE, response: { - usage: lastUsage, - metrics: { - completion_tokens: lastUsage?.completion_tokens, - time_completion_millsec: final_time_completion_millsec_delta, - time_first_token_millsec: time_first_token_millsec - start_time_millsec - } + usage: finalUsage, + metrics: finalMetrics } }) } diff --git a/src/renderer/src/services/StreamProcessingService.ts b/src/renderer/src/services/StreamProcessingService.ts index 1d8bef15b3..818345523e 100644 --- a/src/renderer/src/services/StreamProcessingService.ts +++ b/src/renderer/src/services/StreamProcessingService.ts @@ -59,7 +59,7 @@ export function createStreamProcessor(callbacks: StreamProcessorCallbacks = {}) callbacks.onTextComplete(data.text) } if (data.type === ChunkType.THINKING_DELTA && callbacks.onThinkingChunk) { - callbacks.onThinkingChunk(data.text, data.thinking_millsec) + callbacks.onThinkingChunk(data.text) } if (data.type === ChunkType.THINKING_COMPLETE && callbacks.onThinkingComplete) { callbacks.onThinkingComplete(data.text, data.thinking_millsec) diff --git a/src/renderer/src/store/index.ts b/src/renderer/src/store/index.ts index df7af60f69..abfc334986 100644 --- a/src/renderer/src/store/index.ts +++ b/src/renderer/src/store/index.ts @@ -46,7 +46,7 @@ const persistedReducer = persistReducer( { key: 'cherry-studio', storage, - version: 99, + version: 100, blacklist: ['runtime', 'messages', 'messageBlocks'], migrate }, diff --git a/src/renderer/src/store/migrate.ts b/src/renderer/src/store/migrate.ts index ab852b18f6..b2c94ce561 100644 --- a/src/renderer/src/store/migrate.ts +++ b/src/renderer/src/store/migrate.ts @@ -1296,6 +1296,16 @@ const migrateConfig = { } catch (error) { return state } + }, + '100': (state: RootState) => { + try { + if (!state.settings.zoomFactor) { + state.settings.zoomFactor = 1 + } + return state + } catch (error) { + return state + } } } diff --git a/src/renderer/src/store/thunk/messageThunk.ts b/src/renderer/src/store/thunk/messageThunk.ts index 5dd8d5984d..5d40439626 100644 --- a/src/renderer/src/store/thunk/messageThunk.ts +++ b/src/renderer/src/store/thunk/messageThunk.ts @@ -398,7 +398,7 @@ const fetchAndProcessAssistantResponseImpl = async ( } else { const newBlock = createThinkingBlock(assistantMsgId, accumulatedThinking, { status: MessageBlockStatus.STREAMING, - thinking_millsec: thinking_millsec + thinking_millsec: 0 }) handleBlockTransition(newBlock, MessageBlockType.THINKING) } @@ -565,7 +565,7 @@ const fetchAndProcessAssistantResponseImpl = async ( message: pauseErrorLanguagePlaceholder || error.message || 'Stream processing error', originalMessage: error.message, stack: error.stack, - status: error.status, + status: error.status || error.code, 
requestId: error.request_id } if (lastBlockId) { @@ -609,13 +609,13 @@ const fetchAndProcessAssistantResponseImpl = async ( // 更新topic的name autoRenameTopic(assistant, topicId) - if (response && !response.usage) { + if (response && response.usage?.total_tokens === 0) { const usage = await estimateMessagesUsage({ assistant, messages: finalContextWithAssistant }) response.usage = usage } } if (response && response.metrics) { - if (!response.metrics.completion_tokens && response.usage) { + if (response.metrics.completion_tokens === 0 && response.usage?.completion_tokens) { response = { ...response, metrics: { diff --git a/src/renderer/src/types/index.ts b/src/renderer/src/types/index.ts index e09e826b1b..0873fc0a99 100644 --- a/src/renderer/src/types/index.ts +++ b/src/renderer/src/types/index.ts @@ -121,8 +121,8 @@ export type Usage = OpenAI.Completions.CompletionUsage & { } export type Metrics = { - completion_tokens?: number - time_completion_millsec?: number + completion_tokens: number + time_completion_millsec: number time_first_token_millsec?: number time_thinking_millsec?: number }