fix: message and rerank errors

commit 1c90e23d76a3e1008408bf29add122ccab7dbe6d
Merge: 4e792033 1fde0999
Author: kangfenmao <kangfenmao@qq.com>
Date:   Sun May 11 18:43:56 2025 +0800

    Merge branch 'main' into fix/next-release-bugs

commit 4e7920336d838501b900a18f1c254f0d17fb54b8
Author: suyao <sy20010504@gmail.com>
Date:   Sun May 11 18:09:58 2025 +0800

    refactor(GeminiProvider): implement image generation handling in chat responses

commit cd1ce4c0c65bc2a111ddb3112722cb36e66b2515
Merge: 968de188 235122c8
Author: suyao <sy20010504@gmail.com>
Date:   Sun May 11 16:45:29 2025 +0800

    Merge branch 'main' into fix/next-release-bugs

commit 968de18866abacc55fd9bd74c8d618871e64ade6
Author: suyao <sy20010504@gmail.com>
Date:   Sun May 11 16:41:38 2025 +0800

    fix: add new image generation models to the configuration

commit 1eaf5801b4c0e2c3fa1aa2ed829b20d97ea57d3f
Merge: cb76588d e6655fff
Author: suyao <sy20010504@gmail.com>
Date:   Sun May 11 13:14:17 2025 +0800

    Merge branch 'main' into fix/next-release-bugs

commit cb76588dc692f35da1f3d4fdbc9217c6a1a36501
Author: suyao <sy20010504@gmail.com>
Date:   Sun May 11 13:13:20 2025 +0800

    fix: enhance error handling and metrics tracking across AI providers and message processing

commit c2d6bdabc00b48419773d08c7d6630803d6310c8
Author: suyao <sy20010504@gmail.com>
Date:   Sun May 11 05:36:52 2025 +0800

    fix: update regex for function calling models and improve time tracking logic

commit 95340b87d0bba3cdcd173a181953afa42b26da9b
Author: suyao <sy20010504@gmail.com>
Date:   Sun May 11 05:05:05 2025 +0800

    fix: adjust thinking millisecond handling in message thunk

commit f4d4d3901603f14df616582fa537f3d9c1a694eb
Author: suyao <sy20010504@gmail.com>
Date:   Sun May 11 03:52:26 2025 +0800

    fix: remove 'auto' option from qwen model supported options

commit e26f603dfe1a9146b0575142363d5ceab30e32df
Author: suyao <sy20010504@gmail.com>
Date:   Sun May 11 03:50:05 2025 +0800

    fix: add support for inline base64 image data in image block

commit bb0093c656b2b72158db1bf7bfef6aae46b8096c
Merge: f9d1339b d39584fc
Author: suyao <sy20010504@gmail.com>
Date:   Sun May 11 03:00:57 2025 +0800

    Merge branch 'main' into fix/next-release-bugs

commit f9d1339bd3367a4f80da23aac1fdc73b4cd2a075
Author: suyao <sy20010504@gmail.com>
Date:   Sun May 11 03:00:11 2025 +0800

    fix:  set default zoomFactor in settings

commit 7cf6fd685662a012e2460e722edcbe5ed12f1a1c
Merge: ba9c4482 3bebfe27
Author: suyao <sy20010504@gmail.com>
Date:   Sun May 11 01:31:05 2025 +0800

    Merge branch 'main' into fix/next-release-bugs

commit ba9c44828578a3b5cc9fd1aaba80158615921785
Merge: 97dffe71 3bf0b6b3
Author: kangfenmao <kangfenmao@qq.com>
Date:   Sat May 10 20:18:44 2025 +0800

    Merge branch 'main' into fix/next-release-bugs

commit 97dffe719ee00ace2325c80022a48c44f03d6e26
Author: lizhixuan <daoquqiexing@gmail.com>
Date:   Sat May 10 11:38:01 2025 +0800

    feat(MessageMenubar): add edit option to dropdown for single message editing

commit 70157439a1d9778cacc87b1781a84d159a8d3f33
Author: lizhixuan <daoquqiexing@gmail.com>
Date:   Sat May 10 10:53:10 2025 +0800

    refactor(StreamProcessingService): comment out console.log for cleaner code

commit fa33ba77a9306ad316f34da4149858192079f7a2
Author: lizhixuan <daoquqiexing@gmail.com>
Date:   Sat May 10 10:52:08 2025 +0800

    refactor(messageThunk): remove console.log statements for cleaner code

commit 6544c5d2990adf1943195e1d4d11383859a05488
Author: kangfenmao <kangfenmao@qq.com>
Date:   Sat May 10 10:17:44 2025 +0800

    feat(i18n): add download success and failure messages in multiple languages

commit e23bb6744a4a99b2062012691340f78fad4e1952
Merge: 55c5c553 60cc1dee
Author: kangfenmao <kangfenmao@qq.com>
Date:   Sat May 10 09:54:38 2025 +0800

    Merge branch 'main' into fix/next-release-bugs

commit 55c5c5533eab46a1de9c5511433ed1b1b9e90512
Author: suyao <sy20010504@gmail.com>
Date:   Fri May 9 22:19:35 2025 +0800

    fix: update styled component props to use dollar sign prefix for consistency

commit 7a5839e0efdf3eb648b3d26cdf7ce131a2821f6a
Author: suyao <sy20010504@gmail.com>
Date:   Fri May 9 22:02:06 2025 +0800

    fix: prevent default action in handleLinkClick for better link handling

commit ecb075fddfc2bc5796a804ccde29aaa762d85da6
Merge: df149608 963f04f7
Author: suyao <sy20010504@gmail.com>
Date:   Fri May 9 21:55:54 2025 +0800

    Merge branch 'main' into fix/next-release-bugs

commit df149608904039903d74d7a72a722b6c1a567ee3
Merge: 93bd4eb9 e29a9303
Author: suyao <sy20010504@gmail.com>
Date:   Fri May 9 21:48:13 2025 +0800

    Merge branch 'main' into fix/next-release-bugs

commit 93bd4eb907816414af5c27efbaa2d3d014c707fc
Merge: c4d1deb6 38ff9b90
Author: suyao <sy20010504@gmail.com>
Date:   Fri May 9 21:27:08 2025 +0800

    Merge branch 'fix/next-release-bugs' of github.com:CherryHQ/cherry-studio into fix/next-release-bugs

commit c4d1deb6911977a23cf731db6bed80b8352557ff
Author: suyao <sy20010504@gmail.com>
Date:   Fri May 9 21:24:48 2025 +0800

    feat: enhance citation handling and add metadata support in citation blocks

commit 38ff9b90b8fee91ed1fba7b83c9470bc40bd3429
Author: MyPrototypeWhat <daoquqiexing@gmail.com>
Date:   Fri May 9 19:47:24 2025 +0800

    fix: enhance logging and update async handling in StreamProcessingService and messageThunk

    - Enabled logging in `createStreamProcessor` for better debugging.
    - Added logging for updated messages in `updateExistingMessageAndBlocksInDB` and `saveUpdatesToDB`.
    - Updated `onTextComplete` and `onLLMWebSearchComplete` to handle asynchronous operations correctly.
    - Commented out unused `saveUpdatedBlockToDB` calls to prevent unnecessary database updates.

commit cda0215c9c4e007c2c7240c3c9c8521fb7111774
Author: MyPrototypeWhat <daoquqiexing@gmail.com>
Date:   Fri May 9 18:47:55 2025 +0800

    refactor: optimize block update logic and remove unused code

    - Updated `throttledBlockUpdate` to handle asynchronous updates directly.
    - Removed the unused `throttledBlockDbUpdate` function and its related logic.
    - Added cancellation for throttled updates on error and completion to improve performance and reliability.
    - Cleaned up commented-out code for better readability.

commit de2f5b09c8384eabd4df7253047b838a2759671a
Author: MyPrototypeWhat <daoquqiexing@gmail.com>
Date:   Fri May 9 18:42:00 2025 +0800

    refactor: update message handling and state management

    - Simplified message editing logic by removing unnecessary success/error logging.
    - Added `updatedAt` timestamp to message updates for better tracking.
    - Refactored `editMessageBlocks` to accept message ID and updates directly.
    - Removed unused `getTopicLimit` function from `TopicManager`.
    - Updated message rendering to use `updatedAt` when available.
    - Enhanced type definitions to include `updatedAt` in message structure.

commit 700fa13971cafb04314817d2d8732c8fbf33c9d7
Author: suyao <sy20010504@gmail.com>
Date:   Fri May 9 16:19:55 2025 +0800

    Remove Zhipu mode and text-only link handling

commit 06bd1338cd671b255e477cec76b12663ea759f4c
Author: kangfenmao <kangfenmao@qq.com>
Date:   Fri May 9 15:49:02 2025 +0800

    fix: update citation rendering logic in MainTextBlock component

    - Added a check to determine if the citation URL is a valid link.
    - Updated citation tag formatting to conditionally include the link based on the URL validity.

commit e96c9a569f7708816f57505975d30667929eeb19
Author: kangfenmao <kangfenmao@qq.com>
Date:   Thu May 8 18:31:14 2025 +0800

    style: update ChatNavigation and CitationsList components for improved UI consistency

    - Added header style to remove borders in ChatNavigation.
    - Enhanced CitationsList with new Skeleton loading state and improved layout for citation cards.
    - Refactored CitationLink to a div for better styling control and adjusted padding in OpenButton for a more polished appearance.
This commit is contained in:
kangfenmao 2025-05-11 18:44:28 +08:00
parent 1fde0999f8
commit a64c8ded73
15 changed files with 479 additions and 379 deletions

View File

@ -189,7 +189,7 @@ export const TEXT_TO_IMAGE_REGEX = /flux|diffusion|stabilityai|sd-|dall|cogview|
// Reasoning models
export const REASONING_REGEX =
/^(o\d+(?:-[\w-]+)?|.*\b(?:reasoner|thinking)\b.*|.*-[rR]\d+.*|.*\bqwq(?:-[\w-]+)?\b.*|.*\bhunyuan-t1(?:-[\w-]+)?\b.*|.*\bglm-zero-preview\b.*|.*\bgrok-3-mini(?:-[\w-]+)?\b.*)$/i
/^(o\d+(?:-[\w-]+)?|.*\b(?:reasoning|reasoner|thinking)\b.*|.*-[rR]\d+.*|.*\bqwq(?:-[\w-]+)?\b.*|.*\bhunyuan-t1(?:-[\w-]+)?\b.*|.*\bglm-zero-preview\b.*|.*\bgrok-3-mini(?:-[\w-]+)?\b.*)$/i
// Embedding models
export const EMBEDDING_REGEX =
@ -206,7 +206,7 @@ export const FUNCTION_CALLING_MODELS = [
'gpt-4o-mini',
'gpt-4',
'gpt-4.5',
'o1(?:-[\\w-]+)?',
'o(1|3|4)(?:-[\\w-]+)?',
'claude',
'qwen',
'qwen3',
@ -2153,11 +2153,11 @@ export const TEXT_TO_IMAGES_MODELS_SUPPORT_IMAGE_ENHANCEMENT = [
export const GENERATE_IMAGE_MODELS = [
'gemini-2.0-flash-exp-image-generation',
'gemini-2.0-flash-preview-image-generation',
'gemini-2.0-flash-exp',
'grok-2-image-1212',
'grok-2-image',
'grok-2-image-latest',
'gpt-4o-image',
'gpt-image-1'
]
@ -2172,6 +2172,7 @@ export const GEMINI_SEARCH_MODELS = [
'gemini-2.5-pro-exp-03-25',
'gemini-2.5-pro-preview',
'gemini-2.5-pro-preview-03-25',
'gemini-2.5-pro-preview-05-06',
'gemini-2.5-flash-preview',
'gemini-2.5-flash-preview-04-17'
]

View File

@ -35,7 +35,7 @@ const MODEL_SUPPORTED_OPTIONS: Record<string, ThinkingOption[]> = {
default: ['off', 'low', 'medium', 'high'],
grok: ['off', 'low', 'high'],
gemini: ['off', 'low', 'medium', 'high', 'auto'],
qwen: ['off', 'low', 'medium', 'high', 'auto']
qwen: ['off', 'low', 'medium', 'high']
}
// Option fallback map: the substitute option to use when the selected option is not supported

View File

@ -16,7 +16,7 @@ const MessageErrorInfo: React.FC<{ block: ErrorMessageBlock }> = ({ block }) =>
const HTTP_ERROR_CODES = [400, 401, 403, 404, 429, 500, 502, 503, 504]
if (block.error && HTTP_ERROR_CODES.includes(block.error?.status)) {
return <Alert description={t(`error.http.${block.error.status}`)} type="error" />
return <Alert description={t(`error.http.${block.error.status}`)} message={block.error?.message} type="error" />
}
if (block?.error?.message) {
const errorKey = `error.${block.error.message}`

View File

@ -4,7 +4,7 @@ import { MessageBlockStatus, type ThinkingMessageBlock } from '@renderer/types/n
import { Collapse, message as antdMessage, Tooltip } from 'antd'
import { Lightbulb } from 'lucide-react'
import { motion } from 'motion/react'
import { memo, useCallback, useEffect, useMemo, useState } from 'react'
import { memo, useCallback, useEffect, useMemo, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import styled from 'styled-components'
@ -40,6 +40,8 @@ const ThinkingBlock: React.FC<Props> = ({ block }) => {
const { t } = useTranslation()
const { messageFont, fontSize, thoughtAutoCollapse } = useSettings()
const [activeKey, setActiveKey] = useState<'thought' | ''>(thoughtAutoCollapse ? '' : 'thought')
const [thinkingTime, setThinkingTime] = useState(block.thinking_millsec || 0)
const intervalId = useRef<NodeJS.Timeout>(null)
const isThinking = useMemo(() => block.status === MessageBlockStatus.STREAMING, [block.status])
@ -73,13 +75,31 @@ const ThinkingBlock: React.FC<Props> = ({ block }) => {
}
}, [block.content, t])
// FIXME: the time measured here differs slightly from the time measured at the request site
useEffect(() => {
if (isThinking) {
intervalId.current = setInterval(() => {
setThinkingTime((prev) => prev + 100)
}, 100)
} else if (intervalId.current) {
// Clear the timer immediately
clearInterval(intervalId.current)
intervalId.current = null
}
return () => {
if (intervalId.current) {
window.clearInterval(intervalId.current)
}
}
}, [isThinking])
const thinkingTimeSeconds = useMemo(() => (thinkingTime / 1000).toFixed(1), [thinkingTime])
if (!block.content) {
return null
}
const thinkingTime = block.thinking_millsec || 0
const thinkingTimeSeconds = (thinkingTime / 1000).toFixed(1)
return (
<CollapseContainer
activeKey={activeKey}

View File

@ -10,35 +10,6 @@ interface Props {
}
const MessageContent: React.FC<Props> = ({ message }) => {
// const { t } = useTranslation()
// if (message.status === 'pending') {
// return (
// )
// }
// if (message.status === 'searching') {
// return (
// <SearchingContainer>
// <Search size={24} />
// <SearchingText>{t('message.searching')}</SearchingText>
// <BarLoader color="#1677ff" />
// </SearchingContainer>
// )
// }
// if (message.status === 'error') {
// return <MessageError message={message} />
// }
// if (message.type === '@' && model) {
// const content = `[@${model.name}](#) ${getBriefInfo(message.content)}`
// return <Markdown message={{ ...message, content }} />
// }
// const toolUseRegex = /<tool_use>([\s\S]*?)<\/tool_use>/g
// console.log('message', message)
return (
<>
<Flex gap="8px" wrap style={{ marginBottom: 10 }}>

View File

@ -1,45 +0,0 @@
import type { ErrorMessageBlock } from '@renderer/types/newMessage'
import { Alert as AntdAlert } from 'antd'
import { FC } from 'react'
import { useTranslation } from 'react-i18next'
import styled from 'styled-components'
const MessageError: FC<{ block: ErrorMessageBlock }> = ({ block }) => {
return (
<>
{/* <Markdown block={block} role={role} />
{block.error && (
<Markdown
message={{
...block,
content: formatErrorMessage(block.error)
}}
/>
)} */}
<MessageErrorInfo block={block} />
</>
)
}
const MessageErrorInfo: FC<{ block: ErrorMessageBlock }> = ({ block }) => {
const { t } = useTranslation()
const HTTP_ERROR_CODES = [400, 401, 403, 404, 429, 500, 502, 503, 504]
console.log('block', block)
if (block.error && HTTP_ERROR_CODES.includes(block.error?.status)) {
return <Alert description={t(`error.http.${block.error.status}`)} type="error" />
}
if (block?.error?.message) {
return <Alert description={block.error.message} type="error" />
}
return <Alert description={t('error.chat.response')} type="error" />
}
const Alert = styled(AntdAlert)`
margin: 15px 0 8px;
padding: 10px;
font-size: 12px;
`
export default MessageError

View File

@ -30,10 +30,12 @@ import {
MCPCallToolResponse,
MCPTool,
MCPToolResponse,
Metrics,
Model,
Provider,
Suggestion,
ToolCallResponse,
Usage,
WebSearchSource
} from '@renderer/types'
import { ChunkType } from '@renderer/types/chunk'
@ -47,7 +49,7 @@ import {
} from '@renderer/utils/mcp-tools'
import { findFileBlocks, findImageBlocks, getMainTextContent } from '@renderer/utils/messageUtils/find'
import { buildSystemPrompt } from '@renderer/utils/prompt'
import { first, flatten, sum, takeRight } from 'lodash'
import { first, flatten, takeRight } from 'lodash'
import OpenAI from 'openai'
import { CompletionsParams } from '.'
@ -270,77 +272,82 @@ export default class AnthropicProvider extends BaseProvider {
...this.getCustomParameters(assistant)
}
let time_first_token_millsec = 0
let time_first_content_millsec = 0
let checkThinkingContent = false
let thinking_content = ''
const start_time_millsec = new Date().getTime()
if (!streamOutput) {
const message = await this.sdk.messages.create({ ...body, stream: false })
const time_completion_millsec = new Date().getTime() - start_time_millsec
let text = ''
let reasoning_content = ''
if (message.content && message.content.length > 0) {
const thinkingBlock = message.content.find((block) => block.type === 'thinking')
const textBlock = message.content.find((block) => block.type === 'text')
if (thinkingBlock && 'thinking' in thinkingBlock) {
reasoning_content = thinkingBlock.thinking
}
if (textBlock && 'text' in textBlock) {
text = textBlock.text
}
}
return onChunk({
type: ChunkType.BLOCK_COMPLETE,
response: {
text,
reasoning_content,
usage: message.usage as any,
metrics: {
completion_tokens: message.usage.output_tokens,
time_completion_millsec,
time_first_token_millsec: 0
}
}
})
}
const { abortController, cleanup } = this.createAbortController(lastUserMessage?.id)
const { signal } = abortController
const finalUsage: Usage = {
completion_tokens: 0,
prompt_tokens: 0,
total_tokens: 0
}
const finalMetrics: Metrics = {
completion_tokens: 0,
time_completion_millsec: 0,
time_first_token_millsec: 0
}
const toolResponses: MCPToolResponse[] = []
const processStream = (body: MessageCreateParamsNonStreaming, idx: number) => {
const processStream = async (body: MessageCreateParamsNonStreaming, idx: number) => {
let time_first_token_millsec = 0
const start_time_millsec = new Date().getTime()
if (!streamOutput) {
const message = await this.sdk.messages.create({ ...body, stream: false })
const time_completion_millsec = new Date().getTime() - start_time_millsec
let text = ''
let reasoning_content = ''
if (message.content && message.content.length > 0) {
const thinkingBlock = message.content.find((block) => block.type === 'thinking')
const textBlock = message.content.find((block) => block.type === 'text')
if (thinkingBlock && 'thinking' in thinkingBlock) {
reasoning_content = thinkingBlock.thinking
}
if (textBlock && 'text' in textBlock) {
text = textBlock.text
}
}
return onChunk({
type: ChunkType.BLOCK_COMPLETE,
response: {
text,
reasoning_content,
usage: message.usage as any,
metrics: {
completion_tokens: message.usage.output_tokens,
time_completion_millsec,
time_first_token_millsec: 0
}
}
})
}
let thinking_content = ''
let isFirstChunk = true
return new Promise<void>((resolve, reject) => {
// Wait for the API to return the stream
const toolCalls: ToolUseBlock[] = []
let hasThinkingContent = false
this.sdk.messages
.stream({ ...body, stream: true }, { signal, timeout: 5 * 60 * 1000 })
.on('text', (text) => {
if (hasThinkingContent && !checkThinkingContent) {
checkThinkingContent = true
onChunk({
type: ChunkType.THINKING_COMPLETE,
text: thinking_content,
thinking_millsec: new Date().getTime() - time_first_content_millsec
})
}
if (time_first_token_millsec == 0) {
time_first_token_millsec = new Date().getTime()
}
thinking_content = ''
checkThinkingContent = false
hasThinkingContent = false
if (!hasThinkingContent && time_first_content_millsec === 0) {
time_first_content_millsec = new Date().getTime()
if (isFirstChunk) {
isFirstChunk = false
if (time_first_token_millsec == 0) {
time_first_token_millsec = new Date().getTime()
} else {
onChunk({
type: ChunkType.THINKING_COMPLETE,
text: thinking_content,
thinking_millsec: new Date().getTime() - time_first_token_millsec
})
}
}
onChunk({ type: ChunkType.TEXT_DELTA, text })
@ -372,34 +379,22 @@ export default class AnthropicProvider extends BaseProvider {
})
}
}
if (block.type === 'tool_use') {
toolCalls.push(block)
}
})
.on('thinking', (thinking) => {
hasThinkingContent = true
const currentTime = new Date().getTime() // Get current time for each chunk
if (time_first_token_millsec == 0) {
time_first_token_millsec = currentTime
time_first_token_millsec = new Date().getTime()
}
// Set time_first_content_millsec ONLY when the first content (thinking or text) arrives
if (time_first_content_millsec === 0) {
time_first_content_millsec = currentTime
}
// Calculate thinking time as time elapsed since start until this chunk
const thinking_time = currentTime - time_first_content_millsec
onChunk({
type: ChunkType.THINKING_DELTA,
text: thinking,
thinking_millsec: thinking_time
thinking_millsec: new Date().getTime() - time_first_token_millsec
})
thinking_content += thinking
})
.on('contentBlock', (content) => {
if (content.type === 'tool_use') {
toolCalls.push(content)
}
})
.on('finalMessage', async (message) => {
const toolResults: Awaited<ReturnType<typeof parseAndCallTools>> = []
// tool call
@ -458,29 +453,28 @@ export default class AnthropicProvider extends BaseProvider {
newBody.messages = userMessages
onChunk({ type: ChunkType.LLM_RESPONSE_CREATED })
await processStream(newBody, idx + 1)
try {
await processStream(newBody, idx + 1)
} catch (error) {
console.error('Error processing stream:', error)
reject(error)
}
}
const time_completion_millsec = new Date().getTime() - start_time_millsec
finalUsage.prompt_tokens += message.usage.input_tokens
finalUsage.completion_tokens += message.usage.output_tokens
finalUsage.total_tokens += finalUsage.prompt_tokens + finalUsage.completion_tokens
finalMetrics.completion_tokens = finalUsage.completion_tokens
finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec
onChunk({
type: ChunkType.BLOCK_COMPLETE,
response: {
usage: {
prompt_tokens: message.usage.input_tokens,
completion_tokens: message.usage.output_tokens,
total_tokens: sum(Object.values(message.usage))
},
metrics: {
completion_tokens: message.usage.output_tokens,
time_completion_millsec,
time_first_token_millsec: time_first_token_millsec - start_time_millsec
}
usage: finalUsage,
metrics: finalMetrics
}
})
// FIXME: 临时方案,重置时间戳和思考内容
time_first_token_millsec = 0
time_first_content_millsec = 0
resolve()
})
.on('error', (error) => reject(error))

View File

@ -40,6 +40,7 @@ import {
MCPCallToolResponse,
MCPTool,
MCPToolResponse,
Metrics,
Model,
Provider,
Suggestion,
@ -126,7 +127,6 @@ export default class GeminiProvider extends BaseProvider {
* @returns The message contents
*/
private async getMessageContents(message: Message): Promise<Content> {
console.log('getMessageContents', message)
const role = message.role === 'user' ? 'user' : 'model'
const parts: Part[] = [{ text: await this.getMessageContent(message) }]
// Add any generated images from previous responses
@ -153,6 +153,16 @@ export default class GeminiProvider extends BaseProvider {
}
}
}
const file = imageBlock.file
if (file) {
const base64Data = await window.api.file.base64Image(file.id + file.ext)
parts.push({
inlineData: {
data: base64Data.base64,
mimeType: base64Data.mime
} as Part['inlineData']
})
}
}
const fileBlocks = findFileBlocks(message)
@ -186,6 +196,50 @@ export default class GeminiProvider extends BaseProvider {
}
}
/**
 * Convert a message into a Gemini `Content` entry for image-generation chats.
 *
 * The first part is always the message's main text. For every image block,
 * previously generated images stored as base64 `data:` URLs are appended as
 * inline data, followed by the block's attached file (if any), which is read
 * as base64 through `window.api.file.base64Image`.
 *
 * @param message - The message to convert.
 * @returns The content with role `'user'` for user messages and `'model'` otherwise.
 */
private async getImageFileContents(message: Message): Promise<Content> {
  const parts: Part[] = [{ text: getMainTextContent(message) }]

  for (const imageBlock of findImageBlocks(message)) {
    const generatedImages = imageBlock.metadata?.generateImageResponse?.images
    if (generatedImages && generatedImages.length > 0) {
      for (const imageUrl of generatedImages) {
        // Only inline base64 data URLs can be forwarded; anything else is skipped.
        if (!imageUrl || !imageUrl.startsWith('data:')) {
          continue
        }
        // Split the data URL into its mime type and base64 payload.
        const matches = imageUrl.match(/^data:(.+);base64,(.*)$/)
        if (!matches || matches.length !== 3) {
          continue
        }
        const [, mimeType, data] = matches
        parts.push({ inlineData: { data, mimeType } as Part['inlineData'] })
      }
    }

    const file = imageBlock.file
    if (file) {
      const encoded = await window.api.file.base64Image(file.id + file.ext)
      parts.push({ inlineData: { data: encoded.base64, mimeType: encoded.mime } as Part['inlineData'] })
    }
  }

  return {
    role: message.role === 'user' ? 'user' : 'model',
    parts
  }
}
/**
* Get the safety settings
* @returns The safety settings
@ -273,6 +327,18 @@ export default class GeminiProvider extends BaseProvider {
}: CompletionsParams): Promise<void> {
const defaultModel = getDefaultModel()
const model = assistant.model || defaultModel
let canGenerateImage = false
if (isGenerateImageModel(model)) {
if (model.id === 'gemini-2.0-flash-exp') {
canGenerateImage = assistant.enableGenerateImage!
} else {
canGenerateImage = true
}
}
if (canGenerateImage) {
await this.generateImageByChat({ messages, assistant, onChunk })
return
}
const { contextCount, maxTokens, streamOutput, enableToolUse } = getAssistantSettings(assistant)
const userMessages = filterUserRoleStartMessages(
@ -309,21 +375,10 @@ export default class GeminiProvider extends BaseProvider {
})
}
let canGenerateImage = false
if (isGenerateImageModel(model)) {
if (model.id === 'gemini-2.0-flash-exp') {
canGenerateImage = assistant.enableGenerateImage!
} else {
canGenerateImage = true
}
}
const generateContentConfig: GenerateContentConfig = {
responseModalities: canGenerateImage ? [Modality.TEXT, Modality.IMAGE] : undefined,
responseMimeType: canGenerateImage ? 'text/plain' : undefined,
safetySettings: this.getSafetySettings(),
// generate image don't need system instruction
systemInstruction: isGemmaModel(model) || canGenerateImage ? undefined : systemInstruction,
systemInstruction: isGemmaModel(model) ? undefined : systemInstruction,
temperature: assistant?.settings?.temperature,
topP: assistant?.settings?.topP,
maxOutputTokens: maxTokens,
@ -360,8 +415,17 @@ export default class GeminiProvider extends BaseProvider {
}
}
const start_time_millsec = new Date().getTime()
let time_first_token_millsec = 0
const finalUsage: Usage = {
completion_tokens: 0,
prompt_tokens: 0,
total_tokens: 0
}
const finalMetrics: Metrics = {
completion_tokens: 0,
time_completion_millsec: 0,
time_first_token_millsec: 0
}
const { cleanup, abortController } = this.createAbortController(userLastMessage?.id, true)
@ -435,6 +499,8 @@ export default class GeminiProvider extends BaseProvider {
history.push(messageContents)
let functionCalls: FunctionCall[] = []
let time_first_token_millsec = 0
const start_time_millsec = new Date().getTime()
if (stream instanceof GenerateContentResponse) {
let content = ''
@ -494,45 +560,28 @@ export default class GeminiProvider extends BaseProvider {
} as BlockCompleteChunk)
} else {
let content = ''
let final_time_completion_millsec = 0
let lastUsage: Usage | undefined = undefined
for await (const chunk of stream) {
if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) break
// --- Calculate Metrics ---
if (time_first_token_millsec == 0 && chunk.text !== undefined) {
// Update based on text arrival
time_first_token_millsec = new Date().getTime() - start_time_millsec
if (time_first_token_millsec == 0) {
time_first_token_millsec = new Date().getTime()
}
// 1. Text Content
if (chunk.text !== undefined) {
content += chunk.text
onChunk({ type: ChunkType.TEXT_DELTA, text: chunk.text })
}
// 2. Usage Data
if (chunk.usageMetadata) {
lastUsage = {
prompt_tokens: chunk.usageMetadata.promptTokenCount || 0,
completion_tokens: chunk.usageMetadata.candidatesTokenCount || 0,
total_tokens: chunk.usageMetadata.totalTokenCount || 0
}
final_time_completion_millsec = new Date().getTime() - start_time_millsec
}
// 4. Image Generation
const generateImage = this.processGeminiImageResponse(chunk, onChunk)
if (generateImage?.images?.length) {
onChunk({ type: ChunkType.IMAGE_COMPLETE, image: generateImage })
}
if (chunk.candidates?.[0]?.finishReason) {
if (chunk.text) {
onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
}
if (chunk.usageMetadata) {
finalUsage.prompt_tokens += chunk.usageMetadata.promptTokenCount || 0
finalUsage.completion_tokens += chunk.usageMetadata.candidatesTokenCount || 0
finalUsage.total_tokens += chunk.usageMetadata.totalTokenCount || 0
}
if (chunk.candidates?.[0]?.groundingMetadata) {
// 3. Grounding/Search Metadata
const groundingMetadata = chunk.candidates?.[0]?.groundingMetadata
onChunk({
type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
@ -551,35 +600,37 @@ export default class GeminiProvider extends BaseProvider {
functionCalls = functionCalls.concat(chunk.functionCalls)
}
onChunk({
type: ChunkType.BLOCK_COMPLETE,
response: {
metrics: {
completion_tokens: lastUsage?.completion_tokens,
time_completion_millsec: final_time_completion_millsec,
time_first_token_millsec
},
usage: lastUsage
}
})
}
// --- End Incremental onChunk calls ---
// Call processToolUses AFTER potentially processing text content in this chunk
// This assumes tools might be specified within the text stream
// Note: parseAndCallTools inside should handle its own onChunk for tool responses
let toolResults: Awaited<ReturnType<typeof parseAndCallTools>> = []
if (functionCalls.length) {
toolResults = await processToolCalls(functionCalls)
}
if (content.length) {
toolResults = toolResults.concat(await processToolUses(content))
}
if (toolResults.length) {
await processToolResults(toolResults, idx)
finalMetrics.completion_tokens = finalUsage.completion_tokens
finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
finalMetrics.time_first_token_millsec =
(finalMetrics.time_first_token_millsec || 0) + (time_first_token_millsec - start_time_millsec)
}
}
// --- End Incremental onChunk calls ---
// Call processToolUses AFTER potentially processing text content in this chunk
// This assumes tools might be specified within the text stream
// Note: parseAndCallTools inside should handle its own onChunk for tool responses
let toolResults: Awaited<ReturnType<typeof parseAndCallTools>> = []
if (functionCalls.length) {
toolResults = await processToolCalls(functionCalls)
}
if (content.length) {
toolResults = toolResults.concat(await processToolUses(content))
}
if (toolResults.length) {
await processToolResults(toolResults, idx)
}
// FIXME: because of the recursion, this chunk is sent n times
onChunk({
type: ChunkType.BLOCK_COMPLETE,
response: {
usage: finalUsage,
metrics: finalMetrics
}
})
}
}
@ -605,17 +656,6 @@ export default class GeminiProvider extends BaseProvider {
})
await processStream(userMessagesStream, 0).finally(cleanup)
const final_time_completion_millsec = new Date().getTime() - start_time_millsec
onChunk({
type: ChunkType.BLOCK_COMPLETE,
response: {
metrics: {
time_completion_millsec: final_time_completion_millsec,
time_first_token_millsec
}
}
})
}
/**
@ -949,8 +989,97 @@ export default class GeminiProvider extends BaseProvider {
return data.embeddings?.[0]?.values?.length || 0
}
public generateImageByChat(): Promise<void> {
throw new Error('Method not implemented.')
/**
 * Generate images (with optional accompanying text) through a streaming Gemini chat session.
 *
 * Only the most recent `contextCount + 2` messages are considered. After filtering, the
 * last one is sent as the prompt and the earlier ones as chat history. Text is reported
 * through TEXT_DELTA / TEXT_COMPLETE chunks, images through IMAGE_COMPLETE, and one final
 * BLOCK_COMPLETE chunk carries usage and timing metrics. Failures are logged and reported
 * as an ERROR chunk instead of being thrown.
 *
 * @param messages - Conversation messages to build the chat from.
 * @param assistant - Assistant whose model and settings configure the request.
 * @param onChunk - Callback that receives every emitted chunk.
 */
public async generateImageByChat({ messages, assistant, onChunk }): Promise<void> {
  const defaultModel = getDefaultModel()
  const model = assistant.model || defaultModel
  const { contextCount, maxTokens } = getAssistantSettings(assistant)
  const userMessages = filterUserRoleStartMessages(
    filterEmptyMessages(filterContextMessages(takeRight(messages, contextCount + 2)))
  )
  const userLastMessage = userMessages.pop()
  // Keep `cleanup` so the abort controller is released once the request settles.
  const { abortController, cleanup } = this.createAbortController(userLastMessage?.id, true)
  const { signal } = abortController
  const generateContentConfig: GenerateContentConfig = {
    responseModalities: [Modality.TEXT, Modality.IMAGE],
    responseMimeType: 'text/plain',
    safetySettings: this.getSafetySettings(),
    temperature: assistant?.settings?.temperature,
    // The assistant setting is `topP`, matching how `completions` reads it in this file.
    topP: assistant?.settings?.topP,
    maxOutputTokens: maxTokens,
    abortSignal: signal,
    ...this.getCustomParameters(assistant)
  }
  const history: Content[] = []
  try {
    for (const message of userMessages) {
      history.push(await this.getImageFileContents(message))
    }
    let time_first_token_millsec = 0
    const start_time_millsec = new Date().getTime()
    onChunk({ type: ChunkType.LLM_RESPONSE_CREATED })
    const chat = this.sdk.chats.create({
      model: model.id,
      config: generateContentConfig,
      history: history
    })
    let content = ''
    const finalUsage: Usage = {
      prompt_tokens: 0,
      completion_tokens: 0,
      total_tokens: 0
    }
    const userMessage: Content = await this.getImageFileContents(userLastMessage!)
    const response = await chat.sendMessageStream({
      message: userMessage.parts!,
      config: {
        ...generateContentConfig,
        abortSignal: signal
      }
    })
    for await (const chunk of response as AsyncGenerator<GenerateContentResponse>) {
      // Record the arrival time of the first chunk only.
      if (time_first_token_millsec == 0) {
        time_first_token_millsec = new Date().getTime()
      }
      if (chunk.text !== undefined) {
        content += chunk.text
        onChunk({ type: ChunkType.TEXT_DELTA, text: chunk.text })
      }
      const generateImage = this.processGeminiImageResponse(chunk, onChunk)
      if (generateImage?.images?.length) {
        onChunk({ type: ChunkType.IMAGE_COMPLETE, image: generateImage })
      }
      if (chunk.candidates?.[0]?.finishReason) {
        if (chunk.text) {
          onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
        }
        if (chunk.usageMetadata) {
          finalUsage.prompt_tokens = chunk.usageMetadata.promptTokenCount || 0
          finalUsage.completion_tokens = chunk.usageMetadata.candidatesTokenCount || 0
          finalUsage.total_tokens = chunk.usageMetadata.totalTokenCount || 0
        }
      }
    }
    onChunk({
      type: ChunkType.BLOCK_COMPLETE,
      response: {
        usage: finalUsage,
        metrics: {
          completion_tokens: finalUsage.completion_tokens,
          time_completion_millsec: new Date().getTime() - start_time_millsec,
          // If the stream produced no chunk, report 0 rather than a negative offset.
          time_first_token_millsec:
            time_first_token_millsec > 0 ? time_first_token_millsec - start_time_millsec : 0
        }
      }
    })
  } catch (error) {
    console.error('[generateImageByChat] error', error)
    onChunk({
      type: ChunkType.ERROR,
      error
    })
  } finally {
    // Release the abort controller whether the request succeeded or failed.
    cleanup()
  }
}
public convertMcpTools<T>(mcpTools: MCPTool[]): T[] {

View File

@ -34,6 +34,7 @@ import {
MCPCallToolResponse,
MCPTool,
MCPToolResponse,
Metrics,
Model,
Provider,
Suggestion,
@ -395,7 +396,6 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
return streamOutput
}
const start_time_millsec = new Date().getTime()
const lastUserMessage = _messages.findLast((m) => m.role === 'user')
const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
const { signal } = abortController
@ -423,6 +423,18 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
reqMessages = [systemMessage, ...userMessages].filter(Boolean) as ChatCompletionMessageParam[]
}
let finalUsage: Usage = {
completion_tokens: 0,
prompt_tokens: 0,
total_tokens: 0
}
const finalMetrics: Metrics = {
completion_tokens: 0,
time_completion_millsec: 0,
time_first_token_millsec: 0
}
const toolResponses: MCPToolResponse[] = []
const processToolResults = async (toolResults: Awaited<ReturnType<typeof parseAndCallTools>>, idx: number) => {
@ -505,18 +517,17 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
const processStream = async (stream: any, idx: number) => {
const toolCalls: ChatCompletionMessageToolCall[] = []
let time_first_token_millsec = 0
const start_time_millsec = new Date().getTime()
// Handle non-streaming case (already returns early, no change needed here)
if (!isSupportStreamOutput()) {
const time_completion_millsec = new Date().getTime() - start_time_millsec
// Calculate final metrics once
const finalMetrics = {
completion_tokens: stream.usage?.completion_tokens,
time_completion_millsec,
time_first_token_millsec: 0 // Non-streaming, first token time is not relevant
}
finalMetrics.completion_tokens = stream.usage?.completion_tokens
finalMetrics.time_completion_millsec = new Date().getTime() - start_time_millsec
// Create a synthetic usage object if stream.usage is undefined
const finalUsage = stream.usage
finalUsage = { ...stream.usage }
// Separate onChunk calls for text and usage/metrics
let content = ''
stream.choices.forEach((choice) => {
@ -526,7 +537,7 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
onChunk({
type: ChunkType.THINKING_COMPLETE,
text: choice.message.reasoning,
thinking_millsec: time_completion_millsec
thinking_millsec: new Date().getTime() - start_time_millsec
})
}
// text
@ -576,20 +587,9 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
return
}
let content = '' // Accumulate content for tool processing if needed
let content = ''
let thinkingContent = ''
// 记录最终的完成时间差
let final_time_completion_millsec_delta = 0
let final_time_thinking_millsec_delta = 0
// Variable to store the last received usage object
let lastUsage: Usage | undefined = undefined
// let isThinkingInContent: ThoughtProcessor | undefined = undefined
// const processThinkingChunk = this.handleThinkingTags()
let isFirstChunk = true
let time_first_token_millsec = 0
let time_first_token_millsec_delta = 0
let time_first_content_millsec = 0
let time_thinking_start = 0
// 1. 初始化中间件
const reasoningTags = [
@ -640,25 +640,24 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
// 3. 消费 processedStream分发 onChunk
for await (const chunk of readableStreamAsyncIterable(processedStream)) {
const currentTime = new Date().getTime()
const delta = chunk.type === 'finish' ? chunk.delta : chunk
const rawChunk = chunk.type === 'finish' ? chunk.chunk : chunk
switch (chunk.type) {
case 'reasoning': {
if (time_thinking_start === 0) {
time_thinking_start = currentTime
time_first_token_millsec = currentTime
time_first_token_millsec_delta = currentTime - start_time_millsec
if (time_first_token_millsec === 0) {
time_first_token_millsec = new Date().getTime()
}
thinkingContent += chunk.textDelta
const thinking_time = currentTime - time_thinking_start
onChunk({ type: ChunkType.THINKING_DELTA, text: chunk.textDelta, thinking_millsec: thinking_time })
onChunk({
type: ChunkType.THINKING_DELTA,
text: chunk.textDelta,
thinking_millsec: new Date().getTime() - time_first_token_millsec
})
break
}
case 'text-delta': {
let textDelta = chunk.textDelta
if (assistant.enableWebSearch && delta) {
const originalDelta = rawChunk?.choices?.[0]?.delta
@ -676,25 +675,32 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
if (isFirstChunk) {
isFirstChunk = false
if (time_first_token_millsec === 0) {
time_first_token_millsec = currentTime
time_first_token_millsec_delta = currentTime - start_time_millsec
time_first_token_millsec = new Date().getTime()
} else {
onChunk({
type: ChunkType.THINKING_COMPLETE,
text: thinkingContent,
thinking_millsec: new Date().getTime() - time_first_token_millsec
})
}
}
content += textDelta
if (time_thinking_start > 0 && time_first_content_millsec === 0) {
time_first_content_millsec = currentTime
final_time_thinking_millsec_delta = time_first_content_millsec - time_thinking_start
onChunk({
type: ChunkType.THINKING_COMPLETE,
text: thinkingContent,
thinking_millsec: final_time_thinking_millsec_delta
})
}
onChunk({ type: ChunkType.TEXT_DELTA, text: textDelta })
break
}
case 'tool-calls': {
if (isFirstChunk) {
isFirstChunk = false
if (time_first_token_millsec === 0) {
time_first_token_millsec = new Date().getTime()
} else {
onChunk({
type: ChunkType.THINKING_COMPLETE,
text: thinkingContent,
thinking_millsec: new Date().getTime() - time_first_token_millsec
})
}
}
chunk.delta.tool_calls.forEach((toolCall) => {
const { id, index, type, function: fun } = toolCall
if (id && type === 'function' && fun) {
@ -721,10 +727,14 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
if (!isEmpty(finishReason)) {
onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
final_time_completion_millsec_delta = currentTime - start_time_millsec
if (usage) {
lastUsage = usage
finalUsage.completion_tokens += usage.completion_tokens || 0
finalUsage.prompt_tokens += usage.prompt_tokens || 0
finalUsage.total_tokens += usage.total_tokens || 0
finalMetrics.completion_tokens += usage.completion_tokens || 0
}
finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec
if (originalFinishDelta?.annotations) {
onChunk({
type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
@ -774,49 +784,46 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
} as LLMWebSearchCompleteChunk)
}
}
reqMessages.push({
role: 'assistant',
content: content,
tool_calls: toolCalls.length
? toolCalls.map((toolCall) => ({
id: toolCall.id,
function: {
...toolCall.function,
arguments:
typeof toolCall.function.arguments === 'string'
? toolCall.function.arguments
: JSON.stringify(toolCall.function.arguments)
},
type: 'function'
}))
: undefined
})
let toolResults: Awaited<ReturnType<typeof parseAndCallTools>> = []
if (toolCalls.length) {
toolResults = await processToolCalls(mcpTools, toolCalls)
}
if (content.length) {
toolResults = toolResults.concat(await processToolUses(content))
}
if (toolResults.length) {
await processToolResults(toolResults, idx)
}
onChunk({
type: ChunkType.BLOCK_COMPLETE,
response: {
usage: lastUsage,
metrics: {
completion_tokens: lastUsage?.completion_tokens,
time_completion_millsec: final_time_completion_millsec_delta,
time_first_token_millsec: time_first_token_millsec_delta,
time_thinking_millsec: final_time_thinking_millsec_delta
}
}
})
break
}
}
}
reqMessages.push({
role: 'assistant',
content: content,
tool_calls: toolCalls.length
? toolCalls.map((toolCall) => ({
id: toolCall.id,
function: {
...toolCall.function,
arguments:
typeof toolCall.function.arguments === 'string'
? toolCall.function.arguments
: JSON.stringify(toolCall.function.arguments)
},
type: 'function'
}))
: undefined
})
let toolResults: Awaited<ReturnType<typeof parseAndCallTools>> = []
if (toolCalls.length) {
toolResults = await processToolCalls(mcpTools, toolCalls)
}
if (content.length) {
toolResults = toolResults.concat(await processToolUses(content))
}
if (toolResults.length) {
await processToolResults(toolResults, idx)
}
onChunk({
type: ChunkType.BLOCK_COMPLETE,
response: {
usage: finalUsage,
metrics: finalMetrics
}
})
}
reqMessages = processReqMessages(model, reqMessages)

View File

@ -24,6 +24,7 @@ import {
MCPCallToolResponse,
MCPTool,
MCPToolResponse,
Metrics,
Model,
Provider,
Suggestion,
@ -332,7 +333,6 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
const lastUserMessage = _messages.findLast((m) => m.role === 'user')
const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
const { signal } = abortController
let time_first_token_millsec_delta = 0
const start_time_millsec = new Date().getTime()
const response = await this.sdk.chat.completions
// @ts-ignore key is not typed
@ -354,8 +354,17 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
const processStream = async (stream: any) => {
let content = ''
let isFirstChunk = true
let final_time_completion_millsec_delta = 0
let lastUsage: Usage | undefined = undefined
const finalUsage: Usage = {
completion_tokens: 0,
prompt_tokens: 0,
total_tokens: 0
}
const finalMetrics: Metrics = {
completion_tokens: 0,
time_completion_millsec: 0,
time_first_token_millsec: 0
}
for await (const chunk of stream as any) {
if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
break
@ -368,17 +377,21 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
}
if (isFirstChunk) {
isFirstChunk = false
time_first_token_millsec_delta = new Date().getTime() - start_time_millsec
finalMetrics.time_first_token_millsec = new Date().getTime() - start_time_millsec
}
content += delta.content
onChunk({ type: ChunkType.TEXT_DELTA, text: delta.content })
}
if (!isEmpty(finishReason) || chunk?.annotations) {
onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
final_time_completion_millsec_delta = new Date().getTime() - start_time_millsec
finalMetrics.time_completion_millsec = new Date().getTime() - start_time_millsec
if (chunk.usage) {
lastUsage = chunk.usage
const usage = chunk.usage as OpenAI.Completions.CompletionUsage
finalUsage.completion_tokens = usage.completion_tokens
finalUsage.prompt_tokens = usage.prompt_tokens
finalUsage.total_tokens = usage.total_tokens
}
finalMetrics.completion_tokens = finalUsage.completion_tokens
}
if (delta?.annotations) {
onChunk({
@ -393,12 +406,8 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
onChunk({
type: ChunkType.BLOCK_COMPLETE,
response: {
usage: lastUsage,
metrics: {
completion_tokens: lastUsage?.completion_tokens,
time_completion_millsec: final_time_completion_millsec_delta,
time_first_token_millsec: time_first_token_millsec_delta
}
usage: finalUsage,
metrics: finalMetrics
}
})
}
@ -428,7 +437,6 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
type: 'input_text'
}
if (isSupportedReasoningEffortOpenAIModel(model)) {
systemMessageInput.text = `Formatting re-enabled${systemMessageInput.text ? '\n' + systemMessageInput.text : ''}`
systemMessage.role = 'developer'
}
@ -455,9 +463,6 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
userMessage.push(await this.getReponseMessageParam(message, model))
}
let time_first_token_millsec = 0
const start_time_millsec = new Date().getTime()
const lastUserMessage = _messages.findLast((m) => m.role === 'user')
const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
const { signal } = abortController
@ -470,6 +475,18 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
reqMessages = [systemMessage, ...userMessage].filter(Boolean) as OpenAI.Responses.EasyInputMessage[]
}
const finalUsage: Usage = {
completion_tokens: 0,
prompt_tokens: 0,
total_tokens: 0
}
const finalMetrics: Metrics = {
completion_tokens: 0,
time_completion_millsec: 0,
time_first_token_millsec: 0
}
const toolResponses: MCPToolResponse[] = []
const processToolResults = async (toolResults: Awaited<ReturnType<typeof parseAndCallTools>>, idx: number) => {
@ -549,6 +566,8 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
idx: number
) => {
const toolCalls: OpenAI.Responses.ResponseFunctionToolCall[] = []
let time_first_token_millsec = 0
const start_time_millsec = new Date().getTime()
if (!streamOutput) {
const nonStream = stream as OpenAI.Responses.Response
@ -633,17 +652,15 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
const outputItems: OpenAI.Responses.ResponseOutputItem[] = []
let lastUsage: Usage | undefined = undefined
let final_time_completion_millsec_delta = 0
for await (const chunk of stream as Stream<OpenAI.Responses.ResponseStreamEvent>) {
if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
break
}
switch (chunk.type) {
case 'response.created':
time_first_token_millsec = new Date().getTime()
break
case 'response.output_item.added':
if (time_first_token_millsec === 0) {
time_first_token_millsec = new Date().getTime()
}
if (chunk.item.type === 'function_call') {
outputItems.push(chunk.item)
}
@ -708,18 +725,18 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
}
break
case 'response.completed': {
final_time_completion_millsec_delta = new Date().getTime() - start_time_millsec
const completion_tokens =
(chunk.response.usage?.output_tokens || 0) +
(chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 0)
const total_tokens =
(chunk.response.usage?.total_tokens || 0) +
(chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 0)
lastUsage = {
completion_tokens,
prompt_tokens: chunk.response.usage?.input_tokens || 0,
total_tokens
}
finalUsage.completion_tokens += completion_tokens
finalUsage.prompt_tokens += chunk.response.usage?.input_tokens || 0
finalUsage.total_tokens += total_tokens
finalMetrics.completion_tokens += completion_tokens
finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec
break
}
case 'error':
@ -761,12 +778,8 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
onChunk({
type: ChunkType.BLOCK_COMPLETE,
response: {
usage: lastUsage,
metrics: {
completion_tokens: lastUsage?.completion_tokens,
time_completion_millsec: final_time_completion_millsec_delta,
time_first_token_millsec: time_first_token_millsec - start_time_millsec
}
usage: finalUsage,
metrics: finalMetrics
}
})
}

View File

@ -59,7 +59,7 @@ export function createStreamProcessor(callbacks: StreamProcessorCallbacks = {})
callbacks.onTextComplete(data.text)
}
if (data.type === ChunkType.THINKING_DELTA && callbacks.onThinkingChunk) {
callbacks.onThinkingChunk(data.text, data.thinking_millsec)
callbacks.onThinkingChunk(data.text)
}
if (data.type === ChunkType.THINKING_COMPLETE && callbacks.onThinkingComplete) {
callbacks.onThinkingComplete(data.text, data.thinking_millsec)

View File

@ -46,7 +46,7 @@ const persistedReducer = persistReducer(
{
key: 'cherry-studio',
storage,
version: 99,
version: 100,
blacklist: ['runtime', 'messages', 'messageBlocks'],
migrate
},

View File

@ -1296,6 +1296,16 @@ const migrateConfig = {
} catch (error) {
return state
}
},
// Migration to store version 100: ensure the persisted settings carry a zoom
// factor, filling in 1 when the current value is missing or otherwise falsy.
'100': (state: RootState) => {
  try {
    const { settings } = state
    if (settings.zoomFactor) {
      // A usable zoom factor is already stored; nothing to migrate.
      return state
    }
    settings.zoomFactor = 1
    return state
  } catch (error) {
    // Best-effort migration: on any failure hand the state back as it is.
    return state
  }
}
}

View File

@ -398,7 +398,7 @@ const fetchAndProcessAssistantResponseImpl = async (
} else {
const newBlock = createThinkingBlock(assistantMsgId, accumulatedThinking, {
status: MessageBlockStatus.STREAMING,
thinking_millsec: thinking_millsec
thinking_millsec: 0
})
handleBlockTransition(newBlock, MessageBlockType.THINKING)
}
@ -565,7 +565,7 @@ const fetchAndProcessAssistantResponseImpl = async (
message: pauseErrorLanguagePlaceholder || error.message || 'Stream processing error',
originalMessage: error.message,
stack: error.stack,
status: error.status,
status: error.status || error.code,
requestId: error.request_id
}
if (lastBlockId) {
@ -609,13 +609,13 @@ const fetchAndProcessAssistantResponseImpl = async (
// 更新topic的name
autoRenameTopic(assistant, topicId)
if (response && !response.usage) {
if (response && response.usage?.total_tokens === 0) {
const usage = await estimateMessagesUsage({ assistant, messages: finalContextWithAssistant })
response.usage = usage
}
}
if (response && response.metrics) {
if (!response.metrics.completion_tokens && response.usage) {
if (response.metrics.completion_tokens === 0 && response.usage?.completion_tokens) {
response = {
...response,
metrics: {

View File

@ -121,8 +121,8 @@ export type Usage = OpenAI.Completions.CompletionUsage & {
}
/**
 * Token and timing statistics attached to a completed response.
 *
 * `completion_tokens` and `time_completion_millsec` are required (not
 * optional) so that callers can initialise them to 0 and accumulate into them
 * with `+=` without undefined checks. Each field is declared exactly once;
 * declaring the same member both as optional and as required is a
 * duplicate-identifier error.
 */
export type Metrics = {
  /** Completion token count; 0 when no usage figure has been recorded. */
  completion_tokens: number
  /** Elapsed time for the completion, in milliseconds. */
  time_completion_millsec: number
  /** Delay between request start and the first received token, in milliseconds. */
  time_first_token_millsec?: number
  /** Time attributed to the thinking phase, in milliseconds (presumably only set by reasoning-capable providers; confirm against callers). */
  time_thinking_millsec?: number
}