Mirror of https://github.com/CherryHQ/cherry-studio.git
fix: enhance error handling and metrics tracking across AI providers and message processing
Commit 4c50dfbd19 (parent 9b9a395451)
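The provider hunks below all apply the same pattern: instead of rebuilding usage and metrics from whatever the last stream chunk happened to report, each completions call zero-initializes a single finalUsage/finalMetrics pair up front and accumulates into it on every (possibly recursive) stream pass, so the final BLOCK_COMPLETE chunk carries totals for the whole exchange, tool-call passes included. A rough, self-contained TypeScript sketch of that accumulation pattern, assuming simplified Usage/Metrics shapes and a hypothetical runPass helper (an illustration, not the actual provider code):

// Local, simplified mirrors of the Usage/Metrics shapes from @renderer/types (illustrative only).
interface Usage {
  completion_tokens: number
  prompt_tokens: number
  total_tokens: number
}
interface Metrics {
  completion_tokens: number
  time_completion_millsec: number
  time_first_token_millsec?: number
}

// Hypothetical stand-in for one streamed LLM pass; it reports its usage once known.
async function runPass(onUsage: (usage: Usage) => void): Promise<void> {
  onUsage({ completion_tokens: 42, prompt_tokens: 100, total_tokens: 142 })
}

async function completionsSketch(passes: number): Promise<{ usage: Usage; metrics: Metrics }> {
  // Zero-initialize once, outside the per-pass loop, exactly as the hunks below do.
  const finalUsage: Usage = { completion_tokens: 0, prompt_tokens: 0, total_tokens: 0 }
  const finalMetrics: Metrics = { completion_tokens: 0, time_completion_millsec: 0, time_first_token_millsec: 0 }

  for (let i = 0; i < passes; i++) {
    const start = Date.now()
    await runPass((usage) => {
      // Accumulate instead of overwrite, so every recursive tool-call pass is counted.
      finalUsage.completion_tokens += usage.completion_tokens
      finalUsage.prompt_tokens += usage.prompt_tokens
      finalUsage.total_tokens += usage.total_tokens
    })
    finalMetrics.completion_tokens = finalUsage.completion_tokens
    finalMetrics.time_completion_millsec += Date.now() - start
  }

  return { usage: finalUsage, metrics: finalMetrics }
}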
@@ -16,7 +16,7 @@ const MessageErrorInfo: React.FC<{ block: ErrorMessageBlock }> = ({ block }) =>

   const HTTP_ERROR_CODES = [400, 401, 403, 404, 429, 500, 502, 503, 504]
   if (block.error && HTTP_ERROR_CODES.includes(block.error?.status)) {
-    return <Alert description={t(`error.http.${block.error.status}`)} type="error" />
+    return <Alert description={t(`error.http.${block.error.status}`)} message={block.error?.message} type="error" />
   }
   if (block?.error?.message) {
     const errorKey = `error.${block.error.message}`
@@ -56,10 +56,12 @@ const ThinkingBlock: React.FC<Props> = ({ block }) => {
   useEffect(() => {
     if (isThinking) {
       intervalId.current = setInterval(() => {
-        setThinkingTime((prev) => prev + 200)
-      }, 200)
-    } else {
-      return
+        setThinkingTime((prev) => prev + 100)
+      }, 100)
+    } else if (intervalId.current) {
+      // Clear the timer immediately
+      clearInterval(intervalId.current)
+      intervalId.current = null
     }

     return () => {
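The ThinkingBlock change above ticks the elapsed-time counter every 100 ms instead of 200 ms and clears the interval as soon as thinking stops, rather than only in the effect cleanup. A minimal sketch of that hook pattern, assuming a hypothetical useThinkingTimer hook (names are illustrative, not the actual component):

import { useEffect, useRef, useState } from 'react'

// Counts elapsed thinking time in 100 ms steps while `isThinking` is true.
function useThinkingTimer(isThinking: boolean): number {
  const [thinkingTime, setThinkingTime] = useState(0)
  const intervalId = useRef<ReturnType<typeof setInterval> | null>(null)

  useEffect(() => {
    if (isThinking) {
      intervalId.current = setInterval(() => {
        setThinkingTime((prev) => prev + 100)
      }, 100)
    } else if (intervalId.current) {
      // Clear the timer immediately when thinking stops, not only on unmount.
      clearInterval(intervalId.current)
      intervalId.current = null
    }

    return () => {
      if (intervalId.current) clearInterval(intervalId.current)
    }
  }, [isThinking])

  return thinkingTime
}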
@@ -10,35 +10,6 @@ interface Props {
 }

 const MessageContent: React.FC<Props> = ({ message }) => {
-  // const { t } = useTranslation()
-  // if (message.status === 'pending') {
-  //   return (
-
-  //   )
-  // }
-
-  // if (message.status === 'searching') {
-  //   return (
-  //     <SearchingContainer>
-  //       <Search size={24} />
-  //       <SearchingText>{t('message.searching')}</SearchingText>
-  //       <BarLoader color="#1677ff" />
-  //     </SearchingContainer>
-  //   )
-  // }
-
-  // if (message.status === 'error') {
-  //   return <MessageError message={message} />
-  // }
-
-  // if (message.type === '@' && model) {
-  //   const content = `[@${model.name}](#) ${getBriefInfo(message.content)}`
-  //   return <Markdown message={{ ...message, content }} />
-  // }
-  // const toolUseRegex = /<tool_use>([\s\S]*?)<\/tool_use>/g
-
-  // console.log('message', message)
-
   return (
     <>
       <Flex gap="8px" wrap style={{ marginBottom: 10 }}>
@@ -1,45 +0,0 @@
-import type { ErrorMessageBlock } from '@renderer/types/newMessage'
-import { Alert as AntdAlert } from 'antd'
-import { FC } from 'react'
-import { useTranslation } from 'react-i18next'
-import styled from 'styled-components'
-
-const MessageError: FC<{ block: ErrorMessageBlock }> = ({ block }) => {
-  return (
-    <>
-      {/* <Markdown block={block} role={role} />
-      {block.error && (
-        <Markdown
-          message={{
-            ...block,
-            content: formatErrorMessage(block.error)
-          }}
-        />
-      )} */}
-      <MessageErrorInfo block={block} />
-    </>
-  )
-}
-
-const MessageErrorInfo: FC<{ block: ErrorMessageBlock }> = ({ block }) => {
-  const { t } = useTranslation()
-
-  const HTTP_ERROR_CODES = [400, 401, 403, 404, 429, 500, 502, 503, 504]
-  console.log('block', block)
-  if (block.error && HTTP_ERROR_CODES.includes(block.error?.status)) {
-    return <Alert description={t(`error.http.${block.error.status}`)} type="error" />
-  }
-  if (block?.error?.message) {
-    return <Alert description={block.error.message} type="error" />
-  }
-
-  return <Alert description={t('error.chat.response')} type="error" />
-}
-
-const Alert = styled(AntdAlert)`
-  margin: 15px 0 8px;
-  padding: 10px;
-  font-size: 12px;
-`
-
-export default MessageError
@@ -30,10 +30,12 @@ import {
   MCPCallToolResponse,
   MCPTool,
   MCPToolResponse,
+  Metrics,
   Model,
   Provider,
   Suggestion,
   ToolCallResponse,
+  Usage,
   WebSearchSource
 } from '@renderer/types'
 import { ChunkType } from '@renderer/types/chunk'
@@ -47,7 +49,7 @@ import {
 } from '@renderer/utils/mcp-tools'
 import { findFileBlocks, findImageBlocks, getMainTextContent } from '@renderer/utils/messageUtils/find'
 import { buildSystemPrompt } from '@renderer/utils/prompt'
-import { first, flatten, sum, takeRight } from 'lodash'
+import { first, flatten, takeRight } from 'lodash'
 import OpenAI from 'openai'

 import { CompletionsParams } from '.'
@@ -270,10 +272,24 @@ export default class AnthropicProvider extends BaseProvider {
       ...this.getCustomParameters(assistant)
     }

+    const { abortController, cleanup } = this.createAbortController(lastUserMessage?.id)
+    const { signal } = abortController
+
+    const finalUsage: Usage = {
+      completion_tokens: 0,
+      prompt_tokens: 0,
+      total_tokens: 0
+    }
+
+    const finalMetrics: Metrics = {
+      completion_tokens: 0,
+      time_completion_millsec: 0,
+      time_first_token_millsec: 0
+    }
+    const toolResponses: MCPToolResponse[] = []
+
+    const processStream = async (body: MessageCreateParamsNonStreaming, idx: number) => {
       let time_first_token_millsec = 0
-      let time_first_content_millsec = 0
-      let checkThinkingContent = false
-      let thinking_content = ''
       const start_time_millsec = new Date().getTime()

       if (!streamOutput) {
@@ -311,36 +327,27 @@ export default class AnthropicProvider extends BaseProvider {
         })
       }

-      const { abortController, cleanup } = this.createAbortController(lastUserMessage?.id)
-      const { signal } = abortController
-      const toolResponses: MCPToolResponse[] = []
-
-      const processStream = (body: MessageCreateParamsNonStreaming, idx: number) => {
+      let thinking_content = ''
+      let isFirstChunk = true
+
       return new Promise<void>((resolve, reject) => {
         // Wait for the API to return the stream
         const toolCalls: ToolUseBlock[] = []
-        let hasThinkingContent = false
         this.sdk.messages
           .stream({ ...body, stream: true }, { signal, timeout: 5 * 60 * 1000 })
           .on('text', (text) => {
-            if (hasThinkingContent && !checkThinkingContent) {
-              checkThinkingContent = true
+            if (isFirstChunk) {
+              isFirstChunk = false
+              if (time_first_token_millsec == 0) {
+                time_first_token_millsec = new Date().getTime()
+              } else {
                 onChunk({
                   type: ChunkType.THINKING_COMPLETE,
                   text: thinking_content,
-                  thinking_millsec: new Date().getTime() - time_first_content_millsec
+                  thinking_millsec: new Date().getTime() - time_first_token_millsec
                 })
               }
-            if (time_first_token_millsec == 0) {
-              time_first_token_millsec = new Date().getTime()
-            }
-
-            thinking_content = ''
-            checkThinkingContent = false
-            hasThinkingContent = false
-
-            if (!hasThinkingContent && time_first_content_millsec === 0) {
-              time_first_content_millsec = new Date().getTime()
             }

             onChunk({ type: ChunkType.TEXT_DELTA, text })
@@ -372,34 +379,22 @@ export default class AnthropicProvider extends BaseProvider {
                 })
               }
             }
+            if (block.type === 'tool_use') {
+              toolCalls.push(block)
+            }
           })
           .on('thinking', (thinking) => {
-            hasThinkingContent = true
-            const currentTime = new Date().getTime() // Get current time for each chunk
-
             if (time_first_token_millsec == 0) {
-              time_first_token_millsec = currentTime
+              time_first_token_millsec = new Date().getTime()
             }

-            // Set time_first_content_millsec ONLY when the first content (thinking or text) arrives
-            if (time_first_content_millsec === 0) {
-              time_first_content_millsec = currentTime
-            }
-
-            // Calculate thinking time as time elapsed since start until this chunk
-            const thinking_time = currentTime - time_first_content_millsec
             onChunk({
               type: ChunkType.THINKING_DELTA,
               text: thinking,
-              thinking_millsec: thinking_time
+              thinking_millsec: new Date().getTime() - time_first_token_millsec
             })
             thinking_content += thinking
           })
-          .on('contentBlock', (content) => {
-            if (content.type === 'tool_use') {
-              toolCalls.push(content)
-            }
-          })
          .on('finalMessage', async (message) => {
            const toolResults: Awaited<ReturnType<typeof parseAndCallTools>> = []
            // tool call
@@ -458,29 +453,28 @@ export default class AnthropicProvider extends BaseProvider {
              newBody.messages = userMessages

              onChunk({ type: ChunkType.LLM_RESPONSE_CREATED })
+              try {
                await processStream(newBody, idx + 1)
+              } catch (error) {
+                console.error('Error processing stream:', error)
+                reject(error)
+              }
            }

-            const time_completion_millsec = new Date().getTime() - start_time_millsec
+            finalUsage.prompt_tokens += message.usage.input_tokens
+            finalUsage.completion_tokens += message.usage.output_tokens
+            finalUsage.total_tokens += finalUsage.prompt_tokens + finalUsage.completion_tokens
+            finalMetrics.completion_tokens = finalUsage.completion_tokens
+            finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+            finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec

            onChunk({
              type: ChunkType.BLOCK_COMPLETE,
              response: {
-                usage: {
-                  prompt_tokens: message.usage.input_tokens,
-                  completion_tokens: message.usage.output_tokens,
-                  total_tokens: sum(Object.values(message.usage))
-                },
-                metrics: {
-                  completion_tokens: message.usage.output_tokens,
-                  time_completion_millsec,
-                  time_first_token_millsec: time_first_token_millsec - start_time_millsec
-                }
+                usage: finalUsage,
+                metrics: finalMetrics
              }
            })
-            // FIXME: temporary workaround, reset timestamps and thinking content
-            time_first_token_millsec = 0
-            time_first_content_millsec = 0
            resolve()
          })
          .on('error', (error) => reject(error))
@@ -40,6 +40,7 @@ import {
   MCPCallToolResponse,
   MCPTool,
   MCPToolResponse,
+  Metrics,
   Model,
   Provider,
   Suggestion,
@@ -370,8 +371,17 @@ export default class GeminiProvider extends BaseProvider {
       }
     }

-    const start_time_millsec = new Date().getTime()
-    let time_first_token_millsec = 0
+    const finalUsage: Usage = {
+      completion_tokens: 0,
+      prompt_tokens: 0,
+      total_tokens: 0
+    }
+
+    const finalMetrics: Metrics = {
+      completion_tokens: 0,
+      time_completion_millsec: 0,
+      time_first_token_millsec: 0
+    }

     const { cleanup, abortController } = this.createAbortController(userLastMessage?.id, true)

@@ -445,6 +455,8 @@ export default class GeminiProvider extends BaseProvider {
       history.push(messageContents)

       let functionCalls: FunctionCall[] = []
+      let time_first_token_millsec = 0
+      const start_time_millsec = new Date().getTime()

       if (stream instanceof GenerateContentResponse) {
         let content = ''
@@ -504,34 +516,18 @@ export default class GeminiProvider extends BaseProvider {
           } as BlockCompleteChunk)
         } else {
           let content = ''
-          let final_time_completion_millsec = 0
-          let lastUsage: Usage | undefined = undefined
           for await (const chunk of stream) {
             if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) break

-            // --- Calculate Metrics ---
-            if (time_first_token_millsec == 0 && chunk.text !== undefined) {
-              // Update based on text arrival
-              time_first_token_millsec = new Date().getTime() - start_time_millsec
+            if (time_first_token_millsec == 0) {
+              time_first_token_millsec = new Date().getTime()
             }

-            // 1. Text Content
             if (chunk.text !== undefined) {
               content += chunk.text
               onChunk({ type: ChunkType.TEXT_DELTA, text: chunk.text })
             }

-            // 2. Usage Data
-            if (chunk.usageMetadata) {
-              lastUsage = {
-                prompt_tokens: chunk.usageMetadata.promptTokenCount || 0,
-                completion_tokens: chunk.usageMetadata.candidatesTokenCount || 0,
-                total_tokens: chunk.usageMetadata.totalTokenCount || 0
-              }
-              final_time_completion_millsec = new Date().getTime() - start_time_millsec
-            }
-
-            // 4. Image Generation
             const generateImage = this.processGeminiImageResponse(chunk, onChunk)
             if (generateImage?.images?.length) {
               onChunk({ type: ChunkType.IMAGE_COMPLETE, image: generateImage })
@@ -541,8 +537,12 @@
             if (chunk.text) {
               onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
             }
+            if (chunk.usageMetadata) {
+              finalUsage.prompt_tokens += chunk.usageMetadata.promptTokenCount || 0
+              finalUsage.completion_tokens += chunk.usageMetadata.candidatesTokenCount || 0
+              finalUsage.total_tokens += chunk.usageMetadata.totalTokenCount || 0
+            }
             if (chunk.candidates?.[0]?.groundingMetadata) {
-              // 3. Grounding/Search Metadata
               const groundingMetadata = chunk.candidates?.[0]?.groundingMetadata
               onChunk({
                 type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
@@ -561,17 +561,11 @@
               functionCalls = functionCalls.concat(chunk.functionCalls)
             }

-            onChunk({
-              type: ChunkType.BLOCK_COMPLETE,
-              response: {
-                metrics: {
-                  completion_tokens: lastUsage?.completion_tokens,
-                  time_completion_millsec: final_time_completion_millsec,
-                  time_first_token_millsec
-                },
-                usage: lastUsage
+            finalMetrics.completion_tokens = finalUsage.completion_tokens
+            finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+            finalMetrics.time_first_token_millsec =
+              (finalMetrics.time_first_token_millsec || 0) + (time_first_token_millsec - start_time_millsec)
              }
-            })
           }

           // --- End Incremental onChunk calls ---
@@ -589,7 +583,15 @@
           if (toolResults.length) {
             await processToolResults(toolResults, idx)
           }

+          // FIXME: due to recursion this will be emitted n times
+          onChunk({
+            type: ChunkType.BLOCK_COMPLETE,
+            response: {
+              usage: finalUsage,
+              metrics: finalMetrics
             }
+          })
         }
       }

@@ -615,17 +617,6 @@
     })

     await processStream(userMessagesStream, 0).finally(cleanup)
-
-    const final_time_completion_millsec = new Date().getTime() - start_time_millsec
-    onChunk({
-      type: ChunkType.BLOCK_COMPLETE,
-      response: {
-        metrics: {
-          time_completion_millsec: final_time_completion_millsec,
-          time_first_token_millsec
-        }
-      }
-    })
   }

   /**
@@ -34,6 +34,7 @@ import {
   MCPCallToolResponse,
   MCPTool,
   MCPToolResponse,
+  Metrics,
   Model,
   Provider,
   Suggestion,
@@ -395,7 +396,6 @@ export default class OpenAICompatibleProvider extends BaseOpenAiProvider {
       return streamOutput
     }

-    const start_time_millsec = new Date().getTime()
     const lastUserMessage = _messages.findLast((m) => m.role === 'user')
     const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
     const { signal } = abortController
@@ -423,6 +423,18 @@
       reqMessages = [systemMessage, ...userMessages].filter(Boolean) as ChatCompletionMessageParam[]
     }

+    let finalUsage: Usage = {
+      completion_tokens: 0,
+      prompt_tokens: 0,
+      total_tokens: 0
+    }
+
+    const finalMetrics: Metrics = {
+      completion_tokens: 0,
+      time_completion_millsec: 0,
+      time_first_token_millsec: 0
+    }
+
     const toolResponses: MCPToolResponse[] = []

     const processToolResults = async (toolResults: Awaited<ReturnType<typeof parseAndCallTools>>, idx: number) => {
@@ -505,18 +517,17 @@

     const processStream = async (stream: any, idx: number) => {
       const toolCalls: ChatCompletionMessageToolCall[] = []
+      let time_first_token_millsec = 0
+      const start_time_millsec = new Date().getTime()
+
       // Handle non-streaming case (already returns early, no change needed here)
       if (!isSupportStreamOutput()) {
-        const time_completion_millsec = new Date().getTime() - start_time_millsec
         // Calculate final metrics once
-        const finalMetrics = {
-          completion_tokens: stream.usage?.completion_tokens,
-          time_completion_millsec,
-          time_first_token_millsec: 0 // Non-streaming, first token time is not relevant
-        }
+        finalMetrics.completion_tokens = stream.usage?.completion_tokens
+        finalMetrics.time_completion_millsec = new Date().getTime() - start_time_millsec

         // Create a synthetic usage object if stream.usage is undefined
-        const finalUsage = stream.usage
+        finalUsage = { ...stream.usage }
         // Separate onChunk calls for text and usage/metrics
         let content = ''
         stream.choices.forEach((choice) => {
@@ -526,7 +537,7 @@
           onChunk({
             type: ChunkType.THINKING_COMPLETE,
             text: choice.message.reasoning,
-            thinking_millsec: time_completion_millsec
+            thinking_millsec: new Date().getTime() - start_time_millsec
           })
         }
         // text
@@ -576,20 +587,9 @@
         return
       }

-      let content = '' // Accumulate content for tool processing if needed
+      let content = ''
       let thinkingContent = ''
-      // Track the final completion time delta
-      let final_time_completion_millsec_delta = 0
-      let final_time_thinking_millsec_delta = 0
-      // Variable to store the last received usage object
-      let lastUsage: Usage | undefined = undefined
-      // let isThinkingInContent: ThoughtProcessor | undefined = undefined
-      // const processThinkingChunk = this.handleThinkingTags()
       let isFirstChunk = true
-      let time_first_token_millsec = 0
-      let time_first_token_millsec_delta = 0
-      let time_first_content_millsec = 0
-      let time_thinking_start = 0

       // 1. Initialize the middleware
       const reasoningTags = [
@@ -640,25 +640,24 @@

       // 3. Consume processedStream and dispatch onChunk
       for await (const chunk of readableStreamAsyncIterable(processedStream)) {
-        const currentTime = new Date().getTime()
         const delta = chunk.type === 'finish' ? chunk.delta : chunk
         const rawChunk = chunk.type === 'finish' ? chunk.chunk : chunk

         switch (chunk.type) {
           case 'reasoning': {
-            if (time_thinking_start === 0) {
-              time_thinking_start = currentTime
-              time_first_token_millsec = currentTime
-              time_first_token_millsec_delta = currentTime - start_time_millsec
+            if (time_first_token_millsec === 0) {
+              time_first_token_millsec = new Date().getTime()
             }
             thinkingContent += chunk.textDelta
-            const thinking_time = currentTime - time_thinking_start
-            onChunk({ type: ChunkType.THINKING_DELTA, text: chunk.textDelta, thinking_millsec: thinking_time })
+            onChunk({
+              type: ChunkType.THINKING_DELTA,
+              text: chunk.textDelta,
+              thinking_millsec: new Date().getTime() - time_first_token_millsec
+            })
             break
           }
           case 'text-delta': {
             let textDelta = chunk.textDelta

             if (assistant.enableWebSearch && delta) {
               const originalDelta = rawChunk?.choices?.[0]?.delta

@@ -676,25 +675,32 @@
             if (isFirstChunk) {
               isFirstChunk = false
               if (time_first_token_millsec === 0) {
-                time_first_token_millsec = currentTime
-                time_first_token_millsec_delta = currentTime - start_time_millsec
-              }
-              }
-            content += textDelta
-            if (time_thinking_start > 0 && time_first_content_millsec === 0) {
-              time_first_content_millsec = currentTime
-              final_time_thinking_millsec_delta = time_first_content_millsec - time_thinking_start
-
+                time_first_token_millsec = new Date().getTime()
+              } else {
                 onChunk({
                   type: ChunkType.THINKING_COMPLETE,
                   text: thinkingContent,
-                  thinking_millsec: final_time_thinking_millsec_delta
+                  thinking_millsec: new Date().getTime() - time_first_token_millsec
                 })
               }
+            }
+            content += textDelta
             onChunk({ type: ChunkType.TEXT_DELTA, text: textDelta })
             break
           }
           case 'tool-calls': {
+            if (isFirstChunk) {
+              isFirstChunk = false
+              if (time_first_token_millsec === 0) {
+                time_first_token_millsec = new Date().getTime()
+              } else {
+                onChunk({
+                  type: ChunkType.THINKING_COMPLETE,
+                  text: thinkingContent,
+                  thinking_millsec: new Date().getTime() - time_first_token_millsec
+                })
+              }
+            }
             chunk.delta.tool_calls.forEach((toolCall) => {
               const { id, index, type, function: fun } = toolCall
               if (id && type === 'function' && fun) {
@@ -721,10 +727,14 @@

             if (!isEmpty(finishReason)) {
               onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
-              final_time_completion_millsec_delta = currentTime - start_time_millsec
               if (usage) {
-                lastUsage = usage
+                finalUsage.completion_tokens += usage.completion_tokens || 0
+                finalUsage.prompt_tokens += usage.prompt_tokens || 0
+                finalUsage.total_tokens += usage.total_tokens || 0
+                finalMetrics.completion_tokens += usage.completion_tokens || 0
               }
+              finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+              finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec
               if (originalFinishDelta?.annotations) {
                 onChunk({
                   type: ChunkType.LLM_WEB_SEARCH_COMPLETE,
@@ -774,6 +784,11 @@
                 } as LLMWebSearchCompleteChunk)
               }
             }
+            break
+          }
+        }
+      }
+
       reqMessages.push({
         role: 'assistant',
         content: content,
@@ -801,22 +816,14 @@
       if (toolResults.length) {
         await processToolResults(toolResults, idx)
       }

       onChunk({
         type: ChunkType.BLOCK_COMPLETE,
         response: {
-          usage: lastUsage,
-          metrics: {
-            completion_tokens: lastUsage?.completion_tokens,
-            time_completion_millsec: final_time_completion_millsec_delta,
-            time_first_token_millsec: time_first_token_millsec_delta,
-            time_thinking_millsec: final_time_thinking_millsec_delta
-          }
+          usage: finalUsage,
+          metrics: finalMetrics
         }
       })
-      break
-      }
-      }
-      }
     }

     reqMessages = processReqMessages(model, reqMessages)
@@ -24,6 +24,7 @@ import {
   MCPCallToolResponse,
   MCPTool,
   MCPToolResponse,
+  Metrics,
   Model,
   Provider,
   Suggestion,
@@ -332,7 +333,6 @@ export abstract class BaseOpenAiProvider extends BaseProvider {
     const lastUserMessage = _messages.findLast((m) => m.role === 'user')
     const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
     const { signal } = abortController
-    let time_first_token_millsec_delta = 0
     const start_time_millsec = new Date().getTime()
     const response = await this.sdk.chat.completions
       // @ts-ignore key is not typed
@@ -354,8 +354,17 @@
     const processStream = async (stream: any) => {
       let content = ''
       let isFirstChunk = true
-      let final_time_completion_millsec_delta = 0
-      let lastUsage: Usage | undefined = undefined
+      const finalUsage: Usage = {
+        completion_tokens: 0,
+        prompt_tokens: 0,
+        total_tokens: 0
+      }
+
+      const finalMetrics: Metrics = {
+        completion_tokens: 0,
+        time_completion_millsec: 0,
+        time_first_token_millsec: 0
+      }
       for await (const chunk of stream as any) {
         if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
           break
@@ -368,17 +377,21 @@
         }
         if (isFirstChunk) {
           isFirstChunk = false
-          time_first_token_millsec_delta = new Date().getTime() - start_time_millsec
+          finalMetrics.time_first_token_millsec = new Date().getTime() - start_time_millsec
         }
         content += delta.content
         onChunk({ type: ChunkType.TEXT_DELTA, text: delta.content })
       }
       if (!isEmpty(finishReason) || chunk?.annotations) {
         onChunk({ type: ChunkType.TEXT_COMPLETE, text: content })
-        final_time_completion_millsec_delta = new Date().getTime() - start_time_millsec
+        finalMetrics.time_completion_millsec = new Date().getTime() - start_time_millsec
         if (chunk.usage) {
-          lastUsage = chunk.usage
+          const usage = chunk.usage as OpenAI.Completions.CompletionUsage
+          finalUsage.completion_tokens = usage.completion_tokens
+          finalUsage.prompt_tokens = usage.prompt_tokens
+          finalUsage.total_tokens = usage.total_tokens
         }
+        finalMetrics.completion_tokens = finalUsage.completion_tokens
       }
       if (delta?.annotations) {
         onChunk({
@@ -393,12 +406,8 @@
         onChunk({
           type: ChunkType.BLOCK_COMPLETE,
           response: {
-            usage: lastUsage,
-            metrics: {
-              completion_tokens: lastUsage?.completion_tokens,
-              time_completion_millsec: final_time_completion_millsec_delta,
-              time_first_token_millsec: time_first_token_millsec_delta
-            }
+            usage: finalUsage,
+            metrics: finalMetrics
           }
         })
       }
@@ -454,9 +463,6 @@
       userMessage.push(await this.getReponseMessageParam(message, model))
     }

-    let time_first_token_millsec = 0
-    const start_time_millsec = new Date().getTime()
-
     const lastUserMessage = _messages.findLast((m) => m.role === 'user')
     const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true)
     const { signal } = abortController
@@ -469,6 +475,18 @@
       reqMessages = [systemMessage, ...userMessage].filter(Boolean) as OpenAI.Responses.EasyInputMessage[]
     }

+    const finalUsage: Usage = {
+      completion_tokens: 0,
+      prompt_tokens: 0,
+      total_tokens: 0
+    }
+
+    const finalMetrics: Metrics = {
+      completion_tokens: 0,
+      time_completion_millsec: 0,
+      time_first_token_millsec: 0
+    }
+
     const toolResponses: MCPToolResponse[] = []

     const processToolResults = async (toolResults: Awaited<ReturnType<typeof parseAndCallTools>>, idx: number) => {
@@ -548,6 +566,8 @@
       idx: number
     ) => {
       const toolCalls: OpenAI.Responses.ResponseFunctionToolCall[] = []
+      let time_first_token_millsec = 0
+      const start_time_millsec = new Date().getTime()

       if (!streamOutput) {
         const nonStream = stream as OpenAI.Responses.Response
@@ -632,8 +652,6 @@

       const outputItems: OpenAI.Responses.ResponseOutputItem[] = []

-      let lastUsage: Usage | undefined = undefined
-      let final_time_completion_millsec_delta = 0
       for await (const chunk of stream as Stream<OpenAI.Responses.ResponseStreamEvent>) {
         if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
           break
@@ -707,18 +725,18 @@
           }
           break
         case 'response.completed': {
-          final_time_completion_millsec_delta = new Date().getTime() - start_time_millsec
           const completion_tokens =
             (chunk.response.usage?.output_tokens || 0) +
             (chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 0)
           const total_tokens =
             (chunk.response.usage?.total_tokens || 0) +
             (chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 0)
-          lastUsage = {
-            completion_tokens,
-            prompt_tokens: chunk.response.usage?.input_tokens || 0,
-            total_tokens
-          }
+          finalUsage.completion_tokens += completion_tokens
+          finalUsage.prompt_tokens += chunk.response.usage?.input_tokens || 0
+          finalUsage.total_tokens += total_tokens
+          finalMetrics.completion_tokens += completion_tokens
+          finalMetrics.time_completion_millsec += new Date().getTime() - start_time_millsec
+          finalMetrics.time_first_token_millsec = time_first_token_millsec - start_time_millsec
           break
         }
         case 'error':
@@ -760,12 +778,8 @@
         onChunk({
           type: ChunkType.BLOCK_COMPLETE,
           response: {
-            usage: lastUsage,
-            metrics: {
-              completion_tokens: lastUsage?.completion_tokens,
-              time_completion_millsec: final_time_completion_millsec_delta,
-              time_first_token_millsec: time_first_token_millsec - start_time_millsec
-            }
+            usage: finalUsage,
+            metrics: finalMetrics
           }
         })
       }
@@ -565,7 +565,7 @@ const fetchAndProcessAssistantResponseImpl = async (
         message: pauseErrorLanguagePlaceholder || error.message || 'Stream processing error',
         originalMessage: error.message,
         stack: error.stack,
-        status: error.status,
+        status: error.status || error.code,
         requestId: error.request_id
       }
       if (lastBlockId) {
@@ -609,13 +609,14 @@ const fetchAndProcessAssistantResponseImpl = async (
       // Update the topic name
       autoRenameTopic(assistant, topicId)

-      if (response && !response.usage) {
+      if (response && response.usage?.total_tokens === 0) {
         const usage = await estimateMessagesUsage({ assistant, messages: finalContextWithAssistant })
         response.usage = usage
       }
+      console.log('response', response)
     }
     if (response && response.metrics) {
-      if (!response.metrics.completion_tokens && response.usage) {
+      if (response.metrics.completion_tokens === 0 && response.usage?.completion_tokens) {
         response = {
           ...response,
           metrics: {
@@ -121,8 +121,8 @@ export type Usage = OpenAI.Completions.CompletionUsage & {
 }

 export type Metrics = {
-  completion_tokens?: number
-  time_completion_millsec?: number
+  completion_tokens: number
+  time_completion_millsec: number
   time_first_token_millsec?: number
   time_thinking_millsec?: number
 }
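Making completion_tokens and time_completion_millsec required on Metrics is what forces the providers above to zero-initialize finalMetrics, and it changes how a consumer detects "nothing was reported": it now checks for zero rather than for a missing field, which is also what the messageThunk hunk above does before falling back to estimated usage. A small hedged sketch of that fallback, assuming a hypothetical estimateUsage helper (not the real estimateMessagesUsage signature):

interface Usage {
  completion_tokens: number
  prompt_tokens: number
  total_tokens: number
}
interface Metrics {
  completion_tokens: number
  time_completion_millsec: number
}

// Only estimate when the provider reported an all-zero usage object, then backfill metrics.
async function fillMissingUsage(
  response: { usage?: Usage; metrics?: Metrics },
  estimateUsage: () => Promise<Usage>
): Promise<void> {
  if (response.usage?.total_tokens === 0) {
    response.usage = await estimateUsage()
  }
  if (response.metrics && response.metrics.completion_tokens === 0 && response.usage?.completion_tokens) {
    response.metrics = { ...response.metrics, completion_tokens: response.usage.completion_tokens }
  }
}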