Mirror of https://github.com/CherryHQ/cherry-studio.git (synced 2025-12-20 23:22:05 +08:00)
fix(metrics): restore first token latency reporting (#10538)
Commit: 8bec7640fa
Parent: fcf53f06ef
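
For orientation, here is a condensed, self-contained TypeScript sketch of the timing flow this commit restores. It is assembled from the added lines in the diff below; the real AiSdkToChunkAdapter carries far more state and chunk handling, and the TimingSketch class and startResponse helper are illustrative names, not part of the patch.

// Condensed sketch of the restored timing logic (illustrative only).
class TimingSketch {
  private responseStartTimestamp: number | null = null
  private firstTokenTimestamp: number | null = null

  // Called once when the stream starts: clear old state, stamp the start time.
  startResponse(): void {
    this.resetTimingState()
    this.responseStartTimestamp = Date.now()
  }

  // Called on every text/reasoning delta; only the first call records a timestamp.
  markFirstTokenIfNeeded(): void {
    if (this.firstTokenTimestamp === null && this.responseStartTimestamp !== null) {
      this.firstTokenTimestamp = Date.now()
    }
  }

  resetTimingState(): void {
    this.responseStartTimestamp = null
    this.firstTokenTimestamp = null
  }

  // Mirrors buildMetrics(): first-token latency measured from the stream start,
  // completion time measured from the first token (falling back to the start).
  buildMetrics(outputTokens: number) {
    const now = Date.now()
    const start = this.responseStartTimestamp ?? now
    const firstToken = this.firstTokenTimestamp
    const timeFirstToken = Math.max(firstToken != null ? firstToken - start : 0, 0)
    let timeCompletion = Math.max(now - (firstToken ?? start), 0)
    if (timeCompletion === 0 && outputTokens > 0) timeCompletion = 1 // avoid 0 ms for non-empty output
    return {
      completion_tokens: outputTokens,
      time_first_token_millsec: timeFirstToken,
      time_completion_millsec: timeCompletion
    }
  }
}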
@@ -22,6 +22,8 @@ export class AiSdkToChunkAdapter {
   private accumulate: boolean | undefined
   private isFirstChunk = true
   private enableWebSearch: boolean = false
+  private responseStartTimestamp: number | null = null
+  private firstTokenTimestamp: number | null = null
 
   constructor(
     private onChunk: (chunk: Chunk) => void,
@@ -34,6 +36,17 @@ export class AiSdkToChunkAdapter {
     this.enableWebSearch = enableWebSearch || false
   }
 
+  private markFirstTokenIfNeeded() {
+    if (this.firstTokenTimestamp === null && this.responseStartTimestamp !== null) {
+      this.firstTokenTimestamp = Date.now()
+    }
+  }
+
+  private resetTimingState() {
+    this.responseStartTimestamp = null
+    this.firstTokenTimestamp = null
+  }
+
   /**
    * Handle the AI SDK stream result
    * @param aiSdkResult The AI SDK stream result object
@@ -61,6 +74,8 @@ export class AiSdkToChunkAdapter {
       webSearchResults: [],
       reasoningId: ''
     }
+    this.resetTimingState()
+    this.responseStartTimestamp = Date.now()
     // Reset link converter state at the start of stream
     this.isFirstChunk = true
 
@@ -73,6 +88,7 @@ export class AiSdkToChunkAdapter {
       if (this.enableWebSearch) {
         const remainingText = flushLinkConverterBuffer()
         if (remainingText) {
+          this.markFirstTokenIfNeeded()
           this.onChunk({
             type: ChunkType.TEXT_DELTA,
             text: remainingText
@@ -87,6 +103,7 @@ export class AiSdkToChunkAdapter {
       }
     } finally {
       reader.releaseLock()
+      this.resetTimingState()
     }
   }
 
@@ -137,6 +154,7 @@ export class AiSdkToChunkAdapter {
 
         // Only emit chunk if there's text to send
         if (finalText) {
+          this.markFirstTokenIfNeeded()
          this.onChunk({
            type: ChunkType.TEXT_DELTA,
            text: this.accumulate ? final.text : finalText
@@ -161,6 +179,9 @@ export class AiSdkToChunkAdapter {
        break
      case 'reasoning-delta':
        final.reasoningContent += chunk.text || ''
+        if (chunk.text) {
+          this.markFirstTokenIfNeeded()
+        }
        this.onChunk({
          type: ChunkType.THINKING_DELTA,
          text: final.reasoningContent || ''
@@ -260,44 +281,37 @@ export class AiSdkToChunkAdapter {
        break
      }
 
-      case 'finish':
+      case 'finish': {
+        const usage = {
+          completion_tokens: chunk.totalUsage?.outputTokens || 0,
+          prompt_tokens: chunk.totalUsage?.inputTokens || 0,
+          total_tokens: chunk.totalUsage?.totalTokens || 0
+        }
+        const metrics = this.buildMetrics(chunk.totalUsage)
+        const baseResponse = {
+          text: final.text || '',
+          reasoning_content: final.reasoningContent || ''
+        }
+
        this.onChunk({
          type: ChunkType.BLOCK_COMPLETE,
          response: {
-            text: final.text || '',
-            reasoning_content: final.reasoningContent || '',
-            usage: {
-              completion_tokens: chunk.totalUsage.outputTokens || 0,
-              prompt_tokens: chunk.totalUsage.inputTokens || 0,
-              total_tokens: chunk.totalUsage.totalTokens || 0
-            },
-            metrics: chunk.totalUsage
-              ? {
-                  completion_tokens: chunk.totalUsage.outputTokens || 0,
-                  time_completion_millsec: 0
-                }
-              : undefined
+            ...baseResponse,
+            usage: { ...usage },
+            metrics: metrics ? { ...metrics } : undefined
          }
        })
        this.onChunk({
          type: ChunkType.LLM_RESPONSE_COMPLETE,
          response: {
-            text: final.text || '',
-            reasoning_content: final.reasoningContent || '',
-            usage: {
-              completion_tokens: chunk.totalUsage.outputTokens || 0,
-              prompt_tokens: chunk.totalUsage.inputTokens || 0,
-              total_tokens: chunk.totalUsage.totalTokens || 0
-            },
-            metrics: chunk.totalUsage
-              ? {
-                  completion_tokens: chunk.totalUsage.outputTokens || 0,
-                  time_completion_millsec: 0
-                }
-              : undefined
+            ...baseResponse,
+            usage: { ...usage },
+            metrics: metrics ? { ...metrics } : undefined
          }
        })
+        this.resetTimingState()
        break
+      }
 
      // === Source and file related events ===
      case 'source':
@@ -333,6 +347,34 @@ export class AiSdkToChunkAdapter {
      default:
    }
  }
+
+  private buildMetrics(totalUsage?: {
+    inputTokens?: number | null
+    outputTokens?: number | null
+    totalTokens?: number | null
+  }) {
+    if (!totalUsage) {
+      return undefined
+    }
+
+    const completionTokens = totalUsage.outputTokens ?? 0
+    const now = Date.now()
+    const start = this.responseStartTimestamp ?? now
+    const firstToken = this.firstTokenTimestamp
+    const timeFirstToken = Math.max(firstToken != null ? firstToken - start : 0, 0)
+    const baseForCompletion = firstToken ?? start
+    let timeCompletion = Math.max(now - baseForCompletion, 0)
+
+    if (timeCompletion === 0 && completionTokens > 0) {
+      timeCompletion = 1
+    }
+
+    return {
+      completion_tokens: completionTokens,
+      time_first_token_millsec: timeFirstToken,
+      time_completion_millsec: timeCompletion
+    }
+  }
 }
 
 export default AiSdkToChunkAdapter
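
As a usage note, the restored metrics object makes first-token latency and throughput easy to surface downstream. The summarizeMetrics helper below is a hypothetical consumer-side sketch, not part of this commit; it only assumes the metrics shape returned by buildMetrics above.

// Hypothetical consumer-side helper (not part of this commit): formats the
// metrics emitted alongside BLOCK_COMPLETE / LLM_RESPONSE_COMPLETE responses.
interface StreamMetrics {
  completion_tokens: number
  time_first_token_millsec: number
  time_completion_millsec: number
}

function summarizeMetrics(m: StreamMetrics): string {
  const firstTokenSec = (m.time_first_token_millsec / 1000).toFixed(2)
  // time_completion_millsec is clamped to >= 1 when tokens exist, so division is safe.
  const tokensPerSec = (m.completion_tokens / (m.time_completion_millsec / 1000)).toFixed(1)
  return `first token ${firstTokenSec}s, ${tokensPerSec} tok/s`
}

// Example: 256 tokens, 420 ms to first token, 3.2 s of generation.
console.log(
  summarizeMetrics({ completion_tokens: 256, time_first_token_millsec: 420, time_completion_millsec: 3200 })
) // "first token 0.42s, 80.0 tok/s"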