fix(metrics): restore first token latency reporting (#10538)

This commit is contained in:
rebecca554owen 2025-10-06 22:19:09 +08:00 committed by GitHub
parent fcf53f06ef
commit 8bec7640fa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -22,6 +22,8 @@ export class AiSdkToChunkAdapter {
private accumulate: boolean | undefined private accumulate: boolean | undefined
private isFirstChunk = true private isFirstChunk = true
private enableWebSearch: boolean = false private enableWebSearch: boolean = false
private responseStartTimestamp: number | null = null
private firstTokenTimestamp: number | null = null
constructor( constructor(
private onChunk: (chunk: Chunk) => void, private onChunk: (chunk: Chunk) => void,
@ -34,6 +36,17 @@ export class AiSdkToChunkAdapter {
this.enableWebSearch = enableWebSearch || false this.enableWebSearch = enableWebSearch || false
} }
/**
 * Records the wall-clock time of the first emitted token, exactly once
 * per stream, and only after the stream start time has been captured.
 */
private markFirstTokenIfNeeded() {
  const streamStarted = this.responseStartTimestamp !== null
  if (streamStarted && this.firstTokenTimestamp === null) {
    this.firstTokenTimestamp = Date.now()
  }
}
/**
 * Clears both timing markers so the next stream begins with a fresh
 * latency measurement.
 */
private resetTimingState() {
  this.firstTokenTimestamp = null
  this.responseStartTimestamp = null
}
/** /**
* AI SDK * AI SDK
* @param aiSdkResult AI SDK * @param aiSdkResult AI SDK
@ -61,6 +74,8 @@ export class AiSdkToChunkAdapter {
webSearchResults: [], webSearchResults: [],
reasoningId: '' reasoningId: ''
} }
this.resetTimingState()
this.responseStartTimestamp = Date.now()
// Reset link converter state at the start of stream // Reset link converter state at the start of stream
this.isFirstChunk = true this.isFirstChunk = true
@ -73,6 +88,7 @@ export class AiSdkToChunkAdapter {
if (this.enableWebSearch) { if (this.enableWebSearch) {
const remainingText = flushLinkConverterBuffer() const remainingText = flushLinkConverterBuffer()
if (remainingText) { if (remainingText) {
this.markFirstTokenIfNeeded()
this.onChunk({ this.onChunk({
type: ChunkType.TEXT_DELTA, type: ChunkType.TEXT_DELTA,
text: remainingText text: remainingText
@ -87,6 +103,7 @@ export class AiSdkToChunkAdapter {
} }
} finally { } finally {
reader.releaseLock() reader.releaseLock()
this.resetTimingState()
} }
} }
@ -137,6 +154,7 @@ export class AiSdkToChunkAdapter {
// Only emit chunk if there's text to send // Only emit chunk if there's text to send
if (finalText) { if (finalText) {
this.markFirstTokenIfNeeded()
this.onChunk({ this.onChunk({
type: ChunkType.TEXT_DELTA, type: ChunkType.TEXT_DELTA,
text: this.accumulate ? final.text : finalText text: this.accumulate ? final.text : finalText
@ -161,6 +179,9 @@ export class AiSdkToChunkAdapter {
break break
case 'reasoning-delta': case 'reasoning-delta':
final.reasoningContent += chunk.text || '' final.reasoningContent += chunk.text || ''
if (chunk.text) {
this.markFirstTokenIfNeeded()
}
this.onChunk({ this.onChunk({
type: ChunkType.THINKING_DELTA, type: ChunkType.THINKING_DELTA,
text: final.reasoningContent || '' text: final.reasoningContent || ''
@ -260,44 +281,37 @@ export class AiSdkToChunkAdapter {
break break
} }
case 'finish': case 'finish': {
const usage = {
completion_tokens: chunk.totalUsage?.outputTokens || 0,
prompt_tokens: chunk.totalUsage?.inputTokens || 0,
total_tokens: chunk.totalUsage?.totalTokens || 0
}
const metrics = this.buildMetrics(chunk.totalUsage)
const baseResponse = {
text: final.text || '',
reasoning_content: final.reasoningContent || ''
}
this.onChunk({ this.onChunk({
type: ChunkType.BLOCK_COMPLETE, type: ChunkType.BLOCK_COMPLETE,
response: { response: {
text: final.text || '', ...baseResponse,
reasoning_content: final.reasoningContent || '', usage: { ...usage },
usage: { metrics: metrics ? { ...metrics } : undefined
completion_tokens: chunk.totalUsage.outputTokens || 0,
prompt_tokens: chunk.totalUsage.inputTokens || 0,
total_tokens: chunk.totalUsage.totalTokens || 0
},
metrics: chunk.totalUsage
? {
completion_tokens: chunk.totalUsage.outputTokens || 0,
time_completion_millsec: 0
}
: undefined
} }
}) })
this.onChunk({ this.onChunk({
type: ChunkType.LLM_RESPONSE_COMPLETE, type: ChunkType.LLM_RESPONSE_COMPLETE,
response: { response: {
text: final.text || '', ...baseResponse,
reasoning_content: final.reasoningContent || '', usage: { ...usage },
usage: { metrics: metrics ? { ...metrics } : undefined
completion_tokens: chunk.totalUsage.outputTokens || 0,
prompt_tokens: chunk.totalUsage.inputTokens || 0,
total_tokens: chunk.totalUsage.totalTokens || 0
},
metrics: chunk.totalUsage
? {
completion_tokens: chunk.totalUsage.outputTokens || 0,
time_completion_millsec: 0
}
: undefined
} }
}) })
this.resetTimingState()
break break
}
// === 源和文件相关事件 === // === 源和文件相关事件 ===
case 'source': case 'source':
@ -333,6 +347,34 @@ export class AiSdkToChunkAdapter {
default: default:
} }
} }
/**
 * Derives latency metrics from the usage block of a `finish` event.
 *
 * @param totalUsage Token counts reported by the AI SDK; when absent no
 *   metrics can be produced and `undefined` is returned.
 * @returns The completion token count plus first-token latency and total
 *   completion time in milliseconds, or `undefined` without usage data.
 */
private buildMetrics(totalUsage?: {
  inputTokens?: number | null
  outputTokens?: number | null
  totalTokens?: number | null
}) {
  if (!totalUsage) {
    return undefined
  }
  const completionTokens = totalUsage.outputTokens ?? 0
  const now = Date.now()
  // Fall back to "now" if the stream start was never recorded, so the
  // subtractions below cannot go negative.
  const streamStart = this.responseStartTimestamp ?? now
  const firstTokenAt = this.firstTokenTimestamp
  const firstTokenLatency =
    firstTokenAt != null ? Math.max(firstTokenAt - streamStart, 0) : 0
  // Completion time runs from the first token when one was observed,
  // otherwise from the stream start.
  let completionTime = Math.max(now - (firstTokenAt ?? streamStart), 0)
  // Report at least 1 ms whenever tokens were produced, so downstream
  // token-rate calculations never divide by zero.
  if (completionTime === 0 && completionTokens > 0) {
    completionTime = 1
  }
  return {
    completion_tokens: completionTokens,
    time_first_token_millsec: firstTokenLatency,
    time_completion_millsec: completionTime
  }
}
} }
export default AiSdkToChunkAdapter export default AiSdkToChunkAdapter