fix(metrics): restore first token latency reporting (#10538)

2025-12-20 15:10:59 +08:00 · 2025-10-06 22:19:09 +08:00 · 2025-10-06 22:19:09 +08:00 · 8bec7640fa
commit 8bec7640fa
parent fcf53f06ef
1 changed files with 69 additions and 27 deletions
--- a/src/renderer/src/aiCore/chunk/AiSdkToChunkAdapter.ts
+++ b/src/renderer/src/aiCore/chunk/AiSdkToChunkAdapter.ts
@ -22,6 +22,8 @@ export class AiSdkToChunkAdapter {
  private accumulate: boolean | undefined
  private isFirstChunk = true
  private enableWebSearch: boolean = false
+  private responseStartTimestamp: number | null = null
+  private firstTokenTimestamp: number | null = null

  constructor(
    private onChunk: (chunk: Chunk) => void,
@ -34,6 +36,17 @@ export class AiSdkToChunkAdapter {
    this.enableWebSearch = enableWebSearch || false
  }

+  /**
+   * Stamps the arrival time of the first token for the current response.
+   * Does nothing when no response start has been recorded, or when the
+   * first token has already been stamped.
+   */
+  private markFirstTokenIfNeeded() {
+    const responseStarted = this.responseStartTimestamp !== null
+    const alreadyMarked = this.firstTokenTimestamp !== null
+    if (!responseStarted || alreadyMarked) {
+      return
+    }
+    this.firstTokenTimestamp = Date.now()
+  }
+
+  /**
+   * Clears both timing marks (response start and first token) back to null.
+   */
+  private resetTimingState() {
+    this.responseStartTimestamp = this.firstTokenTimestamp = null
+  }
+
  /**
   * 处理 AI SDK 流结果
   * @param aiSdkResult AI SDK 的流结果对象
@ -61,6 +74,8 @@ export class AiSdkToChunkAdapter {
      webSearchResults: [],
      reasoningId: ''
    }
+    this.resetTimingState()
+    this.responseStartTimestamp = Date.now()
    // Reset link converter state at the start of stream
    this.isFirstChunk = true

@ -73,6 +88,7 @@ export class AiSdkToChunkAdapter {
          if (this.enableWebSearch) {
            const remainingText = flushLinkConverterBuffer()
            if (remainingText) {
+              this.markFirstTokenIfNeeded()
              this.onChunk({
                type: ChunkType.TEXT_DELTA,
                text: remainingText
@ -87,6 +103,7 @@ export class AiSdkToChunkAdapter {
      }
    } finally {
      reader.releaseLock()
+      this.resetTimingState()
    }
  }

@ -137,6 +154,7 @@ export class AiSdkToChunkAdapter {

        // Only emit chunk if there's text to send
        if (finalText) {
+          this.markFirstTokenIfNeeded()
          this.onChunk({
            type: ChunkType.TEXT_DELTA,
            text: this.accumulate ? final.text : finalText
@ -161,6 +179,9 @@ export class AiSdkToChunkAdapter {
        break
      case 'reasoning-delta':
        final.reasoningContent += chunk.text || ''
+        if (chunk.text) {
+          this.markFirstTokenIfNeeded()
+        }
        this.onChunk({
          type: ChunkType.THINKING_DELTA,
          text: final.reasoningContent || ''
@ -260,44 +281,37 @@ export class AiSdkToChunkAdapter {
        break
      }

-      case 'finish':
+      case 'finish': {
+        const usage = {
+          completion_tokens: chunk.totalUsage?.outputTokens || 0,
+          prompt_tokens: chunk.totalUsage?.inputTokens || 0,
+          total_tokens: chunk.totalUsage?.totalTokens || 0
+        }
+        const metrics = this.buildMetrics(chunk.totalUsage)
+        const baseResponse = {
+          text: final.text || '',
+          reasoning_content: final.reasoningContent || ''
+        }
+
        this.onChunk({
          type: ChunkType.BLOCK_COMPLETE,
          response: {
-            text: final.text || '',
-            reasoning_content: final.reasoningContent || '',
-            usage: {
-              completion_tokens: chunk.totalUsage.outputTokens || 0,
-              prompt_tokens: chunk.totalUsage.inputTokens || 0,
-              total_tokens: chunk.totalUsage.totalTokens || 0
-            },
-            metrics: chunk.totalUsage
-              ? {
-                  completion_tokens: chunk.totalUsage.outputTokens || 0,
-                  time_completion_millsec: 0
-                }
-              : undefined
+            ...baseResponse,
+            usage: { ...usage },
+            metrics: metrics ? { ...metrics } : undefined
          }
        })
        this.onChunk({
          type: ChunkType.LLM_RESPONSE_COMPLETE,
          response: {
-            text: final.text || '',
-            reasoning_content: final.reasoningContent || '',
-            usage: {
-              completion_tokens: chunk.totalUsage.outputTokens || 0,
-              prompt_tokens: chunk.totalUsage.inputTokens || 0,
-              total_tokens: chunk.totalUsage.totalTokens || 0
-            },
-            metrics: chunk.totalUsage
-              ? {
-                  completion_tokens: chunk.totalUsage.outputTokens || 0,
-                  time_completion_millsec: 0
-                }
-              : undefined
+            ...baseResponse,
+            usage: { ...usage },
+            metrics: metrics ? { ...metrics } : undefined
          }
        })
+        this.resetTimingState()
        break
+      }

      // === 源和文件相关事件 ===
      case 'source':
@ -333,6 +347,34 @@ export class AiSdkToChunkAdapter {
      default:
    }
  }
+
+  /**
+   * Builds the metrics payload for a finished response.
+   *
+   * - `time_first_token_millsec`: elapsed time from the recorded response start to the
+   *   first token, or 0 when no first token was recorded.
+   * - `time_completion_millsec`: elapsed time from the first token (or from the response
+   *   start when no first token was recorded) until this method is called.
+   *
+   * @param totalUsage Token usage reported with the finish event.
+   * @returns The metrics object, or `undefined` when no usage was reported.
+   */
+  private buildMetrics(totalUsage?: {
+    inputTokens?: number | null
+    outputTokens?: number | null
+    totalTokens?: number | null
+  }) {
+    if (!totalUsage) {
+      return undefined
+    }
+
+    // `||` rather than `??` on purpose: it also maps NaN to 0, matching how the finish
+    // handler derives `usage.completion_tokens`, so both fields of one response agree.
+    const completionTokens = totalUsage.outputTokens || 0
+    const now = Date.now()
+    // Without a recorded start, fall back to `now` so every duration below collapses to 0.
+    const start = this.responseStartTimestamp ?? now
+    const firstToken = this.firstTokenTimestamp
+    // Clamped at 0 so a reported duration is never negative.
+    const timeFirstToken = Math.max(firstToken != null ? firstToken - start : 0, 0)
+    const baseForCompletion = firstToken ?? start
+    let timeCompletion = Math.max(now - baseForCompletion, 0)
+
+    // Report at least 1 ms whenever tokens were produced.
+    // NOTE(review): presumably protects downstream tokens-per-second math from dividing
+    // by zero — confirm against the consumers of these metrics.
+    if (timeCompletion === 0 && completionTokens > 0) {
+      timeCompletion = 1
+    }
+
+    return {
+      completion_tokens: completionTokens,
+      time_first_token_millsec: timeFirstToken,
+      time_completion_millsec: timeCompletion
+    }
+  }
 }

 export default AiSdkToChunkAdapter