From c1f1d7996d56a7a4f1486b3eafc33a07dca8e5a9 Mon Sep 17 00:00:00 2001 From: SuYao Date: Sat, 22 Nov 2025 21:43:57 +0800 Subject: [PATCH] test: add thinking budget token test (#11305) * refactor: add thinking budget token test * fix comment --- .../aiCore/prepareParams/parameterBuilder.ts | 6 +- .../src/aiCore/trace/AiSdkSpanAdapter.ts | 10 +-- .../aiCore/utils/__tests__/reasoning.test.ts | 87 +++++++++++++++++++ src/renderer/src/aiCore/utils/reasoning.ts | 23 +++-- .../src/config/__test__/reasoning.test.ts | 33 +++++++ tests/renderer.setup.ts | 5 ++ 6 files changed, 151 insertions(+), 13 deletions(-) create mode 100644 src/renderer/src/aiCore/utils/__tests__/reasoning.test.ts diff --git a/src/renderer/src/aiCore/prepareParams/parameterBuilder.ts b/src/renderer/src/aiCore/prepareParams/parameterBuilder.ts index 6f8747a7c5..4208907236 100644 --- a/src/renderer/src/aiCore/prepareParams/parameterBuilder.ts +++ b/src/renderer/src/aiCore/prepareParams/parameterBuilder.ts @@ -123,7 +123,11 @@ export async function buildStreamTextParams( isSupportedThinkingTokenClaudeModel(model) && (provider.type === 'anthropic' || provider.type === 'aws-bedrock') ) { - maxTokens -= getAnthropicThinkingBudget(assistant, model) + const { reasoning_effort: reasoningEffort } = getAssistantSettings(assistant) + const budget = getAnthropicThinkingBudget(maxTokens, reasoningEffort, model.id) + if (budget) { + maxTokens -= budget + } } let webSearchPluginConfig: WebSearchPluginConfig | undefined = undefined diff --git a/src/renderer/src/aiCore/trace/AiSdkSpanAdapter.ts b/src/renderer/src/aiCore/trace/AiSdkSpanAdapter.ts index 732397de40..f3df504de8 100644 --- a/src/renderer/src/aiCore/trace/AiSdkSpanAdapter.ts +++ b/src/renderer/src/aiCore/trace/AiSdkSpanAdapter.ts @@ -133,7 +133,7 @@ export class AiSdkSpanAdapter { // 详细记录转换过程 const operationId = attributes['ai.operationId'] - logger.info('Converting AI SDK span to SpanEntity', { + logger.debug('Converting AI SDK span to SpanEntity', { spanName: spanName, operationId, spanTag, @@ -149,7 +149,7 @@ export class AiSdkSpanAdapter { }) if (tokenUsage) { - logger.info('Token usage data found', { + logger.debug('Token usage data found', { spanName: spanName, operationId, usage: tokenUsage, @@ -158,7 +158,7 @@ export class AiSdkSpanAdapter { } if (inputs || outputs) { - logger.info('Input/Output data extracted', { + logger.debug('Input/Output data extracted', { spanName: spanName, operationId, hasInputs: !!inputs, @@ -170,7 +170,7 @@ export class AiSdkSpanAdapter { } if (Object.keys(typeSpecificData).length > 0) { - logger.info('Type-specific data extracted', { + logger.debug('Type-specific data extracted', { spanName: spanName, operationId, typeSpecificKeys: Object.keys(typeSpecificData), @@ -204,7 +204,7 @@ export class AiSdkSpanAdapter { modelName: modelName || this.extractModelFromAttributes(attributes) } - logger.info('AI SDK span successfully converted to SpanEntity', { + logger.debug('AI SDK span successfully converted to SpanEntity', { spanName: spanName, operationId, spanId: spanContext.spanId, diff --git a/src/renderer/src/aiCore/utils/__tests__/reasoning.test.ts b/src/renderer/src/aiCore/utils/__tests__/reasoning.test.ts new file mode 100644 index 0000000000..4561414c11 --- /dev/null +++ b/src/renderer/src/aiCore/utils/__tests__/reasoning.test.ts @@ -0,0 +1,87 @@ +import * as models from '@renderer/config/models' +import { beforeEach, describe, expect, it, vi } from 'vitest' + +import { getAnthropicThinkingBudget } from '../reasoning' + +vi.mock('@renderer/store', () => ({ + default: { + getState: () => ({ + llm: { + providers: [] + }, + settings: {} + }) + }, + useAppDispatch: () => vi.fn(), + useAppSelector: () => vi.fn() +})) + +vi.mock('@renderer/hooks/useSettings', () => ({ + getStoreSetting: () => undefined, + useSettings: () => ({}) +})) + +vi.mock('@renderer/services/AssistantService', () => ({ + getAssistantSettings: () => ({ maxTokens: undefined }), + getProviderByModel: () => ({ id: '' }) +})) + +describe('reasoning utils', () => { + describe('getAnthropicThinkingBudget', () => { + const findTokenLimitSpy = vi.spyOn(models, 'findTokenLimit') + const applyTokenLimit = (limit?: { min: number; max: number }) => findTokenLimitSpy.mockReturnValueOnce(limit) + + beforeEach(() => { + findTokenLimitSpy.mockReset() + }) + + it('returns undefined when reasoningEffort is undefined', () => { + const result = getAnthropicThinkingBudget(8000, undefined, 'claude-model') + expect(result).toBe(undefined) + expect(findTokenLimitSpy).not.toHaveBeenCalled() + }) + + it('returns undefined when tokenLimit is not found', () => { + const unknownId = 'unknown-model' + applyTokenLimit(undefined) + const result = getAnthropicThinkingBudget(8000, 'medium', unknownId) + expect(result).toBe(undefined) + expect(findTokenLimitSpy).toHaveBeenCalledWith(unknownId) + }) + + it('uses DEFAULT_MAX_TOKENS when maxTokens is undefined', () => { + applyTokenLimit({ min: 1000, max: 10_000 }) + const result = getAnthropicThinkingBudget(undefined, 'medium', 'claude-model') + expect(result).toBe(2048) + expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model') + }) + + it('respects maxTokens limit when lower than token limit', () => { + applyTokenLimit({ min: 1000, max: 10_000 }) + const result = getAnthropicThinkingBudget(8000, 'medium', 'claude-model') + expect(result).toBe(4000) + expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model') + }) + + it('caps to token limit when lower than maxTokens budget', () => { + applyTokenLimit({ min: 1000, max: 5000 }) + const result = getAnthropicThinkingBudget(100_000, 'high', 'claude-model') + expect(result).toBe(4200) + expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model') + }) + + it('enforces minimum budget of 1024', () => { + applyTokenLimit({ min: 0, max: 500 }) + const result = getAnthropicThinkingBudget(200, 'low', 'claude-model') + expect(result).toBe(1024) + expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model') + }) + + it('respects large token limits when maxTokens is high', () => { + applyTokenLimit({ min: 1024, max: 64_000 }) + const result = getAnthropicThinkingBudget(64_000, 'high', 'claude-model') + expect(result).toBe(51_200) + expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model') + }) + }) +}) diff --git a/src/renderer/src/aiCore/utils/reasoning.ts b/src/renderer/src/aiCore/utils/reasoning.ts index f261f71a7a..270f5aac7e 100644 --- a/src/renderer/src/aiCore/utils/reasoning.ts +++ b/src/renderer/src/aiCore/utils/reasoning.ts @@ -393,19 +393,26 @@ export function getOpenAIReasoningParams( return {} } -export function getAnthropicThinkingBudget(assistant: Assistant, model: Model): number { - const { maxTokens, reasoning_effort: reasoningEffort } = getAssistantSettings(assistant) +export function getAnthropicThinkingBudget( + maxTokens: number | undefined, + reasoningEffort: string | undefined, + modelId: string +): number | undefined { if (reasoningEffort === undefined || reasoningEffort === 'none') { - return 0 + return undefined } const effortRatio = EFFORT_RATIO[reasoningEffort] + const tokenLimit = findTokenLimit(modelId) + if (!tokenLimit) { + return undefined + } + const budgetTokens = Math.max( 1024, Math.floor( Math.min( - (findTokenLimit(model.id)?.max! - findTokenLimit(model.id)?.min!) * effortRatio + - findTokenLimit(model.id)?.min!, + (tokenLimit.max - tokenLimit.min) * effortRatio + tokenLimit.min, (maxTokens || DEFAULT_MAX_TOKENS) * effortRatio ) ) @@ -437,7 +444,8 @@ export function getAnthropicReasoningParams( // Claude 推理参数 if (isSupportedThinkingTokenClaudeModel(model)) { - const budgetTokens = getAnthropicThinkingBudget(assistant, model) + const { maxTokens } = getAssistantSettings(assistant) + const budgetTokens = getAnthropicThinkingBudget(maxTokens, reasoningEffort, model.id) return { thinking: { @@ -560,7 +568,8 @@ export function getBedrockReasoningParams( return {} } - const budgetTokens = getAnthropicThinkingBudget(assistant, model) + const { maxTokens } = getAssistantSettings(assistant) + const budgetTokens = getAnthropicThinkingBudget(maxTokens, reasoningEffort, model.id) return { reasoningConfig: { type: 'enabled', diff --git a/src/renderer/src/config/__test__/reasoning.test.ts b/src/renderer/src/config/__test__/reasoning.test.ts index 006fc79d49..f702d33d10 100644 --- a/src/renderer/src/config/__test__/reasoning.test.ts +++ b/src/renderer/src/config/__test__/reasoning.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it, vi } from 'vitest' import { + findTokenLimit, isDoubaoSeedAfter251015, isDoubaoThinkingAutoModel, isGeminiReasoningModel, @@ -518,3 +519,35 @@ describe('Gemini Models', () => { }) }) }) + +describe('findTokenLimit', () => { + const cases: Array<{ modelId: string; expected: { min: number; max: number } }> = [ + { modelId: 'gemini-2.5-flash-lite-exp', expected: { min: 512, max: 24_576 } }, + { modelId: 'gemini-1.5-flash', expected: { min: 0, max: 24_576 } }, + { modelId: 'gemini-1.5-pro-001', expected: { min: 128, max: 32_768 } }, + { modelId: 'qwen3-235b-a22b-thinking-2507', expected: { min: 0, max: 81_920 } }, + { modelId: 'qwen3-30b-a3b-thinking-2507', expected: { min: 0, max: 81_920 } }, + { modelId: 'qwen3-vl-235b-a22b-thinking', expected: { min: 0, max: 81_920 } }, + { modelId: 'qwen3-vl-30b-a3b-thinking', expected: { min: 0, max: 81_920 } }, + { modelId: 'qwen-plus-2025-07-14', expected: { min: 0, max: 38_912 } }, + { modelId: 'qwen-plus-2025-04-28', expected: { min: 0, max: 38_912 } }, + { modelId: 'qwen3-1.7b', expected: { min: 0, max: 30_720 } }, + { modelId: 'qwen3-0.6b', expected: { min: 0, max: 30_720 } }, + { modelId: 'qwen-plus-ultra', expected: { min: 0, max: 81_920 } }, + { modelId: 'qwen-turbo-pro', expected: { min: 0, max: 38_912 } }, + { modelId: 'qwen-flash-lite', expected: { min: 0, max: 81_920 } }, + { modelId: 'qwen3-7b', expected: { min: 1_024, max: 38_912 } }, + { modelId: 'claude-3.7-sonnet-extended', expected: { min: 1_024, max: 64_000 } }, + { modelId: 'claude-sonnet-4.1', expected: { min: 1_024, max: 64_000 } }, + { modelId: 'claude-sonnet-4-5-20250929', expected: { min: 1_024, max: 64_000 } }, + { modelId: 'claude-opus-4-1-extended', expected: { min: 1_024, max: 32_000 } } + ] + + it.each(cases)('returns correct limits for $modelId', ({ modelId, expected }) => { + expect(findTokenLimit(modelId)).toEqual(expected) + }) + + it('returns undefined for unknown models', () => { + expect(findTokenLimit('unknown-model')).toBeUndefined() + }) +}) diff --git a/tests/renderer.setup.ts b/tests/renderer.setup.ts index fab761fae3..bd62271285 100644 --- a/tests/renderer.setup.ts +++ b/tests/renderer.setup.ts @@ -14,6 +14,11 @@ vi.mock('@logger', async () => { } }) +// Mock uuid globally for renderer tests +vi.mock('uuid', () => ({ + v4: () => 'test-uuid-' + Date.now() +})) + vi.mock('axios', () => { const defaultAxiosMock = { get: vi.fn().mockResolvedValue({ data: {} }), // Mocking axios GET request