mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-26 11:44:28 +08:00
test: add thinking budget token test (#11305)
* refactor: add thinking budget token test * fix comment
This commit is contained in:
parent
0a72c613af
commit
c1f1d7996d
@ -123,7 +123,11 @@ export async function buildStreamTextParams(
|
||||
isSupportedThinkingTokenClaudeModel(model) &&
|
||||
(provider.type === 'anthropic' || provider.type === 'aws-bedrock')
|
||||
) {
|
||||
maxTokens -= getAnthropicThinkingBudget(assistant, model)
|
||||
const { reasoning_effort: reasoningEffort } = getAssistantSettings(assistant)
|
||||
const budget = getAnthropicThinkingBudget(maxTokens, reasoningEffort, model.id)
|
||||
if (budget) {
|
||||
maxTokens -= budget
|
||||
}
|
||||
}
|
||||
|
||||
let webSearchPluginConfig: WebSearchPluginConfig | undefined = undefined
|
||||
|
||||
@ -133,7 +133,7 @@ export class AiSdkSpanAdapter {
|
||||
|
||||
// 详细记录转换过程
|
||||
const operationId = attributes['ai.operationId']
|
||||
logger.info('Converting AI SDK span to SpanEntity', {
|
||||
logger.debug('Converting AI SDK span to SpanEntity', {
|
||||
spanName: spanName,
|
||||
operationId,
|
||||
spanTag,
|
||||
@ -149,7 +149,7 @@ export class AiSdkSpanAdapter {
|
||||
})
|
||||
|
||||
if (tokenUsage) {
|
||||
logger.info('Token usage data found', {
|
||||
logger.debug('Token usage data found', {
|
||||
spanName: spanName,
|
||||
operationId,
|
||||
usage: tokenUsage,
|
||||
@ -158,7 +158,7 @@ export class AiSdkSpanAdapter {
|
||||
}
|
||||
|
||||
if (inputs || outputs) {
|
||||
logger.info('Input/Output data extracted', {
|
||||
logger.debug('Input/Output data extracted', {
|
||||
spanName: spanName,
|
||||
operationId,
|
||||
hasInputs: !!inputs,
|
||||
@ -170,7 +170,7 @@ export class AiSdkSpanAdapter {
|
||||
}
|
||||
|
||||
if (Object.keys(typeSpecificData).length > 0) {
|
||||
logger.info('Type-specific data extracted', {
|
||||
logger.debug('Type-specific data extracted', {
|
||||
spanName: spanName,
|
||||
operationId,
|
||||
typeSpecificKeys: Object.keys(typeSpecificData),
|
||||
@ -204,7 +204,7 @@ export class AiSdkSpanAdapter {
|
||||
modelName: modelName || this.extractModelFromAttributes(attributes)
|
||||
}
|
||||
|
||||
logger.info('AI SDK span successfully converted to SpanEntity', {
|
||||
logger.debug('AI SDK span successfully converted to SpanEntity', {
|
||||
spanName: spanName,
|
||||
operationId,
|
||||
spanId: spanContext.spanId,
|
||||
|
||||
87
src/renderer/src/aiCore/utils/__tests__/reasoning.test.ts
Normal file
87
src/renderer/src/aiCore/utils/__tests__/reasoning.test.ts
Normal file
@ -0,0 +1,87 @@
|
||||
import * as models from '@renderer/config/models'
|
||||
import { beforeEach, describe, expect, it, vi } from 'vitest'
|
||||
|
||||
import { getAnthropicThinkingBudget } from '../reasoning'
|
||||
|
||||
// Replace the store module with a stub: getState() yields an empty provider list and
// empty settings, and the dispatch/selector hooks hand back fresh vi.fn() mocks.
vi.mock('@renderer/store', () => {
  const emptyState = () => ({
    llm: { providers: [] },
    settings: {}
  })

  return {
    default: { getState: emptyState },
    useAppDispatch: () => vi.fn(),
    useAppSelector: () => vi.fn()
  }
})

// Settings helpers resolve to "nothing configured".
vi.mock('@renderer/hooks/useSettings', () => {
  return {
    getStoreSetting: () => undefined,
    useSettings: () => ({})
  }
})

// Assistant service stub: no maxTokens override and a provider with an empty id.
vi.mock('@renderer/services/AssistantService', () => {
  return {
    getAssistantSettings: () => ({ maxTokens: undefined }),
    getProviderByModel: () => ({ id: '' })
  }
})
|
||||
|
||||
describe('reasoning utils', () => {
  describe('getAnthropicThinkingBudget', () => {
    type TokenLimit = { min: number; max: number }

    const MODEL_ID = 'claude-model'

    // Spy on the model-config lookup so each test controls the limits it reports.
    const tokenLimitSpy = vi.spyOn(models, 'findTokenLimit')
    const stubTokenLimitOnce = (limit?: TokenLimit) => tokenLimitSpy.mockReturnValueOnce(limit)

    /** Stubs the next findTokenLimit result, then computes the budget for MODEL_ID. */
    const budgetWithLimit = (limit: TokenLimit, maxTokens: number | undefined, effort: string) => {
      stubTokenLimitOnce(limit)
      return getAnthropicThinkingBudget(maxTokens, effort, MODEL_ID)
    }

    beforeEach(() => {
      tokenLimitSpy.mockReset()
    })

    it('returns undefined when reasoningEffort is undefined', () => {
      expect(getAnthropicThinkingBudget(8000, undefined, MODEL_ID)).toBeUndefined()
      // Without an effort level the limit lookup must be skipped entirely.
      expect(tokenLimitSpy).not.toHaveBeenCalled()
    })

    it('returns undefined when tokenLimit is not found', () => {
      const unknownId = 'unknown-model'
      stubTokenLimitOnce(undefined)

      expect(getAnthropicThinkingBudget(8000, 'medium', unknownId)).toBeUndefined()
      expect(tokenLimitSpy).toHaveBeenCalledWith(unknownId)
    })

    it('uses DEFAULT_MAX_TOKENS when maxTokens is undefined', () => {
      expect(budgetWithLimit({ min: 1000, max: 10_000 }, undefined, 'medium')).toBe(2048)
      expect(tokenLimitSpy).toHaveBeenCalledWith(MODEL_ID)
    })

    it('respects maxTokens limit when lower than token limit', () => {
      expect(budgetWithLimit({ min: 1000, max: 10_000 }, 8000, 'medium')).toBe(4000)
      expect(tokenLimitSpy).toHaveBeenCalledWith(MODEL_ID)
    })

    it('caps to token limit when lower than maxTokens budget', () => {
      expect(budgetWithLimit({ min: 1000, max: 5000 }, 100_000, 'high')).toBe(4200)
      expect(tokenLimitSpy).toHaveBeenCalledWith(MODEL_ID)
    })

    it('enforces minimum budget of 1024', () => {
      expect(budgetWithLimit({ min: 0, max: 500 }, 200, 'low')).toBe(1024)
      expect(tokenLimitSpy).toHaveBeenCalledWith(MODEL_ID)
    })

    it('respects large token limits when maxTokens is high', () => {
      expect(budgetWithLimit({ min: 1024, max: 64_000 }, 64_000, 'high')).toBe(51_200)
      expect(tokenLimitSpy).toHaveBeenCalledWith(MODEL_ID)
    })
  })
})
|
||||
@ -393,19 +393,26 @@ export function getOpenAIReasoningParams(
|
||||
return {}
|
||||
}
|
||||
|
||||
export function getAnthropicThinkingBudget(assistant: Assistant, model: Model): number {
|
||||
const { maxTokens, reasoning_effort: reasoningEffort } = getAssistantSettings(assistant)
|
||||
export function getAnthropicThinkingBudget(
|
||||
maxTokens: number | undefined,
|
||||
reasoningEffort: string | undefined,
|
||||
modelId: string
|
||||
): number | undefined {
|
||||
if (reasoningEffort === undefined || reasoningEffort === 'none') {
|
||||
return 0
|
||||
return undefined
|
||||
}
|
||||
const effortRatio = EFFORT_RATIO[reasoningEffort]
|
||||
|
||||
const tokenLimit = findTokenLimit(modelId)
|
||||
if (!tokenLimit) {
|
||||
return undefined
|
||||
}
|
||||
|
||||
const budgetTokens = Math.max(
|
||||
1024,
|
||||
Math.floor(
|
||||
Math.min(
|
||||
(findTokenLimit(model.id)?.max! - findTokenLimit(model.id)?.min!) * effortRatio +
|
||||
findTokenLimit(model.id)?.min!,
|
||||
(tokenLimit.max - tokenLimit.min) * effortRatio + tokenLimit.min,
|
||||
(maxTokens || DEFAULT_MAX_TOKENS) * effortRatio
|
||||
)
|
||||
)
|
||||
@ -437,7 +444,8 @@ export function getAnthropicReasoningParams(
|
||||
|
||||
// Claude 推理参数
|
||||
if (isSupportedThinkingTokenClaudeModel(model)) {
|
||||
const budgetTokens = getAnthropicThinkingBudget(assistant, model)
|
||||
const { maxTokens } = getAssistantSettings(assistant)
|
||||
const budgetTokens = getAnthropicThinkingBudget(maxTokens, reasoningEffort, model.id)
|
||||
|
||||
return {
|
||||
thinking: {
|
||||
@ -560,7 +568,8 @@ export function getBedrockReasoningParams(
|
||||
return {}
|
||||
}
|
||||
|
||||
const budgetTokens = getAnthropicThinkingBudget(assistant, model)
|
||||
const { maxTokens } = getAssistantSettings(assistant)
|
||||
const budgetTokens = getAnthropicThinkingBudget(maxTokens, reasoningEffort, model.id)
|
||||
return {
|
||||
reasoningConfig: {
|
||||
type: 'enabled',
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
import { describe, expect, it, vi } from 'vitest'
|
||||
|
||||
import {
|
||||
findTokenLimit,
|
||||
isDoubaoSeedAfter251015,
|
||||
isDoubaoThinkingAutoModel,
|
||||
isGeminiReasoningModel,
|
||||
@ -518,3 +519,35 @@ describe('Gemini Models', () => {
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('findTokenLimit', () => {
  type TokenLimit = { min: number; max: number }

  const limit = (min: number, max: number): TokenLimit => ({ min, max })

  // Model id -> the { min, max } object findTokenLimit is expected to return for it.
  const expectedLimits: Record<string, TokenLimit> = {
    'gemini-2.5-flash-lite-exp': limit(512, 24_576),
    'gemini-1.5-flash': limit(0, 24_576),
    'gemini-1.5-pro-001': limit(128, 32_768),
    'qwen3-235b-a22b-thinking-2507': limit(0, 81_920),
    'qwen3-30b-a3b-thinking-2507': limit(0, 81_920),
    'qwen3-vl-235b-a22b-thinking': limit(0, 81_920),
    'qwen3-vl-30b-a3b-thinking': limit(0, 81_920),
    'qwen-plus-2025-07-14': limit(0, 38_912),
    'qwen-plus-2025-04-28': limit(0, 38_912),
    'qwen3-1.7b': limit(0, 30_720),
    'qwen3-0.6b': limit(0, 30_720),
    'qwen-plus-ultra': limit(0, 81_920),
    'qwen-turbo-pro': limit(0, 38_912),
    'qwen-flash-lite': limit(0, 81_920),
    'qwen3-7b': limit(1_024, 38_912),
    'claude-3.7-sonnet-extended': limit(1_024, 64_000),
    'claude-sonnet-4.1': limit(1_024, 64_000),
    'claude-sonnet-4-5-20250929': limit(1_024, 64_000),
    'claude-opus-4-1-extended': limit(1_024, 32_000)
  }

  // Object.entries keeps insertion order for these string keys, so cases run in the order listed.
  const cases = Object.entries(expectedLimits).map(([modelId, expected]) => ({ modelId, expected }))

  it.each(cases)('returns correct limits for $modelId', ({ modelId, expected }) => {
    expect(findTokenLimit(modelId)).toEqual(expected)
  })

  it('returns undefined for unknown models', () => {
    expect(findTokenLimit('unknown-model')).toBeUndefined()
  })
})
|
||||
|
||||
@ -14,6 +14,11 @@ vi.mock('@logger', async () => {
|
||||
}
|
||||
})
|
||||
|
||||
// Mock uuid globally for renderer tests.
// Ids keep the 'test-uuid-<timestamp>' prefix. A counter local to the mock factory is
// appended because Date.now() alone repeats for calls made within the same millisecond,
// which would hand out identical "unique" ids.
vi.mock('uuid', () => {
  let sequence = 0

  return {
    v4: () => `test-uuid-${Date.now()}-${++sequence}`
  }
})
|
||||
|
||||
vi.mock('axios', () => {
|
||||
const defaultAxiosMock = {
|
||||
get: vi.fn().mockResolvedValue({ data: {} }), // Mocking axios GET request
|
||||
|
||||
Loading…
Reference in New Issue
Block a user