diff --git a/src/renderer/src/aiCore/utils/__tests__/reasoning.test.ts b/src/renderer/src/aiCore/utils/__tests__/reasoning.test.ts index e5561f6fc..df7d69d0c 100644 --- a/src/renderer/src/aiCore/utils/__tests__/reasoning.test.ts +++ b/src/renderer/src/aiCore/utils/__tests__/reasoning.test.ts @@ -11,6 +11,7 @@ import { beforeEach, describe, expect, it, vi } from 'vitest' import { getAnthropicReasoningParams, + getAnthropicThinkingBudget, getBedrockReasoningParams, getCustomParameters, getGeminiReasoningParams, @@ -89,7 +90,8 @@ vi.mock('@renderer/config/models', async (importOriginal) => { isQwenAlwaysThinkModel: vi.fn(() => false), isSupportedThinkingTokenHunyuanModel: vi.fn(() => false), isSupportedThinkingTokenModel: vi.fn(() => false), - isGPT51SeriesModel: vi.fn(() => false) + isGPT51SeriesModel: vi.fn(() => false), + findTokenLimit: vi.fn(actual.findTokenLimit) } }) @@ -649,7 +651,7 @@ describe('reasoning utils', () => { expect(result).toEqual({ thinking: { type: 'enabled', - budgetTokens: 2048 + budgetTokens: 4096 } }) }) @@ -729,7 +731,7 @@ describe('reasoning utils', () => { const result = getGeminiReasoningParams(assistant, model) expect(result).toEqual({ thinkingConfig: { - thinkingBudget: 16448, + thinkingBudget: expect.any(Number), includeThoughts: true } }) @@ -893,7 +895,7 @@ describe('reasoning utils', () => { expect(result).toEqual({ reasoningConfig: { type: 'enabled', - budgetTokens: 2048 + budgetTokens: 4096 } }) }) @@ -994,4 +996,89 @@ describe('reasoning utils', () => { }) }) }) + + describe('getAnthropicThinkingBudget', () => { + it('should return undefined when reasoningEffort is undefined', async () => { + const result = getAnthropicThinkingBudget(4096, undefined, 'claude-3-7-sonnet') + expect(result).toBeUndefined() + }) + + it('should return undefined when reasoningEffort is none', async () => { + const result = getAnthropicThinkingBudget(4096, 'none', 'claude-3-7-sonnet') + expect(result).toBeUndefined() + }) + + it('should return undefined when tokenLimit is not found', async () => { + const { findTokenLimit } = await import('@renderer/config/models') + vi.mocked(findTokenLimit).mockReturnValue(undefined) + + const result = getAnthropicThinkingBudget(4096, 'medium', 'unknown-model') + expect(result).toBeUndefined() + }) + + it('should calculate budget correctly when maxTokens is provided', async () => { + const { findTokenLimit } = await import('@renderer/config/models') + vi.mocked(findTokenLimit).mockReturnValue({ min: 1024, max: 32768 }) + + const result = getAnthropicThinkingBudget(4096, 'medium', 'claude-3-7-sonnet') + // EFFORT_RATIO['medium'] = 0.5 + // budget = Math.floor((32768 - 1024) * 0.5 + 1024) + // = Math.floor(31744 * 0.5 + 1024) = Math.floor(15872 + 1024) = 16896 + // budgetTokens = Math.min(16896, 4096) = 4096 + // result = Math.max(1024, 4096) = 4096 + expect(result).toBe(4096) + }) + + it('should use tokenLimit.max when maxTokens is undefined', async () => { + const { findTokenLimit } = await import('@renderer/config/models') + vi.mocked(findTokenLimit).mockReturnValue({ min: 1024, max: 32768 }) + + const result = getAnthropicThinkingBudget(undefined, 'medium', 'claude-3-7-sonnet') + // When maxTokens is undefined, budget is not constrained by maxTokens + // EFFORT_RATIO['medium'] = 0.5 + // budget = Math.floor((32768 - 1024) * 0.5 + 1024) + // = Math.floor(31744 * 0.5 + 1024) = Math.floor(15872 + 1024) = 16896 + // result = Math.max(1024, 16896) = 16896 + expect(result).toBe(16896) + }) + + it('should enforce minimum budget of 1024', async () => { + const { findTokenLimit } = await import('@renderer/config/models') + vi.mocked(findTokenLimit).mockReturnValue({ min: 100, max: 1000 }) + + const result = getAnthropicThinkingBudget(500, 'low', 'claude-3-7-sonnet') + // EFFORT_RATIO['low'] = 0.05 + // budget = Math.floor((1000 - 100) * 0.05 + 100) + // = Math.floor(900 * 0.05 + 100) = Math.floor(45 + 100) = 145 + // budgetTokens = Math.min(145, 500) = 145 + // result = Math.max(1024, 145) = 1024 + expect(result).toBe(1024) + }) + + it('should respect effort ratio for high reasoning effort', async () => { + const { findTokenLimit } = await import('@renderer/config/models') + vi.mocked(findTokenLimit).mockReturnValue({ min: 1024, max: 32768 }) + + const result = getAnthropicThinkingBudget(8192, 'high', 'claude-3-7-sonnet') + // EFFORT_RATIO['high'] = 0.8 + // budget = Math.floor((32768 - 1024) * 0.8 + 1024) + // = Math.floor(31744 * 0.8 + 1024) = Math.floor(25395.2 + 1024) = 26419 + // budgetTokens = Math.min(26419, 8192) = 8192 + // result = Math.max(1024, 8192) = 8192 + expect(result).toBe(8192) + }) + + it('should use full token limit when maxTokens is undefined and reasoning effort is high', async () => { + const { findTokenLimit } = await import('@renderer/config/models') + vi.mocked(findTokenLimit).mockReturnValue({ min: 1024, max: 32768 }) + + const result = getAnthropicThinkingBudget(undefined, 'high', 'claude-3-7-sonnet') + // When maxTokens is undefined, budget is not constrained by maxTokens + // EFFORT_RATIO['high'] = 0.8 + // budget = Math.floor((32768 - 1024) * 0.8 + 1024) + // = Math.floor(31744 * 0.8 + 1024) = Math.floor(25395.2 + 1024) = 26419 + // result = Math.max(1024, 26419) = 26419 + expect(result).toBe(26419) + }) + }) }) diff --git a/src/renderer/src/aiCore/utils/reasoning.ts b/src/renderer/src/aiCore/utils/reasoning.ts index ca1d03966..9320fb8e5 100644 --- a/src/renderer/src/aiCore/utils/reasoning.ts +++ b/src/renderer/src/aiCore/utils/reasoning.ts @@ -480,16 +480,14 @@ export function getAnthropicThinkingBudget( return undefined } - const budgetTokens = Math.max( - 1024, - Math.floor( - Math.min( - (tokenLimit.max - tokenLimit.min) * effortRatio + tokenLimit.min, - (maxTokens || DEFAULT_MAX_TOKENS) * effortRatio - ) - ) - ) - return budgetTokens + const budget = Math.floor((tokenLimit.max - tokenLimit.min) * effortRatio + tokenLimit.min) + + let budgetTokens = budget + if (maxTokens !== undefined) { + budgetTokens = Math.min(budget, maxTokens) + } + + return Math.max(1024, budgetTokens) } /**