diff --git a/src/renderer/src/aiCore/legacy/clients/openai/OpenAIApiClient.ts b/src/renderer/src/aiCore/legacy/clients/openai/OpenAIApiClient.ts index 44d57a3d4b..8ea9494220 100644 --- a/src/renderer/src/aiCore/legacy/clients/openai/OpenAIApiClient.ts +++ b/src/renderer/src/aiCore/legacy/clients/openai/OpenAIApiClient.ts @@ -184,7 +184,7 @@ export class OpenAIAPIClient extends OpenAIBaseClient< extra_body: { google: { thinking_config: { - thinking_budget: 0 + thinkingBudget: 0 } } } @@ -319,8 +319,8 @@ export class OpenAIAPIClient extends OpenAIBaseClient< extra_body: { google: { thinking_config: { - thinking_budget: -1, - include_thoughts: true + thinkingBudget: -1, + includeThoughts: true } } } @@ -330,8 +330,8 @@ export class OpenAIAPIClient extends OpenAIBaseClient< extra_body: { google: { thinking_config: { - thinking_budget: budgetTokens, - include_thoughts: true + thinkingBudget: budgetTokens, + includeThoughts: true } } } @@ -662,7 +662,7 @@ export class OpenAIAPIClient extends OpenAIBaseClient< } else if (isClaudeReasoningModel(model) && reasoningEffort.thinking?.budget_tokens) { suffix = ` --thinking_budget ${reasoningEffort.thinking.budget_tokens}` } else if (isGeminiReasoningModel(model) && reasoningEffort.extra_body?.google?.thinking_config) { - suffix = ` --thinking_budget ${reasoningEffort.extra_body.google.thinking_config.thinking_budget}` + suffix = ` --thinking_budget ${reasoningEffort.extra_body.google.thinking_config.thinkingBudget}` } // FIXME: poe 不支持多个text part,上传文本文件的时候用的不是file part而是text part,因此会出问题 // 临时解决方案是强制poe用string content,但是其实poe部分支持array diff --git a/src/renderer/src/aiCore/utils/reasoning.ts b/src/renderer/src/aiCore/utils/reasoning.ts index 26093bcb34..2c98d86578 100644 --- a/src/renderer/src/aiCore/utils/reasoning.ts +++ b/src/renderer/src/aiCore/utils/reasoning.ts @@ -32,6 +32,7 @@ import { getAssistantSettings, getProviderByModel } from '@renderer/services/Ass import { SettingsState } from '@renderer/store/settings' import { Assistant, EFFORT_RATIO, isSystemProvider, Model, SystemProviderIds } from '@renderer/types' import { ReasoningEffortOptionalParams } from '@renderer/types/sdk' +import { toInteger } from 'lodash' const logger = loggerService.withContext('reasoning') @@ -94,7 +95,7 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin extra_body: { google: { thinking_config: { - thinking_budget: 0 + thinkingBudget: 0 } } } @@ -112,9 +113,54 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin } // reasoningEffort有效的情况 + + // OpenRouter models + if (model.provider === SystemProviderIds.openrouter) { + // Grok 4 Fast doesn't support effort levels, always use enabled: true + if (isGrok4FastReasoningModel(model)) { + return { + reasoning: { + enabled: true // Ignore effort level, just enable reasoning + } + } + } + + // Other OpenRouter models that support effort levels + if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) { + return { + reasoning: { + effort: reasoningEffort === 'auto' ? 'medium' : reasoningEffort + } + } + } + } + + const effortRatio = EFFORT_RATIO[reasoningEffort] + const tokenLimit = findTokenLimit(model.id) + let budgetTokens: number | undefined + if (tokenLimit) { + budgetTokens = Math.floor((tokenLimit.max - tokenLimit.min) * effortRatio + tokenLimit.min) + } + + // See https://docs.siliconflow.cn/cn/api-reference/chat-completions/chat-completions + if (model.provider === SystemProviderIds.silicon) { + if ( + isDeepSeekHybridInferenceModel(model) || + isSupportedThinkingTokenZhipuModel(model) || + isSupportedThinkingTokenQwenModel(model) || + isSupportedThinkingTokenHunyuanModel(model) + ) { + return { + enable_thinking: true, + // Hard-encoded maximum, only for silicon + thinking_budget: budgetTokens ? toInteger(Math.max(budgetTokens, 32768)) : undefined + } + } + return {} + } + // DeepSeek hybrid inference models, v3.1 and maybe more in the future // 不同的 provider 有不同的思考控制方式,在这里统一解决 - if (isDeepSeekHybridInferenceModel(model)) { if (isSystemProvider(provider)) { switch (provider.id) { @@ -123,10 +169,6 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin enable_thinking: true, incremental_output: true } - case SystemProviderIds.silicon: - return { - enable_thinking: true - } case SystemProviderIds.hunyuan: case SystemProviderIds['tencent-cloud-ti']: case SystemProviderIds.doubao: @@ -151,53 +193,12 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin logger.warn( `Skipping thinking options for provider ${provider.name} as DeepSeek v3.1 thinking control method is unknown` ) + case SystemProviderIds.silicon: + // specially handled before } } } - // OpenRouter models - if (model.provider === SystemProviderIds.openrouter) { - // Grok 4 Fast doesn't support effort levels, always use enabled: true - if (isGrok4FastReasoningModel(model)) { - return { - reasoning: { - enabled: true // Ignore effort level, just enable reasoning - } - } - } - - // Other OpenRouter models that support effort levels - if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) { - return { - reasoning: { - effort: reasoningEffort === 'auto' ? 'medium' : reasoningEffort - } - } - } - } - - // Doubao 思考模式支持 - if (isSupportedThinkingTokenDoubaoModel(model)) { - if (isDoubaoSeedAfter251015(model)) { - return { reasoningEffort } - } - // Comment below this line seems weird. reasoning is high instead of null/undefined. Who wrote this? - // reasoningEffort 为空,默认开启 enabled - if (reasoningEffort === 'high') { - return { thinking: { type: 'enabled' } } - } - if (reasoningEffort === 'auto' && isDoubaoThinkingAutoModel(model)) { - return { thinking: { type: 'auto' } } - } - // 其他情况不带 thinking 字段 - return {} - } - - const effortRatio = EFFORT_RATIO[reasoningEffort] - const budgetTokens = Math.floor( - (findTokenLimit(model.id)?.max! - findTokenLimit(model.id)?.min!) * effortRatio + findTokenLimit(model.id)?.min! - ) - // OpenRouter models, use thinking if (model.provider === SystemProviderIds.openrouter) { if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) { @@ -255,8 +256,8 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin extra_body: { google: { thinking_config: { - thinking_budget: -1, - include_thoughts: true + thinkingBudget: -1, + includeThoughts: true } } } @@ -266,8 +267,8 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin extra_body: { google: { thinking_config: { - thinking_budget: budgetTokens, - include_thoughts: true + thinkingBudget: budgetTokens, + includeThoughts: true } } } @@ -280,22 +281,26 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin return { thinking: { type: 'enabled', - budget_tokens: Math.floor( - Math.max(1024, Math.min(budgetTokens, (maxTokens || DEFAULT_MAX_TOKENS) * effortRatio)) - ) + budget_tokens: budgetTokens + ? Math.floor(Math.max(1024, Math.min(budgetTokens, (maxTokens || DEFAULT_MAX_TOKENS) * effortRatio))) + : undefined } } } // Use thinking, doubao, zhipu, etc. if (isSupportedThinkingTokenDoubaoModel(model)) { - if (assistant.settings?.reasoning_effort === 'high') { - return { - thinking: { - type: 'enabled' - } - } + if (isDoubaoSeedAfter251015(model)) { + return { reasoningEffort } } + if (reasoningEffort === 'high') { + return { thinking: { type: 'enabled' } } + } + if (reasoningEffort === 'auto' && isDoubaoThinkingAutoModel(model)) { + return { thinking: { type: 'auto' } } + } + // 其他情况不带 thinking 字段 + return {} } if (isSupportedThinkingTokenZhipuModel(model)) { return { thinking: { type: 'enabled' } } diff --git a/src/renderer/src/types/sdk.ts b/src/renderer/src/types/sdk.ts index e18891f2bf..00fd6c5761 100644 --- a/src/renderer/src/types/sdk.ts +++ b/src/renderer/src/types/sdk.ts @@ -19,6 +19,7 @@ import { GoogleGenAI, Model as GeminiModel, SendMessageParameters, + ThinkingConfig, Tool } from '@google/genai' import OpenAI, { AzureOpenAI } from 'openai' @@ -90,10 +91,7 @@ export type ReasoningEffortOptionalParams = { } extra_body?: { google?: { - thinking_config: { - thinking_budget: number - include_thoughts?: boolean - } + thinking_config: ThinkingConfig } } // Add any other potential reasoning-related keys here if they exist