From f71ce7fe3da596943d29419e0a2ca2d0631c1de5 Mon Sep 17 00:00:00 2001
From: Phantom
Date: Fri, 24 Oct 2025 13:01:00 +0800
Subject: [PATCH] fix: silicon reasoning (#10932)

* refactor(aiCore): reorganize reasoning effort logic for different providers

Restructure the reasoning effort calculation logic to handle different model providers more clearly. Move OpenRouter and SiliconFlow specific logic to dedicated sections and remove duplicate checks. Improve maintainability by grouping related provider logic together.

* refactor(sdk): update thinking config type and property names

- Replace inline thinking config type with imported ThinkingConfig type
- Update property names from snake_case to camelCase for consistency
- Add null checks for token limit calculations
- Clarify hard-coded maximum for silicon provider in comments

* refactor(openai): standardize property names to camelCase in thinking_config

Update property names in thinking_config object from snake_case to camelCase for consistency with codebase conventions

(cherry picked from commit 4dfb73c982ccd5b575e984dc802b62a4ef339a4a)
---
 .../legacy/clients/openai/OpenAIApiClient.ts |  12 +-
 src/renderer/src/aiCore/utils/reasoning.ts   | 129 +++++++++---
 src/renderer/src/types/sdk.ts                |   6 +-
 3 files changed, 75 insertions(+), 72 deletions(-)

diff --git a/src/renderer/src/aiCore/legacy/clients/openai/OpenAIApiClient.ts b/src/renderer/src/aiCore/legacy/clients/openai/OpenAIApiClient.ts
index 44d57a3d4b..8ea9494220 100644
--- a/src/renderer/src/aiCore/legacy/clients/openai/OpenAIApiClient.ts
+++ b/src/renderer/src/aiCore/legacy/clients/openai/OpenAIApiClient.ts
@@ -184,7 +184,7 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
         extra_body: {
           google: {
             thinking_config: {
-              thinking_budget: 0
+              thinkingBudget: 0
             }
           }
         }
@@ -319,8 +319,8 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
           extra_body: {
             google: {
               thinking_config: {
-                thinking_budget: -1,
-                include_thoughts: true
+                thinkingBudget: -1,
+                includeThoughts: true
               }
             }
           }
@@ -330,8 +330,8 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
         extra_body: {
           google: {
             thinking_config: {
-              thinking_budget: budgetTokens,
-              include_thoughts: true
+              thinkingBudget: budgetTokens,
+              includeThoughts: true
             }
           }
         }
@@ -662,7 +662,7 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
     } else if (isClaudeReasoningModel(model) && reasoningEffort.thinking?.budget_tokens) {
       suffix = ` --thinking_budget ${reasoningEffort.thinking.budget_tokens}`
     } else if (isGeminiReasoningModel(model) && reasoningEffort.extra_body?.google?.thinking_config) {
-      suffix = ` --thinking_budget ${reasoningEffort.extra_body.google.thinking_config.thinking_budget}`
+      suffix = ` --thinking_budget ${reasoningEffort.extra_body.google.thinking_config.thinkingBudget}`
     }
     // FIXME: poe 不支持多个text part,上传文本文件的时候用的不是file part而是text part,因此会出问题
     // 临时解决方案是强制poe用string content,但是其实poe部分支持array
diff --git a/src/renderer/src/aiCore/utils/reasoning.ts b/src/renderer/src/aiCore/utils/reasoning.ts
index 26093bcb34..2c98d86578 100644
--- a/src/renderer/src/aiCore/utils/reasoning.ts
+++ b/src/renderer/src/aiCore/utils/reasoning.ts
@@ -32,6 +32,7 @@ import { getAssistantSettings, getProviderByModel } from '@renderer/services/Ass
 import { SettingsState } from '@renderer/store/settings'
 import { Assistant, EFFORT_RATIO, isSystemProvider, Model, SystemProviderIds } from '@renderer/types'
 import { ReasoningEffortOptionalParams } from '@renderer/types/sdk'
+import { toInteger } from 'lodash'
 
 const logger = loggerService.withContext('reasoning')
 
@@ -94,7 +95,7 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
         extra_body: {
           google: {
             thinking_config: {
-              thinking_budget: 0
+              thinkingBudget: 0
             }
           }
         }
@@ -112,9 +113,54 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
   }
 
   // reasoningEffort有效的情况
+
+  // OpenRouter models
+  if (model.provider === SystemProviderIds.openrouter) {
+    // Grok 4 Fast doesn't support effort levels, always use enabled: true
+    if (isGrok4FastReasoningModel(model)) {
+      return {
+        reasoning: {
+          enabled: true // Ignore effort level, just enable reasoning
+        }
+      }
+    }
+
+    // Other OpenRouter models that support effort levels
+    if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
+      return {
+        reasoning: {
+          effort: reasoningEffort === 'auto' ? 'medium' : reasoningEffort
+        }
+      }
+    }
+  }
+
+  const effortRatio = EFFORT_RATIO[reasoningEffort]
+  const tokenLimit = findTokenLimit(model.id)
+  let budgetTokens: number | undefined
+  if (tokenLimit) {
+    budgetTokens = Math.floor((tokenLimit.max - tokenLimit.min) * effortRatio + tokenLimit.min)
+  }
+
+  // See https://docs.siliconflow.cn/cn/api-reference/chat-completions/chat-completions
+  if (model.provider === SystemProviderIds.silicon) {
+    if (
+      isDeepSeekHybridInferenceModel(model) ||
+      isSupportedThinkingTokenZhipuModel(model) ||
+      isSupportedThinkingTokenQwenModel(model) ||
+      isSupportedThinkingTokenHunyuanModel(model)
+    ) {
+      return {
+        enable_thinking: true,
+        // Hard-coded maximum, only for silicon
+        thinking_budget: budgetTokens ? toInteger(Math.max(budgetTokens, 32768)) : undefined
+      }
+    }
+    return {}
+  }
+
   // DeepSeek hybrid inference models, v3.1 and maybe more in the future
   // 不同的 provider 有不同的思考控制方式,在这里统一解决
-
   if (isDeepSeekHybridInferenceModel(model)) {
     if (isSystemProvider(provider)) {
       switch (provider.id) {
@@ -123,10 +169,6 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
             enable_thinking: true,
             incremental_output: true
           }
-        case SystemProviderIds.silicon:
-          return {
-            enable_thinking: true
-          }
         case SystemProviderIds.hunyuan:
         case SystemProviderIds['tencent-cloud-ti']:
         case SystemProviderIds.doubao:
@@ -151,53 +193,12 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
           logger.warn(
             `Skipping thinking options for provider ${provider.name} as DeepSeek v3.1 thinking control method is unknown`
           )
+        case SystemProviderIds.silicon:
+          // specially handled before
       }
     }
   }
 
-  // OpenRouter models
-  if (model.provider === SystemProviderIds.openrouter) {
-    // Grok 4 Fast doesn't support effort levels, always use enabled: true
-    if (isGrok4FastReasoningModel(model)) {
-      return {
-        reasoning: {
-          enabled: true // Ignore effort level, just enable reasoning
-        }
-      }
-    }
-
-    // Other OpenRouter models that support effort levels
-    if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
-      return {
-        reasoning: {
-          effort: reasoningEffort === 'auto' ? 'medium' : reasoningEffort
-        }
-      }
-    }
-  }
-
-  // Doubao 思考模式支持
-  if (isSupportedThinkingTokenDoubaoModel(model)) {
-    if (isDoubaoSeedAfter251015(model)) {
-      return { reasoningEffort }
-    }
-
-    // reasoningEffort 为空,默认开启 enabled
-    if (reasoningEffort === 'high') {
-      return { thinking: { type: 'enabled' } }
-    }
-    if (reasoningEffort === 'auto' && isDoubaoThinkingAutoModel(model)) {
-      return { thinking: { type: 'auto' } }
-    }
-    // 其他情况不带 thinking 字段
-    return {}
-  }
-
-  const effortRatio = EFFORT_RATIO[reasoningEffort]
-  const budgetTokens = Math.floor(
-    (findTokenLimit(model.id)?.max! - findTokenLimit(model.id)?.min!) * effortRatio + findTokenLimit(model.id)?.min!
-  )
-
   // OpenRouter models, use thinking
   if (model.provider === SystemProviderIds.openrouter) {
     if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
@@ -255,8 +256,8 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
           extra_body: {
             google: {
               thinking_config: {
-                thinking_budget: -1,
-                include_thoughts: true
+                thinkingBudget: -1,
+                includeThoughts: true
               }
             }
           }
@@ -266,8 +267,8 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
       extra_body: {
         google: {
           thinking_config: {
-            thinking_budget: budgetTokens,
-            include_thoughts: true
+            thinkingBudget: budgetTokens,
+            includeThoughts: true
           }
         }
       }
@@ -280,22 +281,26 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
     return {
       thinking: {
         type: 'enabled',
-        budget_tokens: Math.floor(
-          Math.max(1024, Math.min(budgetTokens, (maxTokens || DEFAULT_MAX_TOKENS) * effortRatio))
-        )
+        budget_tokens: budgetTokens
+          ? Math.floor(Math.max(1024, Math.min(budgetTokens, (maxTokens || DEFAULT_MAX_TOKENS) * effortRatio)))
+          : undefined
       }
     }
   }
 
   // Use thinking, doubao, zhipu, etc.
   if (isSupportedThinkingTokenDoubaoModel(model)) {
-    if (assistant.settings?.reasoning_effort === 'high') {
-      return {
-        thinking: {
-          type: 'enabled'
-        }
-      }
+    if (isDoubaoSeedAfter251015(model)) {
+      return { reasoningEffort }
     }
+    if (reasoningEffort === 'high') {
+      return { thinking: { type: 'enabled' } }
+    }
+    if (reasoningEffort === 'auto' && isDoubaoThinkingAutoModel(model)) {
+      return { thinking: { type: 'auto' } }
+    }
+    // 其他情况不带 thinking 字段
+    return {}
   }
   if (isSupportedThinkingTokenZhipuModel(model)) {
     return { thinking: { type: 'enabled' } }
diff --git a/src/renderer/src/types/sdk.ts b/src/renderer/src/types/sdk.ts
index e18891f2bf..00fd6c5761 100644
--- a/src/renderer/src/types/sdk.ts
+++ b/src/renderer/src/types/sdk.ts
@@ -19,6 +19,7 @@ import {
   GoogleGenAI,
   Model as GeminiModel,
   SendMessageParameters,
+  ThinkingConfig,
   Tool
 } from '@google/genai'
 import OpenAI, { AzureOpenAI } from 'openai'
@@ -90,10 +91,7 @@ export type ReasoningEffortOptionalParams = {
   }
   extra_body?: {
     google?: {
-      thinking_config: {
-        thinking_budget: number
-        include_thoughts?: boolean
-      }
+      thinking_config: ThinkingConfig
     }
   }
   // Add any other potential reasoning-related keys here if they exist