fix: silicon reasoning (#10932)

* refactor(aiCore): reorganize reasoning effort logic for different providers

  Restructure the reasoning effort calculation logic to handle different model providers more clearly. Move OpenRouter- and SiliconFlow-specific logic to dedicated sections and remove duplicate checks. Improve maintainability by grouping related provider logic together.

* refactor(sdk): update thinking config type and property names

  - Replace inline thinking config type with imported ThinkingConfig type
  - Update property names from snake_case to camelCase for consistency
  - Add null checks for token limit calculations
  - Clarify hard-coded maximum for silicon provider in comments

* refactor(openai): standardize property names to camelCase in thinking_config

  Update property names in thinking_config object from snake_case to camelCase for consistency with codebase conventions.

(cherry picked from commit 4dfb73c982)
2025-12-27 04:31:27 +08:00 · 2025-10-24 13:01:00 +08:00 · 2025-10-24 13:01:00 +08:00 · f71ce7fe3d
commit f71ce7fe3d
parent 7b10ff5010
3 changed files with 75 additions and 72 deletions
--- a/src/renderer/src/aiCore/legacy/clients/openai/OpenAIApiClient.ts
+++ b/src/renderer/src/aiCore/legacy/clients/openai/OpenAIApiClient.ts
@ -184,7 +184,7 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
            extra_body: {
              google: {
                thinking_config: {
-                  thinking_budget: 0
+                  thinkingBudget: 0
                }
              }
            }
@ -319,8 +319,8 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
          extra_body: {
            google: {
              thinking_config: {
-                thinking_budget: -1,
-                include_thoughts: true
+                thinkingBudget: -1,
+                includeThoughts: true
              }
            }
          }
@ -330,8 +330,8 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
        extra_body: {
          google: {
            thinking_config: {
-              thinking_budget: budgetTokens,
-              include_thoughts: true
+              thinkingBudget: budgetTokens,
+              includeThoughts: true
            }
          }
        }
@ -662,7 +662,7 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
            } else if (isClaudeReasoningModel(model) && reasoningEffort.thinking?.budget_tokens) {
              suffix = ` --thinking_budget ${reasoningEffort.thinking.budget_tokens}`
            } else if (isGeminiReasoningModel(model) && reasoningEffort.extra_body?.google?.thinking_config) {
-              suffix = ` --thinking_budget ${reasoningEffort.extra_body.google.thinking_config.thinking_budget}`
+              suffix = ` --thinking_budget ${reasoningEffort.extra_body.google.thinking_config.thinkingBudget}`
            }
            // FIXME: poe 不支持多个text part，上传文本文件的时候用的不是file part而是text part，因此会出问题
            // 临时解决方案是强制poe用string content，但是其实poe部分支持array
--- a/src/renderer/src/aiCore/utils/reasoning.ts
+++ b/src/renderer/src/aiCore/utils/reasoning.ts
@ -32,6 +32,7 @@ import { getAssistantSettings, getProviderByModel } from '@renderer/services/Ass
 import { SettingsState } from '@renderer/store/settings'
 import { Assistant, EFFORT_RATIO, isSystemProvider, Model, SystemProviderIds } from '@renderer/types'
 import { ReasoningEffortOptionalParams } from '@renderer/types/sdk'
+import { toInteger } from 'lodash'

 const logger = loggerService.withContext('reasoning')

@ -94,7 +95,7 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
          extra_body: {
            google: {
              thinking_config: {
-                thinking_budget: 0
+                thinkingBudget: 0
              }
            }
          }
@ -112,9 +113,54 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
  }

  // reasoningEffort有效的情况
+
+  // OpenRouter models
+  if (model.provider === SystemProviderIds.openrouter) {
+    // Grok 4 Fast doesn't support effort levels, always use enabled: true
+    if (isGrok4FastReasoningModel(model)) {
+      return {
+        reasoning: {
+          enabled: true // Ignore effort level, just enable reasoning
+        }
+      }
+    }
+
+    // Other OpenRouter models that support effort levels
+    if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
+      return {
+        reasoning: {
+          effort: reasoningEffort === 'auto' ? 'medium' : reasoningEffort
+        }
+      }
+    }
+  }
+
+  const effortRatio = EFFORT_RATIO[reasoningEffort]
+  const tokenLimit = findTokenLimit(model.id)
+  let budgetTokens: number | undefined
+  if (tokenLimit) {
+    budgetTokens = Math.floor((tokenLimit.max - tokenLimit.min) * effortRatio + tokenLimit.min)
+  }
+
+  // SiliconFlow: thinking is toggled with enable_thinking and sized with thinking_budget. See https://docs.siliconflow.cn/cn/api-reference/chat-completions/chat-completions
+  if (model.provider === SystemProviderIds.silicon) {
+    if (
+      isDeepSeekHybridInferenceModel(model) ||
+      isSupportedThinkingTokenZhipuModel(model) ||
+      isSupportedThinkingTokenQwenModel(model) ||
+      isSupportedThinkingTokenHunyuanModel(model)
+    ) {
+      return {
+        enable_thinking: true,
+        // Hard-coded maximum, only for silicon: cap the budget at 32768 (Math.min, not Math.max, which would make 32768 a floor); budget is omitted when no token limit is known
+        thinking_budget: budgetTokens ? toInteger(Math.min(budgetTokens, 32768)) : undefined
+      }
+    }
+    return {}
+  }
+
  // DeepSeek hybrid inference models, v3.1 and maybe more in the future
  // 不同的 provider 有不同的思考控制方式，在这里统一解决
-
  if (isDeepSeekHybridInferenceModel(model)) {
    if (isSystemProvider(provider)) {
      switch (provider.id) {
@ -123,10 +169,6 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
            enable_thinking: true,
            incremental_output: true
          }
-        case SystemProviderIds.silicon:
-          return {
-            enable_thinking: true
-          }
        case SystemProviderIds.hunyuan:
        case SystemProviderIds['tencent-cloud-ti']:
        case SystemProviderIds.doubao:
@ -151,53 +193,12 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
          logger.warn(
            `Skipping thinking options for provider ${provider.name} as DeepSeek v3.1 thinking control method is unknown`
          )
+        case SystemProviderIds.silicon:
+        // specially handled before
      }
    }
  }

-  // OpenRouter models
-  if (model.provider === SystemProviderIds.openrouter) {
-    // Grok 4 Fast doesn't support effort levels, always use enabled: true
-    if (isGrok4FastReasoningModel(model)) {
-      return {
-        reasoning: {
-          enabled: true // Ignore effort level, just enable reasoning
-        }
-      }
-    }
-
-    // Other OpenRouter models that support effort levels
-    if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
-      return {
-        reasoning: {
-          effort: reasoningEffort === 'auto' ? 'medium' : reasoningEffort
-        }
-      }
-    }
-  }
-
-  // Doubao 思考模式支持
-  if (isSupportedThinkingTokenDoubaoModel(model)) {
-    if (isDoubaoSeedAfter251015(model)) {
-      return { reasoningEffort }
-    }
-    // Comment below this line seems weird. reasoning is high instead of null/undefined. Who wrote this?
-    // reasoningEffort 为空，默认开启 enabled
-    if (reasoningEffort === 'high') {
-      return { thinking: { type: 'enabled' } }
-    }
-    if (reasoningEffort === 'auto' && isDoubaoThinkingAutoModel(model)) {
-      return { thinking: { type: 'auto' } }
-    }
-    // 其他情况不带 thinking 字段
-    return {}
-  }
-
-  const effortRatio = EFFORT_RATIO[reasoningEffort]
-  const budgetTokens = Math.floor(
-    (findTokenLimit(model.id)?.max! - findTokenLimit(model.id)?.min!) * effortRatio + findTokenLimit(model.id)?.min!
-  )
-
  // OpenRouter models, use thinking
  if (model.provider === SystemProviderIds.openrouter) {
    if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
@ -255,8 +256,8 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
        extra_body: {
          google: {
            thinking_config: {
-              thinking_budget: -1,
-              include_thoughts: true
+              thinkingBudget: -1,
+              includeThoughts: true
            }
          }
        }
@ -266,8 +267,8 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
      extra_body: {
        google: {
          thinking_config: {
-            thinking_budget: budgetTokens,
-            include_thoughts: true
+            thinkingBudget: budgetTokens,
+            includeThoughts: true
          }
        }
      }
@ -280,22 +281,26 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
    return {
      thinking: {
        type: 'enabled',
-        budget_tokens: Math.floor(
-          Math.max(1024, Math.min(budgetTokens, (maxTokens || DEFAULT_MAX_TOKENS) * effortRatio))
-        )
+        budget_tokens: budgetTokens
+          ? Math.floor(Math.max(1024, Math.min(budgetTokens, (maxTokens || DEFAULT_MAX_TOKENS) * effortRatio)))
+          : undefined
      }
    }
  }

  // Use thinking, doubao, zhipu, etc.
  if (isSupportedThinkingTokenDoubaoModel(model)) {
-    if (assistant.settings?.reasoning_effort === 'high') {
-      return {
-        thinking: {
-          type: 'enabled'
-        }
-      }
+    if (isDoubaoSeedAfter251015(model)) {
+      return { reasoningEffort }
    }
+    if (reasoningEffort === 'high') {
+      return { thinking: { type: 'enabled' } }
+    }
+    if (reasoningEffort === 'auto' && isDoubaoThinkingAutoModel(model)) {
+      return { thinking: { type: 'auto' } }
+    }
+    // 其他情况不带 thinking 字段
+    return {}
  }
  if (isSupportedThinkingTokenZhipuModel(model)) {
    return { thinking: { type: 'enabled' } }
--- a/src/renderer/src/types/sdk.ts
+++ b/src/renderer/src/types/sdk.ts
@ -19,6 +19,7 @@ import {
  GoogleGenAI,
  Model as GeminiModel,
  SendMessageParameters,
+  ThinkingConfig,
  Tool
 } from '@google/genai'
 import OpenAI, { AzureOpenAI } from 'openai'
@ -90,10 +91,7 @@ export type ReasoningEffortOptionalParams = {
  }
  extra_body?: {
    google?: {
-      thinking_config: {
-        thinking_budget: number
-        include_thoughts?: boolean
-      }
+      thinking_config: ThinkingConfig
    }
  }
  // Add any other potential reasoning-related keys here if they exist