From bd4ba47e61e90183c03f5d89df32de7e2920de1c Mon Sep 17 00:00:00 2001
From: Phantom <59059173+EurFelux@users.noreply.github.com>
Date: Tue, 26 Aug 2025 17:43:29 +0800
Subject: [PATCH] feat(models): support qwen-flash & deepseek v3.1 (limited)
 (#9539)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(models): 添加 qwen-flash 模型支持并更新相关配置

* feat(ai): 添加对deepseek-v3.1模型的支持(dashscope)

添加deepseek_hybrid类型到ThinkModelTypes，并实现相关推理逻辑

* refactor(openai): 移除调试日志语句以清理代码

* feat(推理模型): 增强DeepSeek混合推理模型支持并优化逻辑顺序

重构DeepSeek混合推理模型的判断逻辑，支持更多版本格式
将Doubao思考模式处理逻辑移至更合理的位置
添加对DeepSeek模型在不同provider下的思考控制处理

* fix(deepseek): 支持openrouter的deepseek-chat-v3.1模型推理控制

统一处理不同provider的DeepSeek混合推理模型控制方式，添加对openrouter的deepseek-chat-v3.1模型支持

* fix(模型): 修正函数调用模型的判断逻辑

更新函数调用模型的判断条件，明确不支持v3.1函数调用的提供商并处理openrouter的特殊情况

* feat(openai): 为silicon模型添加enable_thinking配置

* fix(模型配置): 修正深度求索混合推理模型的函数调用支持逻辑

更新深度求索混合推理模型的函数调用判断逻辑，默认支持函数调用

* feat(模型支持): 为DeepSeek V3.1添加白名单支持

添加对DeepSeek V3.1混合推理模型的白名单支持，目前仅允许openrouter、dashscope和doubao作为提供商

* feat(config): 添加silicon到DeepSeek V3.1支持的白名单中

* feat(sdk): 添加对NVIDIA推理参数的支持

在ReasoningEffortOptionalParams类型中添加chat_template_kwargs参数，用于支持NVIDIA的推理配置。同时在模型支持列表中新增nvidia提供商，并在OpenAIApiClient中实现对应的参数处理逻辑。

* refactor(openai): 使用SystemProviderIds替换硬编码的provider id字符串
---
 .../aiCore/clients/openai/OpenAIApiClient.ts  | 92 ++++++++++++++-----
 src/renderer/src/config/models.ts             | 51 ++++++++--
 src/renderer/src/types/index.ts               |  3 +-
 src/renderer/src/types/sdk.ts                 |  4 +
 4 files changed, 121 insertions(+), 29 deletions(-)

diff --git a/src/renderer/src/aiCore/clients/openai/OpenAIApiClient.ts b/src/renderer/src/aiCore/clients/openai/OpenAIApiClient.ts
index f2bfeed026..089d1af799 100644
--- a/src/renderer/src/aiCore/clients/openai/OpenAIApiClient.ts
+++ b/src/renderer/src/aiCore/clients/openai/OpenAIApiClient.ts
@@ -6,6 +6,7 @@ import {
   getOpenAIWebSearchParams,
   getThinkModelType,
   isClaudeReasoningModel,
+  isDeepSeekHybridInferenceModel,
   isDoubaoThinkingAutoModel,
   isGeminiReasoningModel,
   isGPT5SeriesModel,
@@ -44,6 +45,7 @@ import {
   Assistant,
   EFFORT_RATIO,
   FileTypes,
+  isSystemProvider,
   MCPCallToolResponse,
   MCPTool,
   MCPToolResponse,
@@ -113,7 +115,7 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
    */
   // Method for reasoning effort, moved from OpenAIProvider
   override getReasoningEffort(assistant: Assistant, model: Model): ReasoningEffortOptionalParams {
-    if (this.provider.id === 'groq') {
+    if (this.provider.id === SystemProviderIds.groq) {
       return {}
     }
 
@@ -122,22 +124,6 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
     }
     const reasoningEffort = assistant?.settings?.reasoning_effort
 
-    // Doubao 思考模式支持
-    if (isSupportedThinkingTokenDoubaoModel(model)) {
-      // reasoningEffort 为空，默认开启 enabled
-      if (!reasoningEffort) {
-        return { thinking: { type: 'disabled' } }
-      }
-      if (reasoningEffort === 'high') {
-        return { thinking: { type: 'enabled' } }
-      }
-      if (reasoningEffort === 'auto' && isDoubaoThinkingAutoModel(model)) {
-        return { thinking: { type: 'auto' } }
-      }
-      // 其他情况不带 thinking 字段
-      return {}
-    }
-
     if (isSupportedThinkingTokenZhipuModel(model)) {
       if (!reasoningEffort) {
         return { thinking: { type: 'disabled' } }
@@ -146,7 +132,14 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
     }
 
     if (!reasoningEffort) {
-      if (model.provider === 'openrouter') {
+      // DeepSeek hybrid inference models, v3.1 and maybe more in the future
+      // Each provider controls thinking in its own way; all of them are handled here in one place
+      // if (isDeepSeekHybridInferenceModel(model)) {
+      //   // do nothing for now. default to non-think.
+      // }
+
+      // openrouter: use reasoning
+      if (model.provider === SystemProviderIds.openrouter) {
         // Don't disable reasoning for Gemini models that support thinking tokens
         if (isSupportedThinkingTokenGeminiModel(model) && !GEMINI_FLASH_MODEL_REGEX.test(model.id)) {
           return {}
@@ -158,17 +151,22 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
         return { reasoning: { enabled: false, exclude: true } }
       }
 
+      // providers that use enable_thinking
       if (
         isSupportEnableThinkingProvider(this.provider) &&
-        (isSupportedThinkingTokenQwenModel(model) || isSupportedThinkingTokenHunyuanModel(model))
+        (isSupportedThinkingTokenQwenModel(model) ||
+          isSupportedThinkingTokenHunyuanModel(model) ||
+          (this.provider.id === SystemProviderIds.dashscope && isDeepSeekHybridInferenceModel(model)))
       ) {
         return { enable_thinking: false }
       }
 
+      // claude
       if (isSupportedThinkingTokenClaudeModel(model)) {
         return {}
       }
 
+      // gemini
       if (isSupportedThinkingTokenGeminiModel(model)) {
         if (GEMINI_FLASH_MODEL_REGEX.test(model.id)) {
           return {
@@ -197,8 +195,48 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
       (findTokenLimit(model.id)?.max! - findTokenLimit(model.id)?.min!) * effortRatio + findTokenLimit(model.id)?.min!
     )
 
+    // DeepSeek hybrid inference models, v3.1 and maybe more in the future
+    // Each provider controls thinking in its own way; all of them are handled here in one place
+    if (isDeepSeekHybridInferenceModel(model)) {
+      if (isSystemProvider(this.provider)) {
+        switch (this.provider.id) {
+          case SystemProviderIds.dashscope:
+            return {
+              enable_thinking: true,
+              incremental_output: true
+            }
+          case SystemProviderIds.silicon:
+            return {
+              enable_thinking: true
+            }
+          case SystemProviderIds.doubao:
+            return {
+              thinking: {
+                type: 'enabled' // auto is invalid
+              }
+            }
+          case SystemProviderIds.openrouter:
+            return {
+              reasoning: {
+                enabled: true
+              }
+            }
+          case 'nvidia':
+            return {
+              chat_template_kwargs: {
+                thinking: true
+              }
+            }
+          default:
+            logger.warn(
+              `Skipping thinking options for provider ${this.provider.name} as DeepSeek v3.1 thinking control method is unknown`
+            )
+        }
+      }
+    }
+
     // OpenRouter models
-    if (model.provider === 'openrouter') {
+    if (model.provider === SystemProviderIds.openrouter) {
       if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
         return {
           reasoning: {
@@ -208,6 +246,18 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
       }
     }
 
+    // Doubao thinking-mode support
+    if (isSupportedThinkingTokenDoubaoModel(model)) {
+      if (reasoningEffort === 'high') {
+        return { thinking: { type: 'enabled' } }
+      }
+      if (reasoningEffort === 'auto' && isDoubaoThinkingAutoModel(model)) {
+        return { thinking: { type: 'auto' } }
+      }
+      // In every other case, omit the thinking field
+      return {}
+    }
+
     // Qwen models
     if (isQwenReasoningModel(model)) {
       const thinkConfig = {
@@ -215,7 +265,7 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
           isQwenAlwaysThinkModel(model) || !isSupportEnableThinkingProvider(this.provider) ? undefined : true,
         thinking_budget: budgetTokens
       }
-      if (this.provider.id === 'dashscope') {
+      if (this.provider.id === SystemProviderIds.dashscope) {
         return {
           ...thinkConfig,
           incremental_output: true
diff --git a/src/renderer/src/config/models.ts b/src/renderer/src/config/models.ts
index 5425c918db..fb2ee8dd67 100644
--- a/src/renderer/src/config/models.ts
+++ b/src/renderer/src/config/models.ts
@@ -150,6 +150,7 @@ import YoudaoLogo from '@renderer/assets/images/providers/netease-youdao.svg'
 import NomicLogo from '@renderer/assets/images/providers/nomic.png'
 import { getProviderByModel } from '@renderer/services/AssistantService'
 import {
+  isSystemProviderId,
   Model,
   ReasoningEffortConfig,
   SystemProviderId,
@@ -290,6 +291,7 @@ export const CLAUDE_SUPPORTED_WEBSEARCH_REGEX = new RegExp(
 )
 
 // 模型类型到支持的reasoning_effort的映射表
+// TODO: refactor this. too many identical options
 export const MODEL_SUPPORTED_REASONING_EFFORT: ReasoningEffortConfig = {
   default: ['low', 'medium', 'high'] as const,
   o: ['low', 'medium', 'high'] as const,
@@ -303,7 +305,8 @@ export const MODEL_SUPPORTED_REASONING_EFFORT: ReasoningEffortConfig = {
   doubao_no_auto: ['high'] as const,
   hunyuan: ['auto'] as const,
   zhipu: ['auto'] as const,
-  perplexity: ['low', 'medium', 'high'] as const
+  perplexity: ['low', 'medium', 'high'] as const,
+  deepseek_hybrid: ['auto'] as const
 } as const
 
 // 模型类型到支持选项的映射表
@@ -320,7 +323,8 @@ export const MODEL_SUPPORTED_OPTIONS: ThinkingOptionConfig = {
   doubao_no_auto: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.doubao_no_auto] as const,
   hunyuan: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.hunyuan] as const,
   zhipu: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.zhipu] as const,
-  perplexity: MODEL_SUPPORTED_REASONING_EFFORT.perplexity
+  perplexity: MODEL_SUPPORTED_REASONING_EFFORT.perplexity,
+  deepseek_hybrid: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.deepseek_hybrid] as const
 } as const
 
 export const getThinkModelType = (model: Model): ThinkingModelType => {
@@ -350,6 +354,7 @@ export const getThinkModelType = (model: Model): ThinkingModelType => {
   } else if (isSupportedThinkingTokenHunyuanModel(model)) thinkingModelType = 'hunyuan'
   else if (isSupportedReasoningEffortPerplexityModel(model)) thinkingModelType = 'perplexity'
   else if (isSupportedThinkingTokenZhipuModel(model)) thinkingModelType = 'zhipu'
+  else if (isDeepSeekHybridInferenceModel(model)) thinkingModelType = 'deepseek_hybrid'
   return thinkingModelType
 }
 
@@ -372,11 +377,21 @@ export function isFunctionCallingModel(model?: Model): boolean {
     return FUNCTION_CALLING_REGEX.test(modelId) || FUNCTION_CALLING_REGEX.test(model.name)
   }
 
-  if (['deepseek', 'anthropic'].includes(model.provider)) {
+  if (['deepseek', 'anthropic', 'kimi', 'moonshot'].includes(model.provider)) {
     return true
   }
 
-  if (['kimi', 'moonshot'].includes(model.provider)) {
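+  // As of 2025-08-26, Bailian (dashscope) and Volcengine (doubao) do not support function calling for DeepSeek V3.1.
+  // Every other provider is assumed to support it for now.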
+  if (isDeepSeekHybridInferenceModel(model)) {
+    if (isSystemProviderId(model.provider)) {
+      switch (model.provider) {
+        case 'dashscope':
+        case 'doubao':
+          // case 'nvidia': // could not be verified against the NVIDIA API; assumed to be supported for now
+          return false
+      }
+    }
     return true
   }
 
@@ -1401,7 +1416,7 @@ export const SYSTEM_MODELS: Record<SystemProviderId | 'defaultModel', Model[]> =
   dashscope: [
     { id: 'qwen-vl-plus', name: 'qwen-vl-plus', provider: 'dashscope', group: 'qwen-vl', owned_by: 'system' },
     { id: 'qwen-coder-plus', name: 'qwen-coder-plus', provider: 'dashscope', group: 'qwen-coder', owned_by: 'system' },
-    { id: 'qwen-turbo', name: 'qwen-turbo', provider: 'dashscope', group: 'qwen-turbo', owned_by: 'system' },
+    { id: 'qwen-flash', name: 'qwen-flash', provider: 'dashscope', group: 'qwen-flash', owned_by: 'system' },
     { id: 'qwen-plus', name: 'qwen-plus', provider: 'dashscope', group: 'qwen-plus', owned_by: 'system' },
     { id: 'qwen-max', name: 'qwen-max', provider: 'dashscope', group: 'qwen-max', owned_by: 'system' }
   ],
@@ -2627,6 +2642,13 @@ export function isSupportedThinkingTokenModel(model?: Model): boolean {
     return false
   }
 
+  // Specifically for DeepSeek V3.1. White list for now
+  if (isDeepSeekHybridInferenceModel(model)) {
+    return (['openrouter', 'dashscope', 'doubao', 'silicon', 'nvidia'] satisfies SystemProviderId[]).some(
+      (id) => id === model.provider
+    )
+  }
+
   return (
     isSupportedThinkingTokenGeminiModel(model) ||
     isSupportedThinkingTokenQwenModel(model) ||
@@ -2764,7 +2786,9 @@ export function isSupportedThinkingTokenQwenModel(model?: Model): boolean {
     'qwen-turbo-0428',
     'qwen-turbo-2025-04-28',
     'qwen-turbo-0715',
-    'qwen-turbo-2025-07-15'
+    'qwen-turbo-2025-07-15',
+    'qwen-flash',
+    'qwen-flash-2025-07-28'
   ].includes(modelId)
 }
 
@@ -2838,6 +2862,15 @@ export const isSupportedThinkingTokenZhipuModel = (model: Model): boolean => {
   return modelId.includes('glm-4.5')
 }
 
+// DeepSeek hybrid-inference (V3.1) check. DeepSeek's own API switches via chat/reasoner ids; other providers need this id match.
+export const isDeepSeekHybridInferenceModel = (model: Model) => {
+  const modelId = getLowerBaseModelName(model.id)
+  // The 3.1 marker is mandatory so plain deepseek-v3 ids are excluded. 'deepseek-chat-v3.1' is OpenRouter's id; risky if another provider reuses it for a non-thinking model.
+  return /deepseek-v3(?:\.1|-1-\d+)/.test(modelId) || modelId === 'deepseek-chat-v3.1'
+}
+
+export const isSupportedThinkingTokenDeepSeekModel = isDeepSeekHybridInferenceModel
+
 export const isZhipuReasoningModel = (model?: Model): boolean => {
   if (!model) {
     return false
@@ -2870,6 +2903,8 @@ export function isReasoningModel(model?: Model): boolean {
       REASONING_REGEX.test(modelId) ||
       REASONING_REGEX.test(model.name) ||
       isSupportedThinkingTokenDoubaoModel(model) ||
+      isDeepSeekHybridInferenceModel(model) ||
+      isDeepSeekHybridInferenceModel({ ...model, id: model.name }) ||
       false
     )
   }
@@ -2884,6 +2919,7 @@ export function isReasoningModel(model?: Model): boolean {
     isPerplexityReasoningModel(model) ||
     isZhipuReasoningModel(model) ||
     isStepReasoningModel(model) ||
+    isDeepSeekHybridInferenceModel(model) ||
     modelId.includes('magistral') ||
     modelId.includes('minimax-m1') ||
     modelId.includes('pangu-pro-moe')
@@ -2992,7 +3028,7 @@ export function isWebSearchModel(model: Model): boolean {
   }
 
   if (provider.id === 'dashscope') {
-    const models = ['qwen-turbo', 'qwen-max', 'qwen-plus', 'qwq']
+    const models = ['qwen-turbo', 'qwen-max', 'qwen-plus', 'qwq', 'qwen-flash']
     // matches id like qwen-max-0919, qwen-max-latest
     return models.some((i) => modelId.startsWith(i))
   }
@@ -3196,6 +3232,7 @@ export const THINKING_TOKEN_MAP: Record<string, { min: number; max: number }> =
   'qwen3-0\\.6b$': { min: 0, max: 30_720 },
   'qwen-plus.*$': { min: 0, max: 38_912 },
   'qwen-turbo.*$': { min: 0, max: 38_912 },
+  'qwen-flash.*$': { min: 0, max: 81_920 },
   'qwen3-.*$': { min: 1024, max: 38_912 },
 
   // Claude models
diff --git a/src/renderer/src/types/index.ts b/src/renderer/src/types/index.ts
index ee35d7202f..76fbf9a687 100644
--- a/src/renderer/src/types/index.ts
+++ b/src/renderer/src/types/index.ts
@@ -67,7 +67,8 @@ const ThinkModelTypes = [
   'doubao_no_auto',
   'hunyuan',
   'zhipu',
-  'perplexity'
+  'perplexity',
+  'deepseek_hybrid'
 ] as const
 
 export type ReasoningEffortOption = NonNullable<OpenAI.ReasoningEffort> | 'auto'
diff --git a/src/renderer/src/types/sdk.ts b/src/renderer/src/types/sdk.ts
index e897098963..4c7fa299d4 100644
--- a/src/renderer/src/types/sdk.ts
+++ b/src/renderer/src/types/sdk.ts
@@ -81,6 +81,10 @@ export type ReasoningEffortOptionalParams = {
   thinking_budget?: number
   incremental_output?: boolean
   enable_reasoning?: boolean
+  // nvidia
+  chat_template_kwargs?: {
+    thinking: boolean
+  }
   extra_body?: {
     google?: {
       thinking_config: {