feat(reasoning): add special handling for Grok 4 fast models & qwen3-omni/qwen3-vl (#10367)

* feat(reasoning): add special handling for Grok 4 fast models

* feat(models): add grok4_fast model and refine grok reasoning

* feat(reasoning): unify Grok reasoning handling and XAI params

* feat(models): Grok/qwen handling and XAI

* feat(models): recognize qwen3-vl thinking models and add sizes

* fix(reasoning): reasoning enabled for QwenAlwaysThink models

* feat(reasoning): enable reasoning for Grok 4 Fast models

* fix(reasoning): rename and correct Grok 4 Fast model checks

* fix: adjust Grok-4 Fast reasoning detection for OpenRouter

* fix(reasoning): exclude non-reasoning models from reasoning detection
George·Dong 2025-10-12 11:34:16 +08:00 committed by kangfenmao
parent eccdd7643e
commit 49deece835
4 changed files with 76 additions and 6 deletions

View File

@@ -6,6 +6,7 @@ import {
getThinkModelType,
isDeepSeekHybridInferenceModel,
isDoubaoThinkingAutoModel,
isGrok4FastReasoningModel,
isGrokReasoningModel,
isOpenAIReasoningModel,
isQwenAlwaysThinkModel,
@@ -52,7 +53,12 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
return {}
}
// Don't disable reasoning for models that require it
if (isGrokReasoningModel(model) || isOpenAIReasoningModel(model) || model.id.includes('seed-oss')) {
if (
isGrokReasoningModel(model) ||
isOpenAIReasoningModel(model) ||
isQwenAlwaysThinkModel(model) ||
model.id.includes('seed-oss')
) {
return {}
}
return { reasoning: { enabled: false, exclude: true } }
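The shape of the early return matters downstream: models that must reason get an empty object, so no override is sent at all, while everything else gets an explicit disable payload. A minimal standalone sketch of that guard, with hypothetical regex checks standing in for the real isGrokReasoningModel / isOpenAIReasoningModel / isQwenAlwaysThinkModel helpers:

type ReasoningParams = { reasoning?: { enabled: boolean; exclude?: boolean } }

// Hypothetical, simplified stand-in for the guard above.
function disableReasoningIfAllowed(modelId: string): ReasoningParams {
  const mustReason =
    /\bgrok-(?:3-mini|4)/.test(modelId) || // Grok reasoning family
    /^o\d/.test(modelId) || // OpenAI o-series
    /^qwen3.*thinking/.test(modelId) || // Qwen always-think variants
    modelId.includes('seed-oss')
  // Models that must reason get no override; everything else is switched off
  return mustReason ? {} : { reasoning: { enabled: false, exclude: true } }
}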
@@ -100,6 +106,7 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
// Cases where reasoningEffort is set
// DeepSeek hybrid inference models, v3.1 and maybe more in the future
// Different providers control thinking in different ways; unify the handling here
if (isDeepSeekHybridInferenceModel(model)) {
if (isSystemProvider(provider)) {
switch (provider.id) {
@@ -142,6 +149,16 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
// OpenRouter models
if (model.provider === SystemProviderIds.openrouter) {
// Grok 4 Fast doesn't support effort levels, always use enabled: true
if (isGrok4FastReasoningModel(model)) {
return {
reasoning: {
enabled: true // Ignore effort level, just enable reasoning
}
}
}
// Other OpenRouter models that support effort levels
if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
return {
reasoning: {
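In terms of the request body, the two OpenRouter branches resolve to payloads roughly like the following (a sketch based on OpenRouter's unified reasoning parameter; the 'high' effort value is illustrative):

// Grok 4 Fast: reasoning can only be switched on, never tuned
const grok4FastPayload = { reasoning: { enabled: true } }
// Effort-capable OpenRouter models keep the usual effort field
const effortPayload = { reasoning: { effort: 'high' } }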
@@ -412,6 +429,13 @@ export function getGeminiReasoningParams(assistant: Assistant, model: Model): Re
return {}
}
/**
* Get XAI-specific reasoning parameters
* This function should only be called for XAI provider models
* @param assistant - The assistant configuration
* @param model - The model being used
* @returns XAI-specific reasoning parameters
*/
export function getXAIReasoningParams(assistant: Assistant, model: Model): Record<string, any> {
if (!isSupportedReasoningEffortGrokModel(model)) {
return {}
@@ -419,6 +443,11 @@ export function getXAIReasoningParams(assistant: Assistant, model: Model): Record<string, any> {
const { reasoning_effort: reasoningEffort } = getAssistantSettings(assistant)
if (!reasoningEffort) {
return {}
}
// For XAI provider Grok models, use reasoningEffort parameter directly
return {
reasoningEffort
}
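A hypothetical call site, assuming an assistant configured with reasoning_effort 'high' and a grok-3-mini model on the XAI provider (both values are illustrative, not taken from the diff):

const params = getXAIReasoningParams(assistant, model)
// => { reasoningEffort: 'high' }, presumably merged into the xAI request body;
// => {} when the model is unsupported or no effort is set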

View File

@@ -14,7 +14,7 @@ import { GEMINI_FLASH_MODEL_REGEX } from './websearch'
// Reasoning models
export const REASONING_REGEX =
/^(o\d+(?:-[\w-]+)?|.*\b(?:reasoning|reasoner|thinking)\b.*|.*-[rR]\d+.*|.*\bqwq(?:-[\w-]+)?\b.*|.*\bhunyuan-t1(?:-[\w-]+)?\b.*|.*\bglm-zero-preview\b.*|.*\bgrok-(?:3-mini|4)(?:-[\w-]+)?\b.*)$/i
/^(?!.*-non-reasoning\b)(o\d+(?:-[\w-]+)?|.*\b(?:reasoning|reasoner|thinking)\b.*|.*-[rR]\d+.*|.*\bqwq(?:-[\w-]+)?\b.*|.*\bhunyuan-t1(?:-[\w-]+)?\b.*|.*\bglm-zero-preview\b.*|.*\bgrok-(?:3-mini|4|4-fast)(?:-[\w-]+)?\b.*)$/i
// Maps each model type to its supported reasoning_effort values
// TODO: refactor this. too many identical options
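The new (?!.*-non-reasoning\b) lookahead rejects any ID containing a -non-reasoning segment before the usual branches run. A quick sketch of the behavior (model IDs are illustrative):

REASONING_REGEX.test('grok-4-fast') // true, via the new grok-(3-mini|4|4-fast) branch
REASONING_REGEX.test('grok-4-fast-non-reasoning') // false, rejected by the lookahead
REASONING_REGEX.test('deepseek-reasoner') // true, the generic "reasoner" branch is unaffected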
@@ -24,6 +24,7 @@ export const MODEL_SUPPORTED_REASONING_EFFORT: ReasoningEffortConfig = {
gpt5: ['minimal', 'low', 'medium', 'high'] as const,
gpt5_codex: ['low', 'medium', 'high'] as const,
grok: ['low', 'high'] as const,
grok4_fast: ['auto'] as const,
gemini: ['low', 'medium', 'high', 'auto'] as const,
gemini_pro: ['low', 'medium', 'high', 'auto'] as const,
qwen: ['low', 'medium', 'high'] as const,
@@ -43,6 +44,7 @@ export const MODEL_SUPPORTED_OPTIONS: ThinkingOptionConfig = {
gpt5: [...MODEL_SUPPORTED_REASONING_EFFORT.gpt5] as const,
gpt5_codex: MODEL_SUPPORTED_REASONING_EFFORT.gpt5_codex,
grok: MODEL_SUPPORTED_REASONING_EFFORT.grok,
grok4_fast: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.grok4_fast] as const,
gemini: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.gemini] as const,
gemini_pro: MODEL_SUPPORTED_REASONING_EFFORT.gemini_pro,
qwen: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.qwen] as const,
@@ -66,6 +68,8 @@ export const getThinkModelType = (model: Model): ThinkingModelType => {
}
} else if (isSupportedReasoningEffortOpenAIModel(model)) {
thinkingModelType = 'o'
} else if (isGrok4FastReasoningModel(model)) {
thinkingModelType = 'grok4_fast'
} else if (isSupportedThinkingTokenGeminiModel(model)) {
if (GEMINI_FLASH_MODEL_REGEX.test(model.id)) {
thinkingModelType = 'gemini'
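Combined with the table entries above, a Grok 4 Fast model now resolves to the 'grok4_fast' thinking type, whose selectable options are 'off' plus the single 'auto' effort. A hypothetical check (the model literal is illustrative, and detection is provider-sensitive, as the next hunk shows):

const m = { id: 'grok-4-fast', provider: 'openrouter' } as Model
getThinkModelType(m) // => 'grok4_fast'
MODEL_SUPPORTED_OPTIONS.grok4_fast // => ['off', 'auto']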
@@ -142,19 +146,46 @@ export function isSupportedReasoningEffortGrokModel(model?: Model): boolean {
}
const modelId = getLowerBaseModelName(model.id)
const providerId = model.provider.toLowerCase()
if (modelId.includes('grok-3-mini')) {
return true
}
if (providerId === 'openrouter' && modelId.includes('grok-4-fast')) {
return true
}
return false
}
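So effort selection remains available for grok-3-mini on any provider, but for grok-4-fast only through OpenRouter, which maps reasoning parameters onto a single model ID. Illustrative calls (hypothetical model literals):

isSupportedReasoningEffortGrokModel({ id: 'grok-3-mini', provider: 'xai' } as Model) // true everywhere
isSupportedReasoningEffortGrokModel({ id: 'grok-4-fast', provider: 'openrouter' } as Model) // true
isSupportedReasoningEffortGrokModel({ id: 'grok-4-fast', provider: 'xai' } as Model) // false: no effort levels on XAI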
/**
* Checks whether the model is a Grok 4 Fast reasoning variant
* Explicitly excludes non-reasoning variants (models with 'non-reasoning' in their ID)
*
* Note: xAI's official API uses separate model IDs for the reasoning and non-reasoning
* variants, while third-party providers such as OpenRouter expose a single ID and control
* reasoning via request parameters, so only the OpenRouter variant supports toggling.
*
* @param model - The model to check
* @returns true if the model is a reasoning-enabled Grok 4 Fast model
*/
export function isGrok4FastReasoningModel(model?: Model): boolean {
if (!model) {
return false
}
const modelId = getLowerBaseModelName(model.id)
return modelId.includes('grok-4-fast') && !modelId.includes('non-reasoning')
}
export function isGrokReasoningModel(model?: Model): boolean {
if (!model) {
return false
}
const modelId = getLowerBaseModelName(model.id)
if (isSupportedReasoningEffortGrokModel(model) || modelId.includes('grok-4')) {
if (
isSupportedReasoningEffortGrokModel(model) ||
(modelId.includes('grok-4') && !modelId.includes('non-reasoning'))
) {
return true
}
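With the added non-reasoning guard, grok-4 family IDs count as reasoning models only when they are the reasoning variant. For example (hypothetical literals):

isGrokReasoningModel({ id: 'grok-4-fast', provider: 'xai' } as Model) // true
isGrokReasoningModel({ id: 'grok-4-fast-non-reasoning', provider: 'xai' } as Model) // false: excluded by the new guard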
@@ -265,7 +296,11 @@ export function isQwenAlwaysThinkModel(model?: Model): boolean {
return false
}
const modelId = getLowerBaseModelName(model.id, '/')
return modelId.startsWith('qwen3') && modelId.includes('thinking')
// Includes qwen3-prefixed thinking models as well as qwen3-vl thinking models
return (
(modelId.startsWith('qwen3') && modelId.includes('thinking')) ||
(modelId.includes('qwen3-vl') && modelId.includes('thinking'))
)
}
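Both ID shapes now qualify, whether the thinking marker follows a qwen3 prefix directly or appears in a qwen3-vl name. For example (hypothetical literals; the second assumes getLowerBaseModelName strips the namespace and lowercases):

isQwenAlwaysThinkModel({ id: 'qwen3-235b-a22b-thinking-2507' } as Model) // true, qwen3-prefixed branch
isQwenAlwaysThinkModel({ id: 'Qwen/Qwen3-VL-235B-A22B-Thinking' } as Model) // true, qwen3-vl branch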
// Regex for Doubao models that support thinking mode
@@ -329,7 +364,10 @@ export const isPerplexityReasoningModel = (model?: Model): boolean => {
}
const modelId = getLowerBaseModelName(model.id, '/')
return isSupportedReasoningEffortPerplexityModel(model) || modelId.includes('reasoning')
return (
isSupportedReasoningEffortPerplexityModel(model) ||
(modelId.includes('reasoning') && !modelId.includes('non-reasoning'))
)
}
export const isSupportedReasoningEffortPerplexityModel = (model: Model): boolean => {
@@ -443,6 +481,8 @@ export const THINKING_TOKEN_MAP: Record<string, { min: number; max: number }> =
// For the qwen-plus-x series, the maximum chain-of-thought length became 81_920 starting with qwen-plus-2025-07-28; the qwen-plus model was updated to match on 2025-09-16
'qwen3-235b-a22b-thinking-2507$': { min: 0, max: 81_920 },
'qwen3-30b-a3b-thinking-2507$': { min: 0, max: 81_920 },
'qwen3-vl-235b-a22b-thinking$': { min: 0, max: 81_920 },
'qwen3-vl-30b-a3b-thinking$': { min: 0, max: 81_920 },
'qwen-plus-2025-07-14$': { min: 0, max: 38_912 },
'qwen-plus-2025-04-28$': { min: 0, max: 38_912 },
'qwen3-1\\.7b$': { min: 0, max: 30_720 },
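The keys of THINKING_TOKEN_MAP are regex sources matched against the model ID, so the two new entries give the qwen3-vl thinking models an 81,920-token budget ceiling. A sketch of the lookup under that assumption (the actual resolver is not shown in this diff):

// Assumed lookup logic: the first pattern that matches the model ID wins.
function findThinkingBudget(modelId: string): { min: number; max: number } | undefined {
  for (const [pattern, range] of Object.entries(THINKING_TOKEN_MAP)) {
    if (new RegExp(pattern, 'i').test(modelId)) return range
  }
  return undefined
}
findThinkingBudget('qwen3-vl-235b-a22b-thinking') // => { min: 0, max: 81_920 }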

View File

@@ -24,7 +24,7 @@ const visionAllowedModels = [
'qwen2.5-vl',
'qwen3-vl',
'qwen2.5-omni',
'qwen3-omni',
'qwen3-omni(?:-[\\w-]+)?',
'qvq',
'internvl2',
'grok-vision-beta',
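Entries in visionAllowedModels are regex fragments, so the widened qwen3-omni entry also matches suffixed variants. A quick check, assuming the fragments are anchored when the final pattern is assembled (both the anchoring and the suffixed ID below are illustrative):

const widened = new RegExp('^qwen3-omni(?:-[\\w-]+)?$', 'i')
widened.test('qwen3-omni') // true
widened.test('qwen3-omni-flash') // true; the old bare 'qwen3-omni' fragment would not match here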

View File

@@ -78,6 +78,7 @@ const ThinkModelTypes = [
'gpt5',
'gpt5_codex',
'grok',
'grok4_fast',
'gemini',
'gemini_pro',
'qwen',