feat(reasoning): add special handling for Grok 4 fast models & qwen3-omni/qwen3-vl (#10367)

* feat(reasoning): add special handling for Grok 4 fast models

* feat(models): add grok4_fast model and refine grok reasoning

* feat(reasoning): unify Grok reasoning handling and XAI params

* feat(models): Grok/qwen handling and XAI

* feat(models): recognize qwen3-vl thinking models and add sizes

* fix(reasoning): reasoning enabled for QwenAlwaysThink models

* feat(reasoning): enable reasoning for Grok 4 Fast models

* fix(reasoning): rename and correct Grok 4 Fast model checks

* fix: adjust Grok-4 Fast reasoning detection for OpenRouter

* fix(reasoning): exclude non-reasoning models from reasoning detection
George·Dong 2025-10-12 11:34:16 +08:00 committed by kangfenmao
parent eccdd7643e
commit 49deece835
4 changed files with 76 additions and 6 deletions

View File

@@ -6,6 +6,7 @@ import {
getThinkModelType,
isDeepSeekHybridInferenceModel,
isDoubaoThinkingAutoModel,
isGrok4FastReasoningModel,
isGrokReasoningModel,
isOpenAIReasoningModel,
isQwenAlwaysThinkModel,
@@ -52,7 +53,12 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
return {}
}
// Don't disable reasoning for models that require it
if (isGrokReasoningModel(model) || isOpenAIReasoningModel(model) || model.id.includes('seed-oss')) {
if (
isGrokReasoningModel(model) ||
isOpenAIReasoningModel(model) ||
isQwenAlwaysThinkModel(model) ||
model.id.includes('seed-oss')
) {
return {}
}
return { reasoning: { enabled: false, exclude: true } }
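The shape of the early return matters downstream: models that must reason get an empty object, so no override is sent at all, while everything else gets an explicit disable payload. A minimal standalone sketch of that guard, with hypothetical regex checks standing in for the real isGrokReasoningModel / isOpenAIReasoningModel / isQwenAlwaysThinkModel helpers:

type ReasoningParams = { reasoning?: { enabled: boolean; exclude?: boolean } }

// Hypothetical, simplified stand-in for the guard above.
function disableReasoningIfAllowed(modelId: string): ReasoningParams {
  const mustReason =
    /\bgrok-(?:3-mini|4)/.test(modelId) || // Grok reasoning family
    /^o\d/.test(modelId) || // OpenAI o-series
    /^qwen3.*thinking/.test(modelId) || // Qwen always-think variants
    modelId.includes('seed-oss')
  // Models that must reason get no override; everything else is switched off
  return mustReason ? {} : { reasoning: { enabled: false, exclude: true } }
}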
@@ -100,6 +106,7 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
// Cases where reasoningEffort is set
// DeepSeek hybrid inference models, v3.1 and maybe more in the future
// Different providers control thinking in different ways; unify the handling here
if (isDeepSeekHybridInferenceModel(model)) {
if (isSystemProvider(provider)) {
switch (provider.id) {
@@ -142,6 +149,16 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
// OpenRouter models
if (model.provider === SystemProviderIds.openrouter) {
// Grok 4 Fast doesn't support effort levels, always use enabled: true
if (isGrok4FastReasoningModel(model)) {
return {
reasoning: {
enabled: true // Ignore effort level, just enable reasoning
}
}
}
// Other OpenRouter models that support effort levels
if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
return {
reasoning: {
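In terms of the request body, the two OpenRouter branches resolve to payloads roughly like the following (a sketch based on OpenRouter's unified reasoning parameter; the 'high' effort value is illustrative):

// Grok 4 Fast: reasoning can only be switched on, never tuned
const grok4FastPayload = { reasoning: { enabled: true } }
// Effort-capable OpenRouter models keep the usual effort field
const effortPayload = { reasoning: { effort: 'high' } }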
@@ -412,6 +429,13 @@ export function getGeminiReasoningParams(assistant: Assistant, model: Model): Re
return {}
}
/**
* Get XAI-specific reasoning parameters
* This function should only be called for XAI provider models
* @param assistant - The assistant configuration
* @param model - The model being used
* @returns XAI-specific reasoning parameters
*/
export function getXAIReasoningParams(assistant: Assistant, model: Model): Record<string, any> {
if (!isSupportedReasoningEffortGrokModel(model)) {
return {}
@@ -419,6 +443,11 @@ export function getXAIReasoningParams(assistant: Assistant, model: Model): Record<string, any> {
const { reasoning_effort: reasoningEffort } = getAssistantSettings(assistant)
if (!reasoningEffort) {
return {}
}
// For XAI provider Grok models, use reasoningEffort parameter directly
return {
reasoningEffort
}
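A hypothetical call site, assuming an assistant configured with reasoning_effort 'high' and a grok-3-mini model on the XAI provider (both values are illustrative, not taken from the diff):

const params = getXAIReasoningParams(assistant, model)
// => { reasoningEffort: 'high' }, presumably merged into the xAI request body;
// => {} when the model is unsupported or no effort is set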

View File

@@ -14,7 +14,7 @@ import { GEMINI_FLASH_MODEL_REGEX } from './websearch'
// Reasoning models
export const REASONING_REGEX =
/^(o\d+(?:-[\w-]+)?|.*\b(?:reasoning|reasoner|thinking)\b.*|.*-[rR]\d+.*|.*\bqwq(?:-[\w-]+)?\b.*|.*\bhunyuan-t1(?:-[\w-]+)?\b.*|.*\bglm-zero-preview\b.*|.*\bgrok-(?:3-mini|4)(?:-[\w-]+)?\b.*)$/i
/^(?!.*-non-reasoning\b)(o\d+(?:-[\w-]+)?|.*\b(?:reasoning|reasoner|thinking)\b.*|.*-[rR]\d+.*|.*\bqwq(?:-[\w-]+)?\b.*|.*\bhunyuan-t1(?:-[\w-]+)?\b.*|.*\bglm-zero-preview\b.*|.*\bgrok-(?:3-mini|4|4-fast)(?:-[\w-]+)?\b.*)$/i
// Maps each model type to its supported reasoning_effort values
// TODO: refactor this. too many identical options
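The new (?!.*-non-reasoning\b) lookahead rejects any ID containing a -non-reasoning segment before the usual branches run. A quick sketch of the behavior (model IDs are illustrative):

REASONING_REGEX.test('grok-4-fast') // true, via the new grok-(3-mini|4|4-fast) branch
REASONING_REGEX.test('grok-4-fast-non-reasoning') // false, rejected by the lookahead
REASONING_REGEX.test('deepseek-reasoner') // true, the generic "reasoner" branch is unaffected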
@@ -24,6 +24,7 @@ export const MODEL_SUPPORTED_REASONING_EFFORT: ReasoningEffortConfig = {
gpt5: ['minimal', 'low', 'medium', 'high'] as const,
gpt5_codex: ['low', 'medium', 'high'] as const,
grok: ['low', 'high'] as const,
grok4_fast: ['auto'] as const,
gemini: ['low', 'medium', 'high', 'auto'] as const,
gemini_pro: ['low', 'medium', 'high', 'auto'] as const,
qwen: ['low', 'medium', 'high'] as const,
@@ -43,6 +44,7 @@ export const MODEL_SUPPORTED_OPTIONS: ThinkingOptionConfig = {
gpt5: [...MODEL_SUPPORTED_REASONING_EFFORT.gpt5] as const,
gpt5_codex: MODEL_SUPPORTED_REASONING_EFFORT.gpt5_codex,
grok: MODEL_SUPPORTED_REASONING_EFFORT.grok,
grok4_fast: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.grok4_fast] as const,
gemini: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.gemini] as const,
gemini_pro: MODEL_SUPPORTED_REASONING_EFFORT.gemini_pro,
qwen: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.qwen] as const,
@@ -66,6 +68,8 @@ export const getThinkModelType = (model: Model): ThinkingModelType => {
}
} else if (isSupportedReasoningEffortOpenAIModel(model)) {
thinkingModelType = 'o'
} else if (isGrok4FastReasoningModel(model)) {
thinkingModelType = 'grok4_fast'
} else if (isSupportedThinkingTokenGeminiModel(model)) {
if (GEMINI_FLASH_MODEL_REGEX.test(model.id)) {
thinkingModelType = 'gemini'
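Combined with the table entries above, a Grok 4 Fast model now resolves to the 'grok4_fast' thinking type, whose selectable options are 'off' plus the single 'auto' effort. A hypothetical check (the model literal is illustrative, and detection is provider-sensitive, as the next hunk shows):

const m = { id: 'grok-4-fast', provider: 'openrouter' } as Model
getThinkModelType(m) // => 'grok4_fast'
MODEL_SUPPORTED_OPTIONS.grok4_fast // => ['off', 'auto']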
@@ -142,19 +146,46 @@ export function isSupportedReasoningEffortGrokModel(model?: Model): boolean {
}
const modelId = getLowerBaseModelName(model.id)
const providerId = model.provider.toLowerCase()
if (modelId.includes('grok-3-mini')) {
return true
}
if (providerId === 'openrouter' && modelId.includes('grok-4-fast')) {
return true
}
return false
}
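So effort selection remains available for grok-3-mini on any provider, but for grok-4-fast only through OpenRouter, which maps reasoning parameters onto a single model ID. Illustrative calls (hypothetical model literals):

isSupportedReasoningEffortGrokModel({ id: 'grok-3-mini', provider: 'xai' } as Model) // true everywhere
isSupportedReasoningEffortGrokModel({ id: 'grok-4-fast', provider: 'openrouter' } as Model) // true
isSupportedReasoningEffortGrokModel({ id: 'grok-4-fast', provider: 'xai' } as Model) // false: no effort levels on XAI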
/**
* Checks whether the model is a Grok 4 Fast reasoning variant
* Explicitly excludes non-reasoning variants (models with 'non-reasoning' in their ID)
*
* Note: xAI's official API uses separate model IDs for the reasoning and non-reasoning
* variants, while third-party providers such as OpenRouter expose a single ID and control
* reasoning via request parameters, so only the OpenRouter variant supports toggling.
*
* @param model - The model to check
* @returns true if the model is a reasoning-enabled Grok 4 Fast model
*/
export function isGrok4FastReasoningModel(model?: Model): boolean {
if (!model) {
return false
}
const modelId = getLowerBaseModelName(model.id)
return modelId.includes('grok-4-fast') && !modelId.includes('non-reasoning')
}
export function isGrokReasoningModel(model?: Model): boolean {
if (!model) {
return false
}
const modelId = getLowerBaseModelName(model.id)
if (isSupportedReasoningEffortGrokModel(model) || modelId.includes('grok-4')) {
if (
isSupportedReasoningEffortGrokModel(model) ||
(modelId.includes('grok-4') && !modelId.includes('non-reasoning'))
) {
return true
}
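With the added non-reasoning guard, grok-4 family IDs count as reasoning models only when they are the reasoning variant. For example (hypothetical literals):

isGrokReasoningModel({ id: 'grok-4-fast', provider: 'xai' } as Model) // true
isGrokReasoningModel({ id: 'grok-4-fast-non-reasoning', provider: 'xai' } as Model) // false: excluded by the new guard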
@@ -265,7 +296,11 @@ export function isQwenAlwaysThinkModel(model?: Model): boolean {
return false
}
const modelId = getLowerBaseModelName(model.id, '/')
return modelId.startsWith('qwen3') && modelId.includes('thinking')
// Includes qwen3-prefixed thinking models as well as qwen3-vl thinking models
return (
(modelId.startsWith('qwen3') && modelId.includes('thinking')) ||
(modelId.includes('qwen3-vl') && modelId.includes('thinking'))
)
}
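Both ID shapes now qualify, whether the thinking marker follows a qwen3 prefix directly or appears in a qwen3-vl name. For example (hypothetical literals; the second assumes getLowerBaseModelName strips the namespace and lowercases):

isQwenAlwaysThinkModel({ id: 'qwen3-235b-a22b-thinking-2507' } as Model) // true, qwen3-prefixed branch
isQwenAlwaysThinkModel({ id: 'Qwen/Qwen3-VL-235B-A22B-Thinking' } as Model) // true, qwen3-vl branch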
// Regex for Doubao models that support thinking mode
@@ -329,7 +364,10 @@ export const isPerplexityReasoningModel = (model?: Model): boolean => {
}
const modelId = getLowerBaseModelName(model.id, '/')
return isSupportedReasoningEffortPerplexityModel(model) || modelId.includes('reasoning')
return (
isSupportedReasoningEffortPerplexityModel(model) ||
(modelId.includes('reasoning') && !modelId.includes('non-reasoning'))
)
}
export const isSupportedReasoningEffortPerplexityModel = (model: Model): boolean => {
@@ -443,6 +481,8 @@ export const THINKING_TOKEN_MAP: Record<string, { min: number; max: number }> =
// For the qwen-plus-x series, the maximum chain-of-thought length became 81_920 starting with qwen-plus-2025-07-28; the qwen-plus model was updated to match on 2025-09-16
'qwen3-235b-a22b-thinking-2507$': { min: 0, max: 81_920 },
'qwen3-30b-a3b-thinking-2507$': { min: 0, max: 81_920 },
'qwen3-vl-235b-a22b-thinking$': { min: 0, max: 81_920 },
'qwen3-vl-30b-a3b-thinking$': { min: 0, max: 81_920 },
'qwen-plus-2025-07-14$': { min: 0, max: 38_912 },
'qwen-plus-2025-04-28$': { min: 0, max: 38_912 },
'qwen3-1\\.7b$': { min: 0, max: 30_720 },
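The keys of THINKING_TOKEN_MAP are regex sources matched against the model ID, so the two new entries give the qwen3-vl thinking models an 81,920-token budget ceiling. A sketch of the lookup under that assumption (the actual resolver is not shown in this diff):

// Assumed lookup logic: the first pattern that matches the model ID wins.
function findThinkingBudget(modelId: string): { min: number; max: number } | undefined {
  for (const [pattern, range] of Object.entries(THINKING_TOKEN_MAP)) {
    if (new RegExp(pattern, 'i').test(modelId)) return range
  }
  return undefined
}
findThinkingBudget('qwen3-vl-235b-a22b-thinking') // => { min: 0, max: 81_920 }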

View File

@@ -24,7 +24,7 @@ const visionAllowedModels = [
'qwen2.5-vl',
'qwen3-vl',
'qwen2.5-omni',
'qwen3-omni',
'qwen3-omni(?:-[\\w-]+)?',
'qvq',
'internvl2',
'grok-vision-beta',
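Entries in visionAllowedModels are regex fragments, so the widened qwen3-omni entry also matches suffixed variants. A quick check, assuming the fragments are anchored when the final pattern is assembled (both the anchoring and the suffixed ID below are illustrative):

const widened = new RegExp('^qwen3-omni(?:-[\\w-]+)?$', 'i')
widened.test('qwen3-omni') // true
widened.test('qwen3-omni-flash') // true; the old bare 'qwen3-omni' fragment would not match here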

View File

@@ -78,6 +78,7 @@ const ThinkModelTypes = [
'gpt5',
'gpt5_codex',
'grok',
'grok4_fast',
'gemini',
'gemini_pro',
'qwen',