mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-27 12:51:26 +08:00
feat(reasoning): add special handling for Grok 4 fast models & qwen3-omni/qwen3-vl (#10367)
* feat(reasoning): add special handling for Grok 4 fast models * feat(models): add grok4_fast model and refine grok reasoning * feat(reasoning): unify Grok reasoning handling and XAI params * feat(models): Grok/qwen handling and XAI * feat(models): recognize qwen3-vl thinking models and add sizes * fix(reasoning): reasoning enabled for QwenAlwaysThink models * feat(reasoning): enable reasoning for Grok 4 Fast models * fix(reasoning): rename and correct Grok 4 Fast model checks * fix: adjust Grok-4 Fast reasoning detection for OpenRouter * fix(reasoning): exclude non-reasoning models from reasoning detection
This commit is contained in:
parent
eccdd7643e
commit
49deece835
@ -6,6 +6,7 @@ import {
|
||||
getThinkModelType,
|
||||
isDeepSeekHybridInferenceModel,
|
||||
isDoubaoThinkingAutoModel,
|
||||
isGrok4FastReasoningModel,
|
||||
isGrokReasoningModel,
|
||||
isOpenAIReasoningModel,
|
||||
isQwenAlwaysThinkModel,
|
||||
@ -52,7 +53,12 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
|
||||
return {}
|
||||
}
|
||||
// Don't disable reasoning for models that require it
|
||||
if (isGrokReasoningModel(model) || isOpenAIReasoningModel(model) || model.id.includes('seed-oss')) {
|
||||
if (
|
||||
isGrokReasoningModel(model) ||
|
||||
isOpenAIReasoningModel(model) ||
|
||||
isQwenAlwaysThinkModel(model) ||
|
||||
model.id.includes('seed-oss')
|
||||
) {
|
||||
return {}
|
||||
}
|
||||
return { reasoning: { enabled: false, exclude: true } }
|
||||
@ -100,6 +106,7 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
|
||||
// reasoningEffort有效的情况
|
||||
// DeepSeek hybrid inference models, v3.1 and maybe more in the future
|
||||
// 不同的 provider 有不同的思考控制方式,在这里统一解决
|
||||
|
||||
if (isDeepSeekHybridInferenceModel(model)) {
|
||||
if (isSystemProvider(provider)) {
|
||||
switch (provider.id) {
|
||||
@ -142,6 +149,16 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
|
||||
|
||||
// OpenRouter models
|
||||
if (model.provider === SystemProviderIds.openrouter) {
|
||||
// Grok 4 Fast doesn't support effort levels, always use enabled: true
|
||||
if (isGrok4FastReasoningModel(model)) {
|
||||
return {
|
||||
reasoning: {
|
||||
enabled: true // Ignore effort level, just enable reasoning
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Other OpenRouter models that support effort levels
|
||||
if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
|
||||
return {
|
||||
reasoning: {
|
||||
@ -412,6 +429,13 @@ export function getGeminiReasoningParams(assistant: Assistant, model: Model): Re
|
||||
return {}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get XAI-specific reasoning parameters
|
||||
* This function should only be called for XAI provider models
|
||||
* @param assistant - The assistant configuration
|
||||
* @param model - The model being used
|
||||
* @returns XAI-specific reasoning parameters
|
||||
*/
|
||||
export function getXAIReasoningParams(assistant: Assistant, model: Model): Record<string, any> {
|
||||
if (!isSupportedReasoningEffortGrokModel(model)) {
|
||||
return {}
|
||||
@ -419,6 +443,11 @@ export function getXAIReasoningParams(assistant: Assistant, model: Model): Recor
|
||||
|
||||
const { reasoning_effort: reasoningEffort } = getAssistantSettings(assistant)
|
||||
|
||||
if (!reasoningEffort) {
|
||||
return {}
|
||||
}
|
||||
|
||||
// For XAI provider Grok models, use reasoningEffort parameter directly
|
||||
return {
|
||||
reasoningEffort
|
||||
}
|
||||
|
||||
@ -14,7 +14,7 @@ import { GEMINI_FLASH_MODEL_REGEX } from './websearch'
|
||||
|
||||
// Reasoning models
|
||||
export const REASONING_REGEX =
|
||||
/^(o\d+(?:-[\w-]+)?|.*\b(?:reasoning|reasoner|thinking)\b.*|.*-[rR]\d+.*|.*\bqwq(?:-[\w-]+)?\b.*|.*\bhunyuan-t1(?:-[\w-]+)?\b.*|.*\bglm-zero-preview\b.*|.*\bgrok-(?:3-mini|4)(?:-[\w-]+)?\b.*)$/i
|
||||
/^(?!.*-non-reasoning\b)(o\d+(?:-[\w-]+)?|.*\b(?:reasoning|reasoner|thinking)\b.*|.*-[rR]\d+.*|.*\bqwq(?:-[\w-]+)?\b.*|.*\bhunyuan-t1(?:-[\w-]+)?\b.*|.*\bglm-zero-preview\b.*|.*\bgrok-(?:3-mini|4|4-fast)(?:-[\w-]+)?\b.*)$/i
|
||||
|
||||
// 模型类型到支持的reasoning_effort的映射表
|
||||
// TODO: refactor this. too many identical options
|
||||
@ -24,6 +24,7 @@ export const MODEL_SUPPORTED_REASONING_EFFORT: ReasoningEffortConfig = {
|
||||
gpt5: ['minimal', 'low', 'medium', 'high'] as const,
|
||||
gpt5_codex: ['low', 'medium', 'high'] as const,
|
||||
grok: ['low', 'high'] as const,
|
||||
grok4_fast: ['auto'] as const,
|
||||
gemini: ['low', 'medium', 'high', 'auto'] as const,
|
||||
gemini_pro: ['low', 'medium', 'high', 'auto'] as const,
|
||||
qwen: ['low', 'medium', 'high'] as const,
|
||||
@ -43,6 +44,7 @@ export const MODEL_SUPPORTED_OPTIONS: ThinkingOptionConfig = {
|
||||
gpt5: [...MODEL_SUPPORTED_REASONING_EFFORT.gpt5] as const,
|
||||
gpt5_codex: MODEL_SUPPORTED_REASONING_EFFORT.gpt5_codex,
|
||||
grok: MODEL_SUPPORTED_REASONING_EFFORT.grok,
|
||||
grok4_fast: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.grok4_fast] as const,
|
||||
gemini: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.gemini] as const,
|
||||
gemini_pro: MODEL_SUPPORTED_REASONING_EFFORT.gemini_pro,
|
||||
qwen: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.qwen] as const,
|
||||
@ -66,6 +68,8 @@ export const getThinkModelType = (model: Model): ThinkingModelType => {
|
||||
}
|
||||
} else if (isSupportedReasoningEffortOpenAIModel(model)) {
|
||||
thinkingModelType = 'o'
|
||||
} else if (isGrok4FastReasoningModel(model)) {
|
||||
thinkingModelType = 'grok4_fast'
|
||||
} else if (isSupportedThinkingTokenGeminiModel(model)) {
|
||||
if (GEMINI_FLASH_MODEL_REGEX.test(model.id)) {
|
||||
thinkingModelType = 'gemini'
|
||||
@ -142,19 +146,46 @@ export function isSupportedReasoningEffortGrokModel(model?: Model): boolean {
|
||||
}
|
||||
|
||||
const modelId = getLowerBaseModelName(model.id)
|
||||
const providerId = model.provider.toLowerCase()
|
||||
if (modelId.includes('grok-3-mini')) {
|
||||
return true
|
||||
}
|
||||
|
||||
if (providerId === 'openrouter' && modelId.includes('grok-4-fast')) {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the model is Grok 4 Fast reasoning version
|
||||
* Explicitly excludes non-reasoning variants (models with 'non-reasoning' in their ID)
|
||||
*
|
||||
* Note: XAI official uses different model IDs for reasoning vs non-reasoning
|
||||
* Third-party providers like OpenRouter expose a single ID with reasoning parameters, while first-party providers require separate IDs. Only the OpenRouter variant supports toggling.
|
||||
*
|
||||
* @param model - The model to check
|
||||
* @returns true if the model is a reasoning-enabled Grok 4 Fast model
|
||||
*/
|
||||
export function isGrok4FastReasoningModel(model?: Model): boolean {
|
||||
if (!model) {
|
||||
return false
|
||||
}
|
||||
|
||||
const modelId = getLowerBaseModelName(model.id)
|
||||
return modelId.includes('grok-4-fast') && !modelId.includes('non-reasoning')
|
||||
}
|
||||
|
||||
export function isGrokReasoningModel(model?: Model): boolean {
|
||||
if (!model) {
|
||||
return false
|
||||
}
|
||||
const modelId = getLowerBaseModelName(model.id)
|
||||
if (isSupportedReasoningEffortGrokModel(model) || modelId.includes('grok-4')) {
|
||||
if (
|
||||
isSupportedReasoningEffortGrokModel(model) ||
|
||||
(modelId.includes('grok-4') && !modelId.includes('non-reasoning'))
|
||||
) {
|
||||
return true
|
||||
}
|
||||
|
||||
@ -265,7 +296,11 @@ export function isQwenAlwaysThinkModel(model?: Model): boolean {
|
||||
return false
|
||||
}
|
||||
const modelId = getLowerBaseModelName(model.id, '/')
|
||||
return modelId.startsWith('qwen3') && modelId.includes('thinking')
|
||||
// 包括 qwen3 开头的 thinking 模型和 qwen3-vl 的 thinking 模型
|
||||
return (
|
||||
(modelId.startsWith('qwen3') && modelId.includes('thinking')) ||
|
||||
(modelId.includes('qwen3-vl') && modelId.includes('thinking'))
|
||||
)
|
||||
}
|
||||
|
||||
// Doubao 支持思考模式的模型正则
|
||||
@ -329,7 +364,10 @@ export const isPerplexityReasoningModel = (model?: Model): boolean => {
|
||||
}
|
||||
|
||||
const modelId = getLowerBaseModelName(model.id, '/')
|
||||
return isSupportedReasoningEffortPerplexityModel(model) || modelId.includes('reasoning')
|
||||
return (
|
||||
isSupportedReasoningEffortPerplexityModel(model) ||
|
||||
(modelId.includes('reasoning') && !modelId.includes('non-reasoning'))
|
||||
)
|
||||
}
|
||||
|
||||
export const isSupportedReasoningEffortPerplexityModel = (model: Model): boolean => {
|
||||
@ -443,6 +481,8 @@ export const THINKING_TOKEN_MAP: Record<string, { min: number; max: number }> =
|
||||
// qwen-plus-x 系列自 qwen-plus-2025-07-28 后模型最长思维链变为 81_920, qwen-plus 模型于 2025.9.16 同步变更
|
||||
'qwen3-235b-a22b-thinking-2507$': { min: 0, max: 81_920 },
|
||||
'qwen3-30b-a3b-thinking-2507$': { min: 0, max: 81_920 },
|
||||
'qwen3-vl-235b-a22b-thinking$': { min: 0, max: 81_920 },
|
||||
'qwen3-vl-30b-a3b-thinking$': { min: 0, max: 81_920 },
|
||||
'qwen-plus-2025-07-14$': { min: 0, max: 38_912 },
|
||||
'qwen-plus-2025-04-28$': { min: 0, max: 38_912 },
|
||||
'qwen3-1\\.7b$': { min: 0, max: 30_720 },
|
||||
|
||||
@ -24,7 +24,7 @@ const visionAllowedModels = [
|
||||
'qwen2.5-vl',
|
||||
'qwen3-vl',
|
||||
'qwen2.5-omni',
|
||||
'qwen3-omni',
|
||||
'qwen3-omni(?:-[\\w-]+)?',
|
||||
'qvq',
|
||||
'internvl2',
|
||||
'grok-vision-beta',
|
||||
|
||||
@ -78,6 +78,7 @@ const ThinkModelTypes = [
|
||||
'gpt5',
|
||||
'gpt5_codex',
|
||||
'grok',
|
||||
'grok4_fast',
|
||||
'gemini',
|
||||
'gemini_pro',
|
||||
'qwen',
|
||||
|
||||
Loading…
Reference in New Issue
Block a user