mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2026-01-07 22:10:21 +08:00
feat(reasoning): add special handling for Grok 4 fast models & qwen3-omni/qwen3-vl (#10367)
* feat(reasoning): add special handling for Grok 4 fast models
* feat(models): add grok4_fast model and refine grok reasoning
* feat(reasoning): unify Grok reasoning handling and XAI params
* feat(models): Grok/qwen handling and XAI
* feat(models): recognize qwen3-vl thinking models and add sizes
* fix(reasoning): reasoning enabled for QwenAlwaysThink models
* feat(reasoning): enable reasoning for Grok 4 Fast models
* fix(reasoning): rename and correct Grok 4 Fast model checks
* fix: adjust Grok-4 Fast reasoning detection for OpenRouter
* fix(reasoning): exclude non-reasoning models from reasoning detection
This commit is contained in:
parent
162e33f478
commit
ea51439aac
@ -6,6 +6,7 @@ import {
|
|||||||
getThinkModelType,
|
getThinkModelType,
|
||||||
isDeepSeekHybridInferenceModel,
|
isDeepSeekHybridInferenceModel,
|
||||||
isDoubaoThinkingAutoModel,
|
isDoubaoThinkingAutoModel,
|
||||||
|
isGrok4FastReasoningModel,
|
||||||
isGrokReasoningModel,
|
isGrokReasoningModel,
|
||||||
isOpenAIReasoningModel,
|
isOpenAIReasoningModel,
|
||||||
isQwenAlwaysThinkModel,
|
isQwenAlwaysThinkModel,
|
||||||
@ -52,7 +53,12 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
|
|||||||
return {}
|
return {}
|
||||||
}
|
}
|
||||||
// Don't disable reasoning for models that require it
|
// Don't disable reasoning for models that require it
|
||||||
if (isGrokReasoningModel(model) || isOpenAIReasoningModel(model) || model.id.includes('seed-oss')) {
|
if (
|
||||||
|
isGrokReasoningModel(model) ||
|
||||||
|
isOpenAIReasoningModel(model) ||
|
||||||
|
isQwenAlwaysThinkModel(model) ||
|
||||||
|
model.id.includes('seed-oss')
|
||||||
|
) {
|
||||||
return {}
|
return {}
|
||||||
}
|
}
|
||||||
return { reasoning: { enabled: false, exclude: true } }
|
return { reasoning: { enabled: false, exclude: true } }
|
||||||
@ -100,6 +106,7 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
|
|||||||
// reasoningEffort有效的情况
|
// reasoningEffort有效的情况
|
||||||
// DeepSeek hybrid inference models, v3.1 and maybe more in the future
|
// DeepSeek hybrid inference models, v3.1 and maybe more in the future
|
||||||
// 不同的 provider 有不同的思考控制方式,在这里统一解决
|
// 不同的 provider 有不同的思考控制方式,在这里统一解决
|
||||||
|
|
||||||
if (isDeepSeekHybridInferenceModel(model)) {
|
if (isDeepSeekHybridInferenceModel(model)) {
|
||||||
if (isSystemProvider(provider)) {
|
if (isSystemProvider(provider)) {
|
||||||
switch (provider.id) {
|
switch (provider.id) {
|
||||||
@ -142,6 +149,16 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
|
|||||||
|
|
||||||
// OpenRouter models
|
// OpenRouter models
|
||||||
if (model.provider === SystemProviderIds.openrouter) {
|
if (model.provider === SystemProviderIds.openrouter) {
|
||||||
|
// Grok 4 Fast doesn't support effort levels, always use enabled: true
|
||||||
|
if (isGrok4FastReasoningModel(model)) {
|
||||||
|
return {
|
||||||
|
reasoning: {
|
||||||
|
enabled: true // Ignore effort level, just enable reasoning
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Other OpenRouter models that support effort levels
|
||||||
if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
|
if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
|
||||||
return {
|
return {
|
||||||
reasoning: {
|
reasoning: {
|
||||||
@ -412,6 +429,13 @@ export function getGeminiReasoningParams(assistant: Assistant, model: Model): Re
|
|||||||
return {}
|
return {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get XAI-specific reasoning parameters
|
||||||
|
* This function should only be called for XAI provider models
|
||||||
|
* @param assistant - The assistant configuration
|
||||||
|
* @param model - The model being used
|
||||||
|
* @returns XAI-specific reasoning parameters
|
||||||
|
*/
|
||||||
export function getXAIReasoningParams(assistant: Assistant, model: Model): Record<string, any> {
|
export function getXAIReasoningParams(assistant: Assistant, model: Model): Record<string, any> {
|
||||||
if (!isSupportedReasoningEffortGrokModel(model)) {
|
if (!isSupportedReasoningEffortGrokModel(model)) {
|
||||||
return {}
|
return {}
|
||||||
@ -419,6 +443,11 @@ export function getXAIReasoningParams(assistant: Assistant, model: Model): Recor
|
|||||||
|
|
||||||
const { reasoning_effort: reasoningEffort } = getAssistantSettings(assistant)
|
const { reasoning_effort: reasoningEffort } = getAssistantSettings(assistant)
|
||||||
|
|
||||||
|
if (!reasoningEffort) {
|
||||||
|
return {}
|
||||||
|
}
|
||||||
|
|
||||||
|
// For XAI provider Grok models, use reasoningEffort parameter directly
|
||||||
return {
|
return {
|
||||||
reasoningEffort
|
reasoningEffort
|
||||||
}
|
}
|
||||||
|
|||||||
@ -14,7 +14,7 @@ import { GEMINI_FLASH_MODEL_REGEX } from './websearch'
|
|||||||
|
|
||||||
// Reasoning models
|
// Reasoning models
|
||||||
export const REASONING_REGEX =
|
export const REASONING_REGEX =
|
||||||
/^(o\d+(?:-[\w-]+)?|.*\b(?:reasoning|reasoner|thinking)\b.*|.*-[rR]\d+.*|.*\bqwq(?:-[\w-]+)?\b.*|.*\bhunyuan-t1(?:-[\w-]+)?\b.*|.*\bglm-zero-preview\b.*|.*\bgrok-(?:3-mini|4)(?:-[\w-]+)?\b.*)$/i
|
/^(?!.*-non-reasoning\b)(o\d+(?:-[\w-]+)?|.*\b(?:reasoning|reasoner|thinking)\b.*|.*-[rR]\d+.*|.*\bqwq(?:-[\w-]+)?\b.*|.*\bhunyuan-t1(?:-[\w-]+)?\b.*|.*\bglm-zero-preview\b.*|.*\bgrok-(?:3-mini|4|4-fast)(?:-[\w-]+)?\b.*)$/i
|
||||||
|
|
||||||
// 模型类型到支持的reasoning_effort的映射表
|
// 模型类型到支持的reasoning_effort的映射表
|
||||||
// TODO: refactor this. too many identical options
|
// TODO: refactor this. too many identical options
|
||||||
@ -24,6 +24,7 @@ export const MODEL_SUPPORTED_REASONING_EFFORT: ReasoningEffortConfig = {
|
|||||||
gpt5: ['minimal', 'low', 'medium', 'high'] as const,
|
gpt5: ['minimal', 'low', 'medium', 'high'] as const,
|
||||||
gpt5_codex: ['low', 'medium', 'high'] as const,
|
gpt5_codex: ['low', 'medium', 'high'] as const,
|
||||||
grok: ['low', 'high'] as const,
|
grok: ['low', 'high'] as const,
|
||||||
|
grok4_fast: ['auto'] as const,
|
||||||
gemini: ['low', 'medium', 'high', 'auto'] as const,
|
gemini: ['low', 'medium', 'high', 'auto'] as const,
|
||||||
gemini_pro: ['low', 'medium', 'high', 'auto'] as const,
|
gemini_pro: ['low', 'medium', 'high', 'auto'] as const,
|
||||||
qwen: ['low', 'medium', 'high'] as const,
|
qwen: ['low', 'medium', 'high'] as const,
|
||||||
@ -43,6 +44,7 @@ export const MODEL_SUPPORTED_OPTIONS: ThinkingOptionConfig = {
|
|||||||
gpt5: [...MODEL_SUPPORTED_REASONING_EFFORT.gpt5] as const,
|
gpt5: [...MODEL_SUPPORTED_REASONING_EFFORT.gpt5] as const,
|
||||||
gpt5_codex: MODEL_SUPPORTED_REASONING_EFFORT.gpt5_codex,
|
gpt5_codex: MODEL_SUPPORTED_REASONING_EFFORT.gpt5_codex,
|
||||||
grok: MODEL_SUPPORTED_REASONING_EFFORT.grok,
|
grok: MODEL_SUPPORTED_REASONING_EFFORT.grok,
|
||||||
|
grok4_fast: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.grok4_fast] as const,
|
||||||
gemini: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.gemini] as const,
|
gemini: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.gemini] as const,
|
||||||
gemini_pro: MODEL_SUPPORTED_REASONING_EFFORT.gemini_pro,
|
gemini_pro: MODEL_SUPPORTED_REASONING_EFFORT.gemini_pro,
|
||||||
qwen: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.qwen] as const,
|
qwen: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.qwen] as const,
|
||||||
@ -66,6 +68,8 @@ export const getThinkModelType = (model: Model): ThinkingModelType => {
|
|||||||
}
|
}
|
||||||
} else if (isSupportedReasoningEffortOpenAIModel(model)) {
|
} else if (isSupportedReasoningEffortOpenAIModel(model)) {
|
||||||
thinkingModelType = 'o'
|
thinkingModelType = 'o'
|
||||||
|
} else if (isGrok4FastReasoningModel(model)) {
|
||||||
|
thinkingModelType = 'grok4_fast'
|
||||||
} else if (isSupportedThinkingTokenGeminiModel(model)) {
|
} else if (isSupportedThinkingTokenGeminiModel(model)) {
|
||||||
if (GEMINI_FLASH_MODEL_REGEX.test(model.id)) {
|
if (GEMINI_FLASH_MODEL_REGEX.test(model.id)) {
|
||||||
thinkingModelType = 'gemini'
|
thinkingModelType = 'gemini'
|
||||||
@ -142,19 +146,46 @@ export function isSupportedReasoningEffortGrokModel(model?: Model): boolean {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const modelId = getLowerBaseModelName(model.id)
|
const modelId = getLowerBaseModelName(model.id)
|
||||||
|
const providerId = model.provider.toLowerCase()
|
||||||
if (modelId.includes('grok-3-mini')) {
|
if (modelId.includes('grok-3-mini')) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (providerId === 'openrouter' && modelId.includes('grok-4-fast')) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Tells whether the given model is the reasoning flavor of Grok 4 Fast.
 *
 * The check is made on the model ID as normalized by `getLowerBaseModelName`:
 * the ID must contain `grok-4-fast` and must not contain `non-reasoning`,
 * so explicitly non-reasoning variants are rejected.
 *
 * Note: xAI's official API uses different model IDs for the reasoning and
 * non-reasoning variants. Third-party providers like OpenRouter expose a single
 * ID with reasoning parameters, while first-party providers require separate
 * IDs. Only the OpenRouter variant supports toggling.
 *
 * @param model - The model to check; may be omitted
 * @returns true if the model is a reasoning-enabled Grok 4 Fast model,
 *          false otherwise (including when no model is given)
 */
export function isGrok4FastReasoningModel(model?: Model): boolean {
  if (model) {
    const normalizedId = getLowerBaseModelName(model.id)
    const isGrok4Fast = normalizedId.includes('grok-4-fast')
    const isNonReasoning = normalizedId.includes('non-reasoning')
    return isGrok4Fast && !isNonReasoning
  }

  // No model supplied: nothing to classify.
  return false
}
|
||||||
|
|
||||||
export function isGrokReasoningModel(model?: Model): boolean {
|
export function isGrokReasoningModel(model?: Model): boolean {
|
||||||
if (!model) {
|
if (!model) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
const modelId = getLowerBaseModelName(model.id)
|
const modelId = getLowerBaseModelName(model.id)
|
||||||
if (isSupportedReasoningEffortGrokModel(model) || modelId.includes('grok-4')) {
|
if (
|
||||||
|
isSupportedReasoningEffortGrokModel(model) ||
|
||||||
|
(modelId.includes('grok-4') && !modelId.includes('non-reasoning'))
|
||||||
|
) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -265,7 +296,11 @@ export function isQwenAlwaysThinkModel(model?: Model): boolean {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
const modelId = getLowerBaseModelName(model.id, '/')
|
const modelId = getLowerBaseModelName(model.id, '/')
|
||||||
return modelId.startsWith('qwen3') && modelId.includes('thinking')
|
// 包括 qwen3 开头的 thinking 模型和 qwen3-vl 的 thinking 模型
|
||||||
|
return (
|
||||||
|
(modelId.startsWith('qwen3') && modelId.includes('thinking')) ||
|
||||||
|
(modelId.includes('qwen3-vl') && modelId.includes('thinking'))
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Doubao 支持思考模式的模型正则
|
// Doubao 支持思考模式的模型正则
|
||||||
@ -329,7 +364,10 @@ export const isPerplexityReasoningModel = (model?: Model): boolean => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const modelId = getLowerBaseModelName(model.id, '/')
|
const modelId = getLowerBaseModelName(model.id, '/')
|
||||||
return isSupportedReasoningEffortPerplexityModel(model) || modelId.includes('reasoning')
|
return (
|
||||||
|
isSupportedReasoningEffortPerplexityModel(model) ||
|
||||||
|
(modelId.includes('reasoning') && !modelId.includes('non-reasoning'))
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
export const isSupportedReasoningEffortPerplexityModel = (model: Model): boolean => {
|
export const isSupportedReasoningEffortPerplexityModel = (model: Model): boolean => {
|
||||||
@ -443,6 +481,8 @@ export const THINKING_TOKEN_MAP: Record<string, { min: number; max: number }> =
|
|||||||
// qwen-plus-x 系列自 qwen-plus-2025-07-28 后模型最长思维链变为 81_920, qwen-plus 模型于 2025.9.16 同步变更
|
// qwen-plus-x 系列自 qwen-plus-2025-07-28 后模型最长思维链变为 81_920, qwen-plus 模型于 2025.9.16 同步变更
|
||||||
'qwen3-235b-a22b-thinking-2507$': { min: 0, max: 81_920 },
|
'qwen3-235b-a22b-thinking-2507$': { min: 0, max: 81_920 },
|
||||||
'qwen3-30b-a3b-thinking-2507$': { min: 0, max: 81_920 },
|
'qwen3-30b-a3b-thinking-2507$': { min: 0, max: 81_920 },
|
||||||
|
'qwen3-vl-235b-a22b-thinking$': { min: 0, max: 81_920 },
|
||||||
|
'qwen3-vl-30b-a3b-thinking$': { min: 0, max: 81_920 },
|
||||||
'qwen-plus-2025-07-14$': { min: 0, max: 38_912 },
|
'qwen-plus-2025-07-14$': { min: 0, max: 38_912 },
|
||||||
'qwen-plus-2025-04-28$': { min: 0, max: 38_912 },
|
'qwen-plus-2025-04-28$': { min: 0, max: 38_912 },
|
||||||
'qwen3-1\\.7b$': { min: 0, max: 30_720 },
|
'qwen3-1\\.7b$': { min: 0, max: 30_720 },
|
||||||
|
|||||||
@ -24,7 +24,7 @@ const visionAllowedModels = [
|
|||||||
'qwen2.5-vl',
|
'qwen2.5-vl',
|
||||||
'qwen3-vl',
|
'qwen3-vl',
|
||||||
'qwen2.5-omni',
|
'qwen2.5-omni',
|
||||||
'qwen3-omni',
|
'qwen3-omni(?:-[\\w-]+)?',
|
||||||
'qvq',
|
'qvq',
|
||||||
'internvl2',
|
'internvl2',
|
||||||
'grok-vision-beta',
|
'grok-vision-beta',
|
||||||
|
|||||||
@ -81,6 +81,7 @@ const ThinkModelTypes = [
|
|||||||
'gpt5',
|
'gpt5',
|
||||||
'gpt5_codex',
|
'gpt5_codex',
|
||||||
'grok',
|
'grok',
|
||||||
|
'grok4_fast',
|
||||||
'gemini',
|
'gemini',
|
||||||
'gemini_pro',
|
'gemini_pro',
|
||||||
'qwen',
|
'qwen',
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user