feat: enhance Anthropic and OpenAI API clients with incremental output support (#8104)

- Added incremental output support to AnthropicAPIClient by emitting TEXT_START and THINKING_START chunks before the first text or thinking delta of a message.
- Updated OpenAIAPIClient to enable incremental output for Qwen thinking models when the provider is DashScope, and to force streaming output for Qwen reasoning models.
- Updated messageThunk to pass an isComplete flag to smartBlockUpdate when a placeholder block is converted into a main-text or thinking block, for better state management.
- Added an incremental_output parameter to the ReasoningEffortOptionalParams type.
Author: SuYao · 2025-07-14 10:30:51 +08:00 · committed by GitHub
parent 7961ba87ed · commit 4dd99b5240
4 changed files with 35 additions and 4 deletions

@@ -524,9 +524,18 @@ export class AnthropicAPIClient extends BaseApiClient<
         switch (rawChunk.type) {
           case 'message': {
             let i = 0
+            let hasTextContent = false
+            let hasThinkingContent = false
             for (const content of rawChunk.content) {
               switch (content.type) {
                 case 'text': {
+                  if (!hasTextContent) {
+                    controller.enqueue({
+                      type: ChunkType.TEXT_START
+                    } as TextStartChunk)
+                    hasTextContent = true
+                  }
                   controller.enqueue({
                     type: ChunkType.TEXT_DELTA,
                     text: content.text
@@ -539,6 +548,12 @@
                   break
                 }
                 case 'thinking': {
+                  if (!hasThinkingContent) {
+                    controller.enqueue({
+                      type: ChunkType.THINKING_START
+                    } as ThinkingStartChunk)
+                    hasThinkingContent = true
+                  }
                   controller.enqueue({
                     type: ChunkType.THINKING_DELTA,
                     text: content.thinking

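The practical effect of the two new *_START chunks: a consumer can create its text or thinking block once, before the first delta arrives, instead of inferring block boundaries from the first TEXT_DELTA or THINKING_DELTA. A minimal consumer sketch, using a simplified stand-in for the project's real ChunkType/TextStartChunk types:

// Sketch only: the Chunk union below is an assumption standing in for the real chunk types.
type Chunk =
  | { type: 'TEXT_START' }
  | { type: 'TEXT_DELTA'; text: string }
  | { type: 'THINKING_START' }
  | { type: 'THINKING_DELTA'; text: string }

function handleChunk(chunk: Chunk, out: { text: string; thinking: string }) {
  switch (chunk.type) {
    case 'TEXT_START':
      out.text = '' // create/reset the text block exactly once per message
      break
    case 'TEXT_DELTA':
      out.text += chunk.text // append to the already-created block
      break
    case 'THINKING_START':
      out.thinking = '' // create/reset the thinking block exactly once per message
      break
    case 'THINKING_DELTA':
      out.thinking += chunk.text
      break
  }
}
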

@@ -5,6 +5,7 @@ import {
   GEMINI_FLASH_MODEL_REGEX,
   getOpenAIWebSearchParams,
   isDoubaoThinkingAutoModel,
+  isQwenReasoningModel,
   isReasoningModel,
   isSupportedReasoningEffortGrokModel,
   isSupportedReasoningEffortModel,
@@ -166,10 +167,17 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
     // Qwen models
     if (isSupportedThinkingTokenQwenModel(model)) {
-      return {
+      const thinkConfig = {
         enable_thinking: true,
         thinking_budget: budgetTokens
       }
+      if (this.provider.id === 'dashscope') {
+        return {
+          ...thinkConfig,
+          incremental_output: true
+        }
+      }
+      return thinkConfig
     }
     // Grok models
@@ -436,7 +444,14 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
     messages: OpenAISdkMessageParam[]
     metadata: Record<string, any>
   }> => {
-    const { messages, mcpTools, maxTokens, streamOutput, enableWebSearch } = coreRequest
+    const { messages, mcpTools, maxTokens, enableWebSearch } = coreRequest
+    let { streamOutput } = coreRequest
+    // Qwen3 commercial-edition thinking mode, open-source Qwen3, QwQ, and QVQ only support streaming output.
+    if (isQwenReasoningModel(model)) {
+      streamOutput = true
+    }
     // 1. Handle the system message
     let systemMessage = { role: 'system', content: assistant.prompt || '' }

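Taken together, the two hunks above mean a Qwen thinking model on DashScope gets incremental_output: true in its reasoning params, while the same model on any other OpenAI-compatible provider gets only the base config; Qwen reasoning models are additionally forced to stream. A small sketch of the two resulting shapes, with illustrative values:

// Sketch with assumed values; only the object shapes come from the diff above.
const budgetTokens = 2048
const thinkConfig = { enable_thinking: true, thinking_budget: budgetTokens }

// When provider.id === 'dashscope':
const dashscopeParams = { ...thinkConfig, incremental_output: true }
// Any other provider: thinkConfig is returned unchanged.

console.log(dashscopeParams)
// { enable_thinking: true, thinking_budget: 2048, incremental_output: true }
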

@@ -446,7 +446,7 @@ const fetchAndProcessAssistantResponseImpl = async (
             content: accumulatedContent,
             status: MessageBlockStatus.STREAMING
           }
-          smartBlockUpdate(initialPlaceholderBlockId, changes, MessageBlockType.MAIN_TEXT)
+          smartBlockUpdate(initialPlaceholderBlockId, changes, MessageBlockType.MAIN_TEXT, true)
           mainTextBlockId = initialPlaceholderBlockId
           initialPlaceholderBlockId = null
         } else if (!mainTextBlockId) {
@@ -496,7 +496,7 @@
           }
           thinkingBlockId = initialPlaceholderBlockId
           initialPlaceholderBlockId = null
-          smartBlockUpdate(thinkingBlockId, changes, MessageBlockType.THINKING)
+          smartBlockUpdate(thinkingBlockId, changes, MessageBlockType.THINKING, true)
         } else if (!thinkingBlockId) {
           const newBlock = createThinkingBlock(assistantMsgId, accumulatedThinking, {
             status: MessageBlockStatus.STREAMING,

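The new trailing `true` is the isComplete argument named in the commit message, passed exactly when a placeholder block is converted into a concrete block. One plausible reading of such a flag, sketched with hypothetical helpers (dispatchNow/dispatchThrottled are assumptions, not the project's API):

// Hypothetical sketch: isComplete = true flushes the update immediately,
// while streaming deltas go through a throttled path.
type BlockChanges = Record<string, unknown>

function smartBlockUpdateSketch(blockId: string, changes: BlockChanges, blockType: string, isComplete = false) {
  if (isComplete) {
    dispatchNow(blockId, changes, blockType) // assumed helper: immediate store update
  } else {
    dispatchThrottled(blockId, changes, blockType) // assumed helper: coalesces rapid delta updates
  }
}

function dispatchNow(id: string, changes: BlockChanges, type: string) {
  console.log('flush', type, id, changes)
}
function dispatchThrottled(id: string, changes: BlockChanges, type: string) {
  console.log('throttled', type, id, changes)
}
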

@@ -54,6 +54,7 @@ export type ReasoningEffortOptionalParams = {
   reasoning_effort?: OpenAI.Chat.Completions.ChatCompletionCreateParams['reasoning_effort'] | 'none' | 'auto'
   enable_thinking?: boolean
   thinking_budget?: number
+  incremental_output?: boolean
   enable_reasoning?: boolean
   extra_body?: Record<string, any>
   // Add any other potential reasoning-related keys here if they exist
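
With the new optional field in place, a DashScope-style Qwen configuration type-checks alongside the existing reasoning options; a usage sketch (values illustrative, import path assumed):

import type { ReasoningEffortOptionalParams } from './types' // path is an assumption

const params: ReasoningEffortOptionalParams = {
  enable_thinking: true,
  thinking_budget: 2048,
  incremental_output: true // set only for the DashScope provider in the client change above
}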