diff --git a/src/renderer/src/aiCore/clients/openai/OpenAIApiClient.ts b/src/renderer/src/aiCore/clients/openai/OpenAIApiClient.ts
index efea48e6b0..85fc6cb717 100644
--- a/src/renderer/src/aiCore/clients/openai/OpenAIApiClient.ts
+++ b/src/renderer/src/aiCore/clients/openai/OpenAIApiClient.ts
@@ -716,8 +716,8 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
       isFinished = true
     }
 
-    let isFirstThinkingChunk = true
-    let isFirstTextChunk = true
+    let isThinking = false
+    let accumulatingText = false
     return (context: ResponseChunkTransformerContext) => ({
       async transform(chunk: OpenAISdkRawChunk, controller: TransformStreamDefaultController) {
         const isOpenRouter = context.provider?.id === 'openrouter'
@@ -774,6 +774,15 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
           contentSource = choice.message
         }
 
+        // State management
+        if (!contentSource?.content) {
+          accumulatingText = false
+        }
+        // @ts-ignore - reasoning_content is not in standard OpenAI types but some providers use it
+        if (!contentSource?.reasoning_content && !contentSource?.reasoning) {
+          isThinking = false
+        }
+
         if (!contentSource) {
           if ('finish_reason' in choice && choice.finish_reason) {
             // For OpenRouter, don't emit completion signals immediately after finish_reason
@@ -811,30 +820,41 @@
         // @ts-ignore - reasoning_content is not in standard OpenAI types but some providers use it
         const reasoningText = contentSource.reasoning_content || contentSource.reasoning
         if (reasoningText) {
-          if (isFirstThinkingChunk) {
+          // logger.silly('since reasoningText is truthy, try to enqueue THINKING_START and THINKING_DELTA')
+          if (!isThinking) {
+            // logger.silly('since isThinking is falsy, try to enqueue THINKING_START')
             controller.enqueue({
               type: ChunkType.THINKING_START
             } as ThinkingStartChunk)
-            isFirstThinkingChunk = false
+            isThinking = true
           }
+
+          // logger.silly('enqueue THINKING_DELTA')
           controller.enqueue({
             type: ChunkType.THINKING_DELTA,
             text: reasoningText
           })
+        } else {
+          isThinking = false
         }
 
         // Handle text content
         if (contentSource.content) {
-          if (isFirstTextChunk) {
+          // logger.silly('since contentSource.content is truthy, try to enqueue TEXT_START and TEXT_DELTA')
+          if (!accumulatingText) {
+            // logger.silly('enqueue TEXT_START')
             controller.enqueue({
               type: ChunkType.TEXT_START
             } as TextStartChunk)
-            isFirstTextChunk = false
+            accumulatingText = true
           }
+
+          // logger.silly('enqueue TEXT_DELTA')
           controller.enqueue({
             type: ChunkType.TEXT_DELTA,
             text: contentSource.content
           })
+        } else {
+          accumulatingText = false
         }
 
         // Handle tool calls
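Review note: the essence of this change is that the one-shot `isFirstThinkingChunk`/`isFirstTextChunk` flags become edge-triggered state, so a stream that interleaves reasoning and text blocks gets a fresh `THINKING_START`/`TEXT_START` at every block boundary instead of only once per response. A minimal sketch of the same state machine in isolation; the `Delta` and `Event` shapes are simplified stand-ins, not the real `OpenAISdkRawChunk`/`GenericChunk` types, and the real code also checks a `reasoning` field:

```ts
type Delta = { content?: string | null; reasoning_content?: string | null }
type Event =
  | { type: 'THINKING_START' }
  | { type: 'THINKING_DELTA'; text: string }
  | { type: 'TEXT_START' }
  | { type: 'TEXT_DELTA'; text: string }

function makeBlockTracker() {
  let isThinking = false
  let accumulatingText = false
  return (delta: Delta): Event[] => {
    const events: Event[] = []
    // Falling edge: a delta without a given kind of content closes that block,
    // so the next delta of that kind re-opens it with a fresh *_START event.
    if (!delta.content) accumulatingText = false
    if (!delta.reasoning_content) isThinking = false
    if (delta.reasoning_content) {
      if (!isThinking) {
        events.push({ type: 'THINKING_START' })
        isThinking = true
      }
      events.push({ type: 'THINKING_DELTA', text: delta.reasoning_content })
    }
    if (delta.content) {
      if (!accumulatingText) {
        events.push({ type: 'TEXT_START' })
        accumulatingText = true
      }
      events.push({ type: 'TEXT_DELTA', text: delta.content })
    }
    return events
  }
}

// Each interleaved block gets its own START event:
const track = makeBlockTracker()
const types = [{ reasoning_content: '思考' }, { content: '文本' }, { reasoning_content: '再次' }]
  .flatMap(track)
  .map((e) => e.type)
// -> THINKING_START, THINKING_DELTA, TEXT_START, TEXT_DELTA, THINKING_START, THINKING_DELTA
```

The falling-edge resets at the top mirror the new "State management" block in the diff above.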
diff --git a/src/renderer/src/aiCore/index.ts b/src/renderer/src/aiCore/index.ts
index 83646d502a..47fb4cd707 100644
--- a/src/renderer/src/aiCore/index.ts
+++ b/src/renderer/src/aiCore/index.ts
@@ -20,7 +20,6 @@ import { MIDDLEWARE_NAME as FinalChunkConsumerMiddlewareName } from './middlewar
 import { applyCompletionsMiddlewares } from './middleware/composer'
 import { MIDDLEWARE_NAME as McpToolChunkMiddlewareName } from './middleware/core/McpToolChunkMiddleware'
 import { MIDDLEWARE_NAME as RawStreamListenerMiddlewareName } from './middleware/core/RawStreamListenerMiddleware'
-import { MIDDLEWARE_NAME as ThinkChunkMiddlewareName } from './middleware/core/ThinkChunkMiddleware'
 import { MIDDLEWARE_NAME as WebSearchMiddlewareName } from './middleware/core/WebSearchMiddleware'
 import { MIDDLEWARE_NAME as ImageGenerationMiddlewareName } from './middleware/feat/ImageGenerationMiddleware'
 import { MIDDLEWARE_NAME as ThinkingTagExtractionMiddlewareName } from './middleware/feat/ThinkingTagExtractionMiddleware'
@@ -120,8 +119,6 @@ export default class AiProvider {
       logger.silly('ErrorHandlerMiddleware is removed')
       builder.remove(FinalChunkConsumerMiddlewareName)
       logger.silly('FinalChunkConsumerMiddleware is removed')
-      builder.insertBefore(ThinkChunkMiddlewareName, MiddlewareRegistry[ThinkingTagExtractionMiddlewareName])
-      logger.silly('ThinkingTagExtractionMiddleware is inserted')
     }
   }
 
diff --git a/src/renderer/src/aiCore/middleware/feat/ThinkingTagExtractionMiddleware.ts b/src/renderer/src/aiCore/middleware/feat/ThinkingTagExtractionMiddleware.ts
index d4983365d9..5ab19a6175 100644
--- a/src/renderer/src/aiCore/middleware/feat/ThinkingTagExtractionMiddleware.ts
+++ b/src/renderer/src/aiCore/middleware/feat/ThinkingTagExtractionMiddleware.ts
@@ -70,12 +70,13 @@ export const ThinkingTagExtractionMiddleware: CompletionsMiddleware =
         let hasThinkingContent = false
         let thinkingStartTime = 0
-        let isFirstTextChunk = true
+        let accumulatingText = false
         let accumulatedThinkingContent = ''
 
         const processedStream = resultFromUpstream.pipeThrough(
           new TransformStream({
             transform(chunk: GenericChunk, controller) {
               logger.silly('chunk', chunk)
+
               if (chunk.type === ChunkType.TEXT_DELTA) {
                 const textChunk = chunk as TextDeltaChunk
 
@@ -84,6 +85,13 @@
 
               for (const extractionResult of extractionResults) {
                 if (extractionResult.complete && extractionResult.tagContentExtracted?.trim()) {
+                  // Thinking finished
+                  // logger.silly(
+                  //   'since extractionResult.complete and extractionResult.tagContentExtracted is not empty, THINKING_COMPLETE chunk is generated'
+                  // )
+                  // Thinking is complete, so reset the state
+                  accumulatingText = false
+
                   // Emit a THINKING_COMPLETE event
                   const thinkingCompleteChunk: ThinkingCompleteChunk = {
                     type: ChunkType.THINKING_COMPLETE,
@@ -96,7 +104,13 @@
                   hasThinkingContent = false
                   thinkingStartTime = 0
                 } else if (extractionResult.content.length > 0) {
+                  // logger.silly(
+                  //   'since extractionResult.content is not empty, try to generate THINKING_START/THINKING_DELTA chunk'
+                  // )
                   if (extractionResult.isTagContent) {
+                    // Thinking content was extracted, so reset the state
+                    accumulatingText = false
+
                     // Record the start time when thinking content first arrives
                     if (!hasThinkingContent) {
                       hasThinkingContent = true
@@ -116,11 +130,17 @@
                     controller.enqueue(thinkingDeltaChunk)
                   }
                 } else {
-                  if (isFirstTextChunk) {
+                  // No thinking content, so emit the text directly
+                  // logger.silly(
+                  //   'since extractionResult.isTagContent is falsy, try to generate TEXT_START/TEXT_DELTA chunk'
+                  // )
+                  // When non-thinking content arrives while not accumulating text, emit a TEXT_START chunk and update the state
+                  if (!accumulatingText) {
+                    // logger.silly('since accumulatingText is false, TEXT_START chunk is generated')
                     controller.enqueue({
                       type: ChunkType.TEXT_START
                     })
-                    isFirstTextChunk = false
+                    accumulatingText = true
                   }
                   // Emit the cleaned text content
                   const cleanTextChunk: TextDeltaChunk = {
@@ -129,11 +149,20 @@
                   }
                   controller.enqueue(cleanTextChunk)
                 }
+              } else {
+                // logger.silly('since both conditions are false, skip')
               }
             }
           } else if (chunk.type !== ChunkType.TEXT_START) {
+            // logger.silly('since chunk.type is not TEXT_START and not TEXT_DELTA, pass through')
+
+            // logger.silly('since chunk.type is not TEXT_START and not TEXT_DELTA, accumulatingText is set to false')
+            accumulatingText = false
             // Pass through all other chunk types (including THINKING_DELTA, THINKING_COMPLETE, etc.)
             controller.enqueue(chunk)
+          } else {
+            // Discard incoming TEXT_START chunks
+            // logger.silly('since chunk.type is TEXT_START, dropped')
           }
         },
         flush(controller) {
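Review note: the middleware now tracks `accumulatingText` instead of a one-shot `isFirstTextChunk`, drops every upstream `TEXT_START`, and synthesizes its own at the first delta of each new text block, so text that resumes after a `</think>` boundary gets a fresh `TEXT_START`. A stripped-down sketch of that drop-and-resynthesize pattern; the `Chunk` union below is a simplified stand-in for `GenericChunk`, and the real middleware additionally runs tag extraction on each `TEXT_DELTA`:

```ts
type Chunk =
  | { type: 'TEXT_START' }
  | { type: 'TEXT_DELTA'; text: string }
  | { type: 'THINKING_COMPLETE'; text: string }

function resegmentText(): TransformStream<Chunk, Chunk> {
  let accumulatingText = false
  return new TransformStream({
    transform(chunk, controller) {
      if (chunk.type === 'TEXT_DELTA') {
        // Rising edge: synthesize TEXT_START at the first delta of a new block.
        if (!accumulatingText) {
          controller.enqueue({ type: 'TEXT_START' })
          accumulatingText = true
        }
        controller.enqueue(chunk)
      } else if (chunk.type !== 'TEXT_START') {
        // Any non-text chunk (e.g. THINKING_COMPLETE) closes the current block.
        accumulatingText = false
        controller.enqueue(chunk)
      }
      // Upstream TEXT_START chunks are swallowed: this stage re-segments the
      // stream, so upstream block boundaries no longer apply.
    }
  })
}
```

This is why the final `else` branch in the diff deliberately drops `TEXT_START` instead of passing it through.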
diff --git a/src/renderer/src/services/__tests__/ApiService.test.ts b/src/renderer/src/services/__tests__/ApiService.test.ts
index 249dcd01f6..3aa4120146 100644
--- a/src/renderer/src/services/__tests__/ApiService.test.ts
+++ b/src/renderer/src/services/__tests__/ApiService.test.ts
@@ -9,10 +9,9 @@ import {
 import { FinishReason, MediaModality } from '@google/genai'
 import { FunctionCall } from '@google/genai'
 import AiProvider from '@renderer/aiCore'
-import { OpenAIAPIClient, ResponseChunkTransformerContext } from '@renderer/aiCore/clients'
+import { BaseApiClient, OpenAIAPIClient, ResponseChunkTransformerContext } from '@renderer/aiCore/clients'
 import { AnthropicAPIClient } from '@renderer/aiCore/clients/anthropic/AnthropicAPIClient'
 import { ApiClientFactory } from '@renderer/aiCore/clients/ApiClientFactory'
-import { BaseApiClient } from '@renderer/aiCore/clients/BaseApiClient'
 import { GeminiAPIClient } from '@renderer/aiCore/clients/gemini/GeminiAPIClient'
 import { OpenAIResponseAPIClient } from '@renderer/aiCore/clients/openai/OpenAIResponseAPIClient'
 import { GenericChunk } from '@renderer/aiCore/middleware/schemas'
@@ -35,13 +34,12 @@
   OpenAISdkRawChunk,
   OpenAISdkRawContentSource
 } from '@renderer/types/sdk'
-import * as McpToolsModule from '@renderer/utils/mcp-tools'
 import { mcpToolCallResponseToGeminiMessage } from '@renderer/utils/mcp-tools'
+import * as McpToolsModule from '@renderer/utils/mcp-tools'
 import { cloneDeep } from 'lodash'
 import OpenAI from 'openai'
 import { ChatCompletionChunk } from 'openai/resources'
 import { beforeEach, describe, expect, it, vi } from 'vitest'
-
 // Mock the ApiClientFactory
 vi.mock('@renderer/aiCore/clients/ApiClientFactory', () => ({
   ApiClientFactory: {
@@ -1108,8 +1106,8 @@ const mockOpenaiApiClient = {
       isFinished = true
     }
 
-    let isFirstThinkingChunk = true
-    let isFirstTextChunk = true
+    let isThinking = false
+    let accumulatingText = false
     return (context: ResponseChunkTransformerContext) => ({
       async transform(chunk: OpenAISdkRawChunk, controller: TransformStreamDefaultController) {
         // Continuously update usage info
@@ -1146,6 +1144,15 @@
           contentSource = choice.message
         }
 
+        // State management
+        if (!contentSource?.content) {
+          accumulatingText = false
+        }
+        // @ts-ignore - reasoning_content is not in standard OpenAI types but some providers use it
+        if (!contentSource?.reasoning_content && !contentSource?.reasoning) {
+          isThinking = false
+        }
+
         if (!contentSource) {
           if ('finish_reason' in choice && choice.finish_reason) {
             emitCompletionSignals(controller)
@@ -1165,30 +1172,34 @@
         // @ts-ignore - reasoning_content is not in standard OpenAI types but some providers use it
         const reasoningText = contentSource.reasoning_content || contentSource.reasoning
         if (reasoningText) {
-          if (isFirstThinkingChunk) {
+          if (!isThinking) {
             controller.enqueue({
               type: ChunkType.THINKING_START
             } as ThinkingStartChunk)
-            isFirstThinkingChunk = false
+            isThinking = true
           }
           controller.enqueue({
             type: ChunkType.THINKING_DELTA,
             text: reasoningText
           })
+        } else {
+          isThinking = false
         }
 
         // Handle text content
         if (contentSource.content) {
-          if (isFirstTextChunk) {
+          if (!accumulatingText) {
             controller.enqueue({
               type: ChunkType.TEXT_START
             } as TextStartChunk)
-            isFirstTextChunk = false
+            accumulatingText = true
           }
           controller.enqueue({
             type: ChunkType.TEXT_DELTA,
             text: contentSource.content
           })
+        } else {
+          accumulatingText = false
         }
 
         // Handle tool calls
@@ -2570,4 +2581,239 @@ describe('ApiService', () => {
     expect(filteredFirstResponseChunks).toEqual(expectedFirstResponseChunks)
     expect(mcpChunks).toEqual(expectedMcpResponseChunks)
   })
+
+  it('should handle multiple reasoning blocks and text blocks', async () => {
+    const rawChunks = [
+      {
+        choices: [
+          {
+            delta: { content: '', reasoning_content: '\n', role: 'assistant' },
+            index: 0,
+            finish_reason: null
+          }
+        ],
+        created: 1754192522,
+        id: 'chat-network/glm-4.5-GLM-4.5-Flash-2025-08-03-11-42-02',
+        model: 'glm-4.5-flash',
+        object: 'chat.completion',
+        system_fingerprint: '3000y'
+      },
+      {
+        choices: [{ delta: { reasoning_content: '开始', role: 'assistant' }, index: 0, finish_reason: null }],
+        created: 1754192522,
+        id: 'chat-network/glm-4.5-GLM-4.5-Flash-2025-08-03-11-42-02',
+        model: 'glm-4.5-flash',
+        object: 'chat.completion',
+        system_fingerprint: '3000y'
+      },
+      {
+        choices: [{ delta: { reasoning_content: '思考', role: 'assistant' }, index: 0, finish_reason: null }],
+        created: 1754192522,
+        id: 'chat-network/glm-4.5-GLM-4.5-Flash-2025-08-03-11-42-02',
+        model: 'glm-4.5-flash',
+        object: 'chat.completion',
+        system_fingerprint: '3000y'
+      },
+      {
+        choices: [
+          { delta: { content: '思考', reasoning_content: null, role: 'assistant' }, index: 0, finish_reason: null }
+        ],
+        created: 1754192522,
+        id: 'chat-network/glm-4.5-GLM-4.5-Flash-2025-08-03-11-42-02',
+        model: 'glm-4.5-flash',
+        object: 'chat.completion',
+        system_fingerprint: '3000y'
+      },
+      {
+        choices: [
+          { delta: { content: '完成', reasoning_content: null, role: 'assistant' }, index: 0, finish_reason: null }
+        ],
+        created: 1754192522,
+        id: 'chat-network/glm-4.5-GLM-4.5-Flash-2025-08-03-11-42-02',
+        model: 'glm-4.5-flash',
+        object: 'chat.completion',
+        system_fingerprint: '3000y'
+      },
+      {
+        choices: [{ delta: { reasoning_content: '再次', role: 'assistant' }, index: 0, finish_reason: null }],
+        created: 1754192522,
+        id: 'chat-network/glm-4.5-GLM-4.5-Flash-2025-08-03-11-42-02',
+        model: 'glm-4.5-flash',
+        object: 'chat.completion',
+        system_fingerprint: '3000y'
+      },
+      {
+        choices: [{ delta: { reasoning_content: '思考', role: 'assistant' }, index: 0, finish_reason: null }],
+        created: 1754192522,
+        id: 'chat-network/glm-4.5-GLM-4.5-Flash-2025-08-03-11-42-02',
+        model: 'glm-4.5-flash',
+        object: 'chat.completion',
+        system_fingerprint: '3000y'
+      },
+      {
+        choices: [
+          { delta: { content: '思考', reasoning_content: null, role: 'assistant' }, index: 0, finish_reason: null }
+        ],
+        created: 1754192522,
+        id: 'chat-network/glm-4.5-GLM-4.5-Flash-2025-08-03-11-42-02',
+        model: 'glm-4.5-flash',
+        object: 'chat.completion',
+        system_fingerprint: '3000y'
+      },
+      {
+        choices: [
+          { delta: { content: '完成', reasoning_content: null, role: 'assistant' }, index: 0, finish_reason: null }
+        ],
+        created: 1754192522,
+        id: 'chat-network/glm-4.5-GLM-4.5-Flash-2025-08-03-11-42-02',
+        model: 'glm-4.5-flash',
+        object: 'chat.completion',
+        system_fingerprint: '3000y'
+      },
+      {
+        choices: [
+          { delta: { content: '', reasoning_content: null, role: 'assistant' }, index: 0, finish_reason: 'stop' }
+        ],
+        created: 1754192522,
+        id: 'chat-network/glm-4.5-GLM-4.5-Flash-2025-08-03-11-42-02',
+        model: 'glm-4.5-flash',
+        object: 'chat.completion',
+        system_fingerprint: '3000y'
+      }
+    ]
+
+    async function* mockChunksGenerator(): AsyncGenerator {
+      for (const chunk of rawChunks) {
+        // cast needed: the standard SDK chunk type has no reasoning_content field
+        yield chunk as OpenAISdkRawChunk
+      }
+    }
+
+    const mockOpenaiApiClient_ = cloneDeep(mockOpenaiApiClient)
+
+    mockOpenaiApiClient_.createCompletions = vi.fn().mockImplementation(() => mockChunksGenerator())
+
+    const mockCreate = vi.mocked(ApiClientFactory.create)
+    // @ts-ignore mockOpenaiApiClient_ is an OpenAIAPIClient
+    mockCreate.mockReturnValue(mockOpenaiApiClient_ as unknown as OpenAIAPIClient)
+    const AI = new AiProvider(mockProvider as Provider)
+
+    const result = await AI.completions({
+      callType: 'test',
+      messages: [],
+      assistant: {
+        id: '1',
+        name: 'test',
+        prompt: 'test',
+        model: {
+          id: 'gpt-4o',
+          name: 'GPT-4o',
+          supported_text_delta: true
+        }
+      } as Assistant,
+      onChunk: mockOnChunk,
+      enableReasoning: true,
+      streamOutput: true
+    })
+
+    const stream = result.stream! as ReadableStream
+    const reader = stream.getReader()
+
+    const chunks: GenericChunk[] = []
+
+    while (true) {
+      const { done, value } = await reader.read()
+      if (done) break
+      chunks.push(value)
+    }
+
+    reader.releaseLock()
+
+    const filteredChunks = chunks.map((chunk) => {
+      if (chunk.type === ChunkType.THINKING_DELTA || chunk.type === ChunkType.THINKING_COMPLETE) {
+        delete (chunk as any).thinking_millsec
+      }
+      return chunk
+    })
+
+    const expectedChunks = [
+      {
+        type: ChunkType.THINKING_START
+      },
+      {
+        type: ChunkType.THINKING_DELTA,
+        text: '\n'
+      },
+      {
+        type: ChunkType.THINKING_DELTA,
+        text: '\n开始'
+      },
+      {
+        type: ChunkType.THINKING_DELTA,
+        text: '\n开始思考'
+      },
+      {
+        type: ChunkType.THINKING_COMPLETE,
+        text: '\n开始思考'
+      },
+      {
+        type: ChunkType.TEXT_START
+      },
+      {
+        type: ChunkType.TEXT_DELTA,
+        text: '思考'
+      },
+      {
+        type: ChunkType.TEXT_DELTA,
+        text: '思考完成'
+      },
+      {
+        type: ChunkType.TEXT_COMPLETE,
+        text: '思考完成'
+      },
+      {
+        type: ChunkType.THINKING_START
+      },
+      {
+        type: ChunkType.THINKING_DELTA,
+        text: '再次'
+      },
+      {
+        type: ChunkType.THINKING_DELTA,
+        text: '再次思考'
+      },
+      {
+        type: ChunkType.THINKING_COMPLETE,
+        text: '再次思考'
+      },
+      {
+        type: ChunkType.TEXT_START
+      },
+      {
+        type: ChunkType.TEXT_DELTA,
+        text: '思考'
+      },
+      {
+        type: ChunkType.TEXT_DELTA,
+        text: '思考完成'
+      },
+      {
+        type: ChunkType.TEXT_COMPLETE,
+        text: '思考完成'
+      },
+      {
+        type: ChunkType.LLM_RESPONSE_COMPLETE,
+        response: {
+          usage: {
+            completion_tokens: 0,
+            prompt_tokens: 0,
+            total_tokens: 0
+          }
+        }
+      }
+    ]
+
+    expect(filteredChunks).toEqual(expectedChunks)
+  })
 })
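Review note: the reader loop in this new test repeats a pattern used by the earlier tests in this file; if more stream tests follow, it could be factored into a helper. A hypothetical sketch, not part of this diff:

```ts
// Hypothetical helper: drain a ReadableStream into an array, releasing the
// reader lock even if reading throws.
async function collectChunks<T>(stream: ReadableStream<T>): Promise<T[]> {
  const reader = stream.getReader()
  const chunks: T[] = []
  try {
    while (true) {
      const { done, value } = await reader.read()
      if (done) break
      chunks.push(value)
    }
  } finally {
    reader.releaseLock()
  }
  return chunks
}
```

With it, the read loop above would reduce to `const chunks = await collectChunks(result.stream!)`.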