diff --git a/src/renderer/src/config/models.ts b/src/renderer/src/config/models.ts index b6705dc9f5..42073b43e3 100644 --- a/src/renderer/src/config/models.ts +++ b/src/renderer/src/config/models.ts @@ -130,7 +130,6 @@ import XirangModelLogoDark from '@renderer/assets/images/models/xirang_dark.png' import YiModelLogo from '@renderer/assets/images/models/yi.png' import YiModelLogoDark from '@renderer/assets/images/models/yi_dark.png' import { getProviderByModel } from '@renderer/services/AssistantService' -import WebSearchService from '@renderer/services/WebSearchService' import { Assistant, Model } from '@renderer/types' import OpenAI from 'openai' @@ -2223,6 +2222,19 @@ export function isOpenAIReasoningModel(model: Model): boolean { return model.id.includes('o1') || model.id.includes('o3') || model.id.includes('o4') } +export function isOpenAILLMModel(model: Model): boolean { + if (!model) { + return false + } + if (isOpenAIReasoningModel(model)) { + return true + } + if (model.id.includes('gpt')) { + return true + } + return false +} + export function isSupportedReasoningEffortOpenAIModel(model: Model): boolean { return ( (model.id.includes('o1') && !(model.id.includes('o1-preview') || model.id.includes('o1-mini'))) || @@ -2387,16 +2399,38 @@ export function isWebSearchModel(model: Model): boolean { return false } + if (provider.type === 'openai') { + if ( + isOpenAILLMModel(model) && + !isTextToImageModel(model) && + !isOpenAIReasoningModel(model) && + !GENERATE_IMAGE_MODELS.includes(model.id) + ) { + return true + } + + return false + } + if (provider.id === 'perplexity') { return PERPLEXITY_SEARCH_MODELS.includes(model?.id) } if (provider.id === 'aihubmix') { + if ( + isOpenAILLMModel(model) && + !isTextToImageModel(model) && + !isOpenAIReasoningModel(model) && + !GENERATE_IMAGE_MODELS.includes(model.id) + ) { + return true + } + const models = ['gemini-2.0-flash-search', 'gemini-2.0-flash-exp-search', 'gemini-2.0-pro-exp-02-05-search'] return models.includes(model?.id) } - if (provider?.type === 'openai') { + if (provider?.type === 'openai-compatible') { if (GEMINI_SEARCH_MODELS.includes(model?.id) || isOpenAIWebSearch(model)) { return true } @@ -2450,9 +2484,6 @@ export function isGenerateImageModel(model: Model): boolean { } export function getOpenAIWebSearchParams(assistant: Assistant, model: Model): Record { - if (WebSearchService.isWebSearchEnabled()) { - return {} - } if (isWebSearchModel(model)) { if (assistant.enableWebSearch) { const webSearchTools = getWebSearchTools(model) @@ -2477,7 +2508,9 @@ export function getOpenAIWebSearchParams(assistant: Assistant, model: Model): Re } if (isOpenAIWebSearch(model)) { - return {} + return { + web_search_options: {} + } } return { diff --git a/src/renderer/src/pages/settings/ProviderSettings/AddProviderPopup.tsx b/src/renderer/src/pages/settings/ProviderSettings/AddProviderPopup.tsx index 7ab60fb466..8268785b2e 100644 --- a/src/renderer/src/pages/settings/ProviderSettings/AddProviderPopup.tsx +++ b/src/renderer/src/pages/settings/ProviderSettings/AddProviderPopup.tsx @@ -16,7 +16,7 @@ interface Props { const PopupContainer: React.FC = ({ provider, resolve }) => { const [open, setOpen] = useState(true) const [name, setName] = useState(provider?.name || '') - const [type, setType] = useState(provider?.type || 'openai') + const [type, setType] = useState(provider?.type || 'openai-compatible') const [logo, setLogo] = useState(null) const [dropdownOpen, setDropdownOpen] = useState(false) const { t } = useTranslation() @@ -52,7 +52,7 @@ 
const PopupContainer: React.FC = ({ provider, resolve }) => { const onCancel = () => { setOpen(false) - resolve({ name: '', type: 'openai' }) + resolve({ name: '', type: 'openai-compatible' }) } const onClose = () => { @@ -188,7 +188,8 @@ const PopupContainer: React.FC = ({ provider, resolve }) => { value={type} onChange={setType} options={[ - { label: 'OpenAI', value: 'openai' }, + { label: 'OpenAI-Compatible', value: 'openai-compatible' }, + { label: 'OpenAI-Response', value: 'openai' }, { label: 'Gemini', value: 'gemini' }, { label: 'Anthropic', value: 'anthropic' }, { label: 'Azure OpenAI', value: 'azure-openai' } diff --git a/src/renderer/src/pages/settings/ProviderSettings/ProviderSetting.tsx b/src/renderer/src/pages/settings/ProviderSettings/ProviderSetting.tsx index 5463f6eeea..dd0a32ac35 100644 --- a/src/renderer/src/pages/settings/ProviderSettings/ProviderSetting.tsx +++ b/src/renderer/src/pages/settings/ProviderSettings/ProviderSetting.tsx @@ -269,8 +269,10 @@ const ProviderSetting: FC = ({ provider: _provider }) => { if (apiHost.endsWith('#')) { return apiHost.replace('#', '') } - - return formatApiHost(apiHost) + 'chat/completions' + if (provider.type === 'openai-compatible') { + return formatApiHost(apiHost) + 'chat/completions' + } + return formatApiHost(apiHost) + 'responses' } useEffect(() => { diff --git a/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts b/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts new file mode 100644 index 0000000000..437f384cb4 --- /dev/null +++ b/src/renderer/src/providers/AiProvider/OpenAICompatibleProvider.ts @@ -0,0 +1,1090 @@ +import { + findTokenLimit, + getOpenAIWebSearchParams, + isHunyuanSearchModel, + isOpenAILLMModel, + isOpenAIReasoningModel, + isOpenAIWebSearch, + isReasoningModel, + isSupportedModel, + isSupportedReasoningEffortGrokModel, + isSupportedReasoningEffortModel, + isSupportedReasoningEffortOpenAIModel, + isSupportedThinkingTokenClaudeModel, + isSupportedThinkingTokenModel, + isSupportedThinkingTokenQwenModel, + isVisionModel, + isZhipuModel +} from '@renderer/config/models' +import { getStoreSetting } from '@renderer/hooks/useSettings' +import i18n from '@renderer/i18n' +import { getAssistantSettings, getDefaultModel, getTopNamingModel } from '@renderer/services/AssistantService' +import { EVENT_NAMES } from '@renderer/services/EventService' +import { + filterContextMessages, + filterEmptyMessages, + filterUserRoleStartMessages +} from '@renderer/services/MessagesService' +import { processReqMessages } from '@renderer/services/ModelMessageService' +import store from '@renderer/store' +import { + Assistant, + EFFORT_RATIO, + FileTypes, + MCPToolResponse, + Model, + Provider, + Suggestion, + Usage, + WebSearchSource +} from '@renderer/types' +import { ChunkType, LLMWebSearchCompleteChunk } from '@renderer/types/chunk' +import { Message } from '@renderer/types/newMessage' +import { removeSpecialCharactersForTopicName } from '@renderer/utils' +import { addImageFileToContents } from '@renderer/utils/formats' +import { + convertLinks, + convertLinksToHunyuan, + convertLinksToOpenRouter, + convertLinksToZhipu +} from '@renderer/utils/linkConverter' +import { mcpToolCallResponseToOpenAICompatibleMessage, parseAndCallTools } from '@renderer/utils/mcp-tools' +import { findFileBlocks, findImageBlocks, getMainTextContent } from '@renderer/utils/messageUtils/find' +import { buildSystemPrompt } from '@renderer/utils/prompt' +import { isEmpty, takeRight } from 'lodash' +import OpenAI, { AzureOpenAI } 
from 'openai' +import { + ChatCompletionContentPart, + ChatCompletionCreateParamsNonStreaming, + ChatCompletionMessageParam +} from 'openai/resources' + +import { CompletionsParams } from '.' +import OpenAIProvider from './OpenAIProvider' + +export default class OpenAICompatibleProvider extends OpenAIProvider { + constructor(provider: Provider) { + super(provider) + + if (provider.id === 'azure-openai' || provider.type === 'azure-openai') { + this.sdk = new AzureOpenAI({ + dangerouslyAllowBrowser: true, + apiKey: this.apiKey, + apiVersion: provider.apiVersion, + endpoint: provider.apiHost + }) + return + } + + this.sdk = new OpenAI({ + dangerouslyAllowBrowser: true, + apiKey: this.apiKey, + baseURL: this.getBaseURL(), + defaultHeaders: { + ...this.defaultHeaders(), + ...(this.provider.id === 'copilot' ? { 'editor-version': 'vscode/1.97.2' } : {}), + ...(this.provider.id === 'copilot' ? { 'copilot-vision-request': 'true' } : {}) + } + }) + } + + /** + * Check if the provider does not support files + * @returns True if the provider does not support files, false otherwise + */ + private get isNotSupportFiles() { + if (this.provider?.isNotSupportArrayContent) { + return true + } + + const providers = ['deepseek', 'baichuan', 'minimax', 'xirang'] + + return providers.includes(this.provider.id) + } + + /** + * Get the message parameter + * @param message - The message + * @param model - The model + * @returns The message parameter + */ + override async getMessageParam( + message: Message, + model: Model + ): Promise { + const isVision = isVisionModel(model) + const content = await this.getMessageContent(message) + const fileBlocks = findFileBlocks(message) + const imageBlocks = findImageBlocks(message) + + if (fileBlocks.length === 0 && imageBlocks.length === 0) { + return { + role: message.role === 'system' ? 'user' : message.role, + content + } + } + + // If the model does not support files, extract the file content + if (this.isNotSupportFiles) { + const fileContent = await this.extractFileContent(message) + + return { + role: message.role === 'system' ? 'user' : message.role, + content: content + '\n\n---\n\n' + fileContent + } + } + + // If the model supports files, add the file content to the message + const parts: ChatCompletionContentPart[] = [] + + if (content) { + parts.push({ type: 'text', text: content }) + } + + for (const imageBlock of imageBlocks) { + if (isVision) { + if (imageBlock.file) { + const image = await window.api.file.base64Image(imageBlock.file.id + imageBlock.file.ext) + parts.push({ type: 'image_url', image_url: { url: image.data } }) + } else if (imageBlock.url && imageBlock.url.startsWith('data:')) { + parts.push({ type: 'image_url', image_url: { url: imageBlock.url } }) + } + } + } + + for (const fileBlock of fileBlocks) { + const file = fileBlock.file + if (!file) { + continue + } + + if ([FileTypes.TEXT, FileTypes.DOCUMENT].includes(file.type)) { + const fileContent = await (await window.api.file.read(file.id + file.ext)).trim() + parts.push({ + type: 'text', + text: file.origin_name + '\n' + fileContent + }) + } + } + + return { + role: message.role === 'system' ? 'user' : message.role, + content: parts + } as ChatCompletionMessageParam + } + + /** + * Get the temperature for the assistant + * @param assistant - The assistant + * @param model - The model + * @returns The temperature + */ + override getTemperature(assistant: Assistant, model: Model) { + return isReasoningModel(model) || isOpenAIWebSearch(model) ? 
undefined : assistant?.settings?.temperature + } + + /** + * Get the provider specific parameters for the assistant + * @param assistant - The assistant + * @param model - The model + * @returns The provider specific parameters + */ + private getProviderSpecificParameters(assistant: Assistant, model: Model) { + const { maxTokens } = getAssistantSettings(assistant) + + if (this.provider.id === 'openrouter') { + if (model.id.includes('deepseek-r1')) { + return { + include_reasoning: true + } + } + } + + if (isOpenAIReasoningModel(model)) { + return { + max_tokens: undefined, + max_completion_tokens: maxTokens + } + } + + return {} + } + + /** + * Get the top P for the assistant + * @param assistant - The assistant + * @param model - The model + * @returns The top P + */ + override getTopP(assistant: Assistant, model: Model) { + if (isReasoningModel(model) || isOpenAIWebSearch(model)) { + return undefined + } + + return assistant?.settings?.topP + } + + /** + * Get the reasoning effort for the assistant + * @param assistant - The assistant + * @param model - The model + * @returns The reasoning effort + */ + private getReasoningEffort(assistant: Assistant, model: Model) { + if (this.provider.id === 'groq') { + return {} + } + + if (!isReasoningModel(model)) { + return {} + } + const reasoningEffort = assistant?.settings?.reasoning_effort + if (!reasoningEffort) { + if (isSupportedThinkingTokenQwenModel(model)) { + return { enable_thinking: false } + } + + if (isSupportedThinkingTokenClaudeModel(model)) { + return { thinking: { type: 'disabled' } } + } + + return {} + } + const effortRatio = EFFORT_RATIO[reasoningEffort] + const budgetTokens = Math.floor((findTokenLimit(model.id)?.max || 0) * effortRatio) + // OpenRouter models + if (model.provider === 'openrouter') { + if (isSupportedReasoningEffortModel(model)) { + return { + reasoning: { + effort: assistant?.settings?.reasoning_effort + } + } + } + + if (isSupportedThinkingTokenModel(model)) { + return { + reasoning: { + max_tokens: budgetTokens + } + } + } + } + + // Qwen models + if (isSupportedThinkingTokenQwenModel(model)) { + return { + enable_thinking: true, + thinking_budget: budgetTokens + } + } + + // Grok models + if (isSupportedReasoningEffortGrokModel(model)) { + return { + reasoning_effort: assistant?.settings?.reasoning_effort + } + } + + // OpenAI models + if (isSupportedReasoningEffortOpenAIModel(model)) { + return { + reasoning_effort: assistant?.settings?.reasoning_effort + } + } + + // Claude models + if (isSupportedThinkingTokenClaudeModel(model)) { + return { + thinking: { + type: 'enabled', + budget_tokens: budgetTokens + } + } + } + + // Default case: no special thinking settings + return {} + } + + /** + * Generate completions for the assistant + * @param messages - The messages + * @param assistant - The assistant + * @param mcpTools - The MCP tools + * @param onChunk - The onChunk callback + * @param onFilterMessages - The onFilterMessages callback + * @returns The completions + */ + async completions({ messages, assistant, mcpTools, onChunk, onFilterMessages }: CompletionsParams): Promise { + if (assistant.enableGenerateImage) { + await this.generateImageByChat({ messages, assistant, onChunk } as CompletionsParams) + return + } + const defaultModel = getDefaultModel() + const model = assistant.model || defaultModel + + if (assistant.model?.provider === 'aihubmix' && isOpenAILLMModel(model)) { + await super.completions({ messages, assistant, mcpTools, onChunk, onFilterMessages }) + return + } + const { 
contextCount, maxTokens, streamOutput } = getAssistantSettings(assistant) + const isEnabledWebSearch = assistant.enableWebSearch || !!assistant.webSearchProviderId + messages = addImageFileToContents(messages) + let systemMessage = { role: 'system', content: assistant.prompt || '' } + if (isSupportedReasoningEffortOpenAIModel(model)) { + systemMessage = { + role: 'developer', + content: `Formatting re-enabled${systemMessage ? '\n' + systemMessage.content : ''}` + } + } + if (mcpTools && mcpTools.length > 0) { + systemMessage.content = buildSystemPrompt(systemMessage.content || '', mcpTools) + } + + const userMessages: ChatCompletionMessageParam[] = [] + const _messages = filterUserRoleStartMessages( + filterEmptyMessages(filterContextMessages(takeRight(messages, contextCount + 1))) + ) + + onFilterMessages(_messages) + + for (const message of _messages) { + userMessages.push(await this.getMessageParam(message, model)) + } + + const isSupportStreamOutput = () => { + return streamOutput + } + + let hasReasoningContent = false + let lastChunk = '' + const isReasoningJustDone = ( + delta: OpenAI.Chat.Completions.ChatCompletionChunk.Choice.Delta & { + reasoning_content?: string + reasoning?: string + thinking?: string + } + ) => { + if (!delta?.content) { + return false + } + + // 检查当前chunk和上一个chunk的组合是否形成###Response标记 + const combinedChunks = lastChunk + delta.content + lastChunk = delta.content + + // 检测思考结束 + if (combinedChunks.includes('###Response') || delta.content === '') { + return true + } + + // 如果有reasoning_content或reasoning,说明是在思考中 + if (delta?.reasoning_content || delta?.reasoning || delta?.thinking) { + hasReasoningContent = true + } + + // 如果之前有reasoning_content或reasoning,现在有普通content,说明思考结束 + return !!(hasReasoningContent && delta.content) + } + + let time_first_token_millsec = 0 + let time_first_token_millsec_delta = 0 + let time_first_content_millsec = 0 + const start_time_millsec = new Date().getTime() + console.log( + `completions start_time_millsec ${new Date(start_time_millsec).toLocaleString(undefined, { + year: 'numeric', + month: 'numeric', + day: 'numeric', + hour: 'numeric', + minute: 'numeric', + second: 'numeric', + fractionalSecondDigits: 3 + })}` + ) + const lastUserMessage = _messages.findLast((m) => m.role === 'user') + const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true) + const { signal } = abortController + await this.checkIsCopilot() + + //当 systemMessage 内容为空时不发送 systemMessage + let reqMessages: ChatCompletionMessageParam[] + if (!systemMessage.content) { + reqMessages = [...userMessages] + } else { + reqMessages = [systemMessage, ...userMessages].filter(Boolean) as ChatCompletionMessageParam[] + } + + const toolResponses: MCPToolResponse[] = [] + + const processToolUses = async (content: string, idx: number) => { + const toolResults = await parseAndCallTools( + content, + toolResponses, + onChunk, + idx, + mcpToolCallResponseToOpenAICompatibleMessage, + mcpTools, + isVisionModel(model) + ) + + if (toolResults.length > 0) { + reqMessages.push({ + role: 'assistant', + content: content + } as ChatCompletionMessageParam) + toolResults.forEach((ts) => reqMessages.push(ts as ChatCompletionMessageParam)) + + console.debug('[tool] reqMessages before processing', model.id, reqMessages) + reqMessages = processReqMessages(model, reqMessages) + console.debug('[tool] reqMessages', model.id, reqMessages) + const newStream = await this.sdk.chat.completions + // @ts-ignore key is not typed + .create( + { + model: 
model.id, + messages: reqMessages, + temperature: this.getTemperature(assistant, model), + top_p: this.getTopP(assistant, model), + max_tokens: maxTokens, + keep_alive: this.keepAliveTime, + stream: isSupportStreamOutput(), + // tools: tools, + service_tier: this.getServiceTier(model), + ...getOpenAIWebSearchParams(assistant, model), + ...this.getReasoningEffort(assistant, model), + ...this.getProviderSpecificParameters(assistant, model), + ...this.getCustomParameters(assistant) + }, + { + signal, + timeout: this.getTimeout(model) + } + ) + await processStream(newStream, idx + 1) + } + } + + const processStream = async (stream: any, idx: number) => { + // Handle non-streaming case (already returns early, no change needed here) + if (!isSupportStreamOutput()) { + const time_completion_millsec = new Date().getTime() - start_time_millsec + // Calculate final metrics once + const finalMetrics = { + completion_tokens: stream.usage?.completion_tokens, + time_completion_millsec, + time_first_token_millsec: 0 // Non-streaming, first token time is not relevant + } + + // Create a synthetic usage object if stream.usage is undefined + const finalUsage = stream.usage + // Separate onChunk calls for text and usage/metrics + if (stream.choices[0].message?.content) { + onChunk({ type: ChunkType.TEXT_COMPLETE, text: stream.choices[0].message.content }) + } + + // Always send usage and metrics data + onChunk({ type: ChunkType.BLOCK_COMPLETE, response: { usage: finalUsage, metrics: finalMetrics } }) + return + } + + let content = '' // Accumulate content for tool processing if needed + let thinkingContent = '' + // 记录最终的完成时间差 + let final_time_completion_millsec_delta = 0 + let final_time_thinking_millsec_delta = 0 + // Variable to store the last received usage object + let lastUsage: Usage | undefined = undefined + // let isThinkingInContent: ThoughtProcessor | undefined = undefined + // const processThinkingChunk = this.handleThinkingTags() + let isFirstChunk = true + let isFirstThinkingChunk = true + for await (const chunk of stream) { + if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) { + break + } + + const delta = chunk.choices[0]?.delta + const finishReason = chunk.choices[0]?.finish_reason + + // --- Incremental onChunk calls --- + + // 1. 
Reasoning Content + const reasoningContent = delta?.reasoning_content || delta?.reasoning + const currentTime = new Date().getTime() // Get current time for each chunk + + if (time_first_token_millsec === 0 && isFirstThinkingChunk && reasoningContent) { + // 记录第一个token的时间 + time_first_token_millsec = currentTime + // 记录第一个token的时间差 + time_first_token_millsec_delta = currentTime - start_time_millsec + console.log( + `completions time_first_token_millsec ${new Date(currentTime).toLocaleString(undefined, { + year: 'numeric', + month: 'numeric', + day: 'numeric', + hour: 'numeric', + minute: 'numeric', + second: 'numeric', + fractionalSecondDigits: 3 + })}` + ) + isFirstThinkingChunk = false + } + if (reasoningContent) { + thinkingContent += reasoningContent + hasReasoningContent = true // Keep track if reasoning occurred + + // Calculate thinking time as time elapsed since start until this chunk + const thinking_time = currentTime - time_first_token_millsec + onChunk({ type: ChunkType.THINKING_DELTA, text: reasoningContent, thinking_millsec: thinking_time }) + } + if (isReasoningJustDone(delta) && time_first_content_millsec === 0) { + time_first_content_millsec = currentTime + final_time_thinking_millsec_delta = time_first_content_millsec - time_first_token_millsec + onChunk({ + type: ChunkType.THINKING_COMPLETE, + text: thinkingContent, + thinking_millsec: final_time_thinking_millsec_delta + }) + + thinkingContent = '' + isFirstThinkingChunk = true + hasReasoningContent = false + } + + // 2. Text Content + if (delta?.content) { + if (isEnabledWebSearch) { + if (delta?.annotations) { + delta.content = convertLinks(delta.content || '', isFirstChunk) + } else if (assistant.model?.provider === 'openrouter') { + delta.content = convertLinksToOpenRouter(delta.content || '', isFirstChunk) + } else if (isZhipuModel(assistant.model)) { + delta.content = convertLinksToZhipu(delta.content || '', isFirstChunk) + } else if (isHunyuanSearchModel(assistant.model)) { + delta.content = convertLinksToHunyuan( + delta.content || '', + chunk.search_info.search_results || [], + isFirstChunk + ) + } + } + // 说明前面没有思考内容 + if (isFirstChunk && time_first_token_millsec === 0 && time_first_token_millsec_delta === 0) { + isFirstChunk = false + time_first_token_millsec = currentTime + time_first_token_millsec_delta = time_first_token_millsec - start_time_millsec + } + content += delta.content // Still accumulate for processToolUses + + // isThinkingInContent = this.findThinkingProcessor(content, model) + // if (isThinkingInContent) { + // processThinkingChunk(content, isThinkingInContent, onChunk) + onChunk({ type: ChunkType.TEXT_DELTA, text: delta.content }) + // } else { + // } + } + // console.log('delta?.finish_reason', delta?.finish_reason) + if (!isEmpty(finishReason) || delta?.annotations) { + onChunk({ type: ChunkType.TEXT_COMPLETE, text: content }) + final_time_completion_millsec_delta = currentTime - start_time_millsec + console.log( + `completions final_time_completion_millsec ${new Date(currentTime).toLocaleString(undefined, { + year: 'numeric', + month: 'numeric', + day: 'numeric', + hour: 'numeric', + minute: 'numeric', + second: 'numeric', + fractionalSecondDigits: 3 + })}` + ) + // 6. 
Usage (If provided per chunk) - Capture the last known usage + if (chunk.usage) { + // console.log('chunk.usage', chunk.usage) + lastUsage = chunk.usage // Update with the latest usage info + // Send incremental usage update if needed by UI (optional, keep if useful) + // onChunk({ type: 'block_in_progress', response: { usage: chunk.usage } }) + } + + // 3. Web Search + if (delta?.annotations) { + onChunk({ + type: ChunkType.LLM_WEB_SEARCH_COMPLETE, + llm_web_search: { + results: delta.annotations, + source: WebSearchSource.OPENAI_COMPATIBLE + } + } as LLMWebSearchCompleteChunk) + } + + if (assistant.model?.provider === 'perplexity') { + const citations = chunk.citations + if (citations) { + onChunk({ + type: ChunkType.LLM_WEB_SEARCH_COMPLETE, + llm_web_search: { + results: citations, + source: WebSearchSource.PERPLEXITY + } + } as LLMWebSearchCompleteChunk) + } + } + if (isEnabledWebSearch && isZhipuModel(model) && finishReason === 'stop' && chunk?.web_search) { + onChunk({ + type: ChunkType.LLM_WEB_SEARCH_COMPLETE, + llm_web_search: { + results: chunk.web_search, + source: WebSearchSource.ZHIPU + } + } as LLMWebSearchCompleteChunk) + } + if (isEnabledWebSearch && isHunyuanSearchModel(model) && chunk?.search_info?.search_results) { + onChunk({ + type: ChunkType.LLM_WEB_SEARCH_COMPLETE, + llm_web_search: { + results: chunk.search_info.search_results, + source: WebSearchSource.HUNYUAN + } + } as LLMWebSearchCompleteChunk) + } + } + + // --- End of Incremental onChunk calls --- + } // End of for await loop + + // Call processToolUses AFTER the loop finishes processing the main stream content + // Note: parseAndCallTools inside processToolUses should handle its own onChunk for tool responses + await processToolUses(content, idx) + + // Send the final block_complete chunk with accumulated data + onChunk({ + type: ChunkType.BLOCK_COMPLETE, + response: { + // Use the enhanced usage object + usage: lastUsage, + metrics: { + // Get completion tokens from the last usage object if available + completion_tokens: lastUsage?.completion_tokens, + time_completion_millsec: final_time_completion_millsec_delta, + time_first_token_millsec: time_first_token_millsec_delta, + time_thinking_millsec: final_time_thinking_millsec_delta + } + } + }) + + // FIXME: 临时方案,重置时间戳和思考内容 + time_first_token_millsec = 0 + time_first_content_millsec = 0 + + // OpenAI stream typically doesn't provide a final summary chunk easily. + // We are sending per-chunk usage if available. 
+    }
+
+    console.debug('[completions] reqMessages before processing', model.id, reqMessages)
+    reqMessages = processReqMessages(model, reqMessages)
+    console.debug('[completions] reqMessages', model.id, reqMessages)
+    // Wait for the API to return the stream
+    onChunk({ type: ChunkType.LLM_RESPONSE_CREATED })
+    const stream = await this.sdk.chat.completions
+      // @ts-ignore key is not typed
+      .create(
+        {
+          model: model.id,
+          messages: reqMessages,
+          temperature: this.getTemperature(assistant, model),
+          top_p: this.getTopP(assistant, model),
+          max_tokens: maxTokens,
+          keep_alive: this.keepAliveTime,
+          stream: isSupportStreamOutput(),
+          // tools: tools,
+          service_tier: this.getServiceTier(model),
+          ...getOpenAIWebSearchParams(assistant, model),
+          ...this.getReasoningEffort(assistant, model),
+          ...this.getProviderSpecificParameters(assistant, model),
+          ...this.getCustomParameters(assistant)
+        },
+        {
+          signal,
+          timeout: this.getTimeout(model)
+        }
+      )
+
+    await processStream(stream, 0).finally(cleanup)
+
+    // Catch errors raised through the abort signal
+    await signalPromise?.promise?.catch((error) => {
+      throw error
+    })
+  }
+
+  /**
+   * Translate a message
+   * @param content
+   * @param assistant - The assistant
+   * @param onResponse - The onResponse callback
+   * @returns The translated message
+   */
+  async translate(content: string, assistant: Assistant, onResponse?: (text: string, isComplete: boolean) => void) {
+    const defaultModel = getDefaultModel()
+    const model = assistant.model || defaultModel
+    if (assistant.model?.provider === 'aihubmix' && isOpenAILLMModel(model)) {
+      return await super.translate(content, assistant, onResponse)
+    }
+    const messagesForApi = content
+      ? [
+          { role: 'system', content: assistant.prompt },
+          { role: 'user', content }
+        ]
+      : [{ role: 'user', content: assistant.prompt }]
+
+    const isSupportedStreamOutput = () => {
+      if (!onResponse) {
+        return false
+      }
+      return true
+    }
+
+    const stream = isSupportedStreamOutput()
+
+    await this.checkIsCopilot()
+
+    // console.debug('[translate] reqMessages', model.id, message)
+    // @ts-ignore key is not typed
+    const response = await this.sdk.chat.completions.create({
+      model: model.id,
+      messages: messagesForApi as ChatCompletionMessageParam[],
+      stream,
+      keep_alive: this.keepAliveTime,
+      temperature: this.getTemperature(assistant, model),
+      top_p: this.getTopP(assistant, model),
+      ...this.getReasoningEffort(assistant, model)
+    })
+
+    if (!stream) {
+      return response.choices[0].message?.content || ''
+    }
+
+    let text = ''
+    let isThinking = false
+    const isReasoning = isReasoningModel(model)
+
+    for await (const chunk of response) {
+      const deltaContent = chunk.choices[0]?.delta?.content || ''
+
+      if (isReasoning) {
+        if (deltaContent.includes('<think>')) {
+          isThinking = true
+        }
+
+        if (!isThinking) {
+          text += deltaContent
+          onResponse?.(text, false)
+        }
+
+        if (deltaContent.includes('</think>')) {
+          isThinking = false
+        }
+      } else {
+        text += deltaContent
+        onResponse?.(text, false)
+      }
+    }
+
+    onResponse?.(text, true)
+
+    return text
+  }
+
+  /**
+   * Summarize a message
+   * @param messages - The messages
+   * @param assistant - The assistant
+   * @returns The summary
+   */
+  public async summaries(messages: Message[], assistant: Assistant): Promise<string> {
+    const model = getTopNamingModel() || assistant.model || getDefaultModel()
+
+    if (assistant.model?.provider === 'aihubmix' && isOpenAILLMModel(model)) {
+      return await super.summaries(messages, assistant)
+    }
+
+    const userMessages = takeRight(messages, 5)
+      .filter((message) => !message.isPreset)
+      .map((message) => ({
+        role: message.role,
content: getMainTextContent(message) + })) + + const userMessageContent = userMessages.reduce((prev, curr) => { + const content = curr.role === 'user' ? `User: ${curr.content}` : `Assistant: ${curr.content}` + return prev + (prev ? '\n' : '') + content + }, '') + + const systemMessage = { + role: 'system', + content: getStoreSetting('topicNamingPrompt') || i18n.t('prompts.title') + } + + const userMessage = { + role: 'user', + content: userMessageContent + } + + await this.checkIsCopilot() + + console.debug('[summaries] reqMessages', model.id, [systemMessage, userMessage]) + // @ts-ignore key is not typed + const response = await this.sdk.chat.completions.create({ + model: model.id, + messages: [systemMessage, userMessage] as ChatCompletionMessageParam[], + stream: false, + keep_alive: this.keepAliveTime, + max_tokens: 1000 + }) + + // 针对思考类模型的返回,总结仅截取之后的内容 + let content = response.choices[0].message?.content || '' + content = content.replace(/^(.*?)<\/think>/s, '') + + return removeSpecialCharactersForTopicName(content.substring(0, 50)) + } + + /** + * Summarize a message for search + * @param messages - The messages + * @param assistant - The assistant + * @returns The summary + */ + public async summaryForSearch(messages: Message[], assistant: Assistant): Promise { + const model = assistant.model || getDefaultModel() + + if (assistant.model?.provider === 'aihubmix' && isOpenAILLMModel(model)) { + return await super.summaryForSearch(messages, assistant) + } + + const systemMessage = { + role: 'system', + content: assistant.prompt + } + + const messageContents = messages.map((m) => getMainTextContent(m)) + const userMessageContent = messageContents.join('\n') + + const userMessage = { + role: 'user', + content: userMessageContent + } + console.debug('[summaryForSearch] reqMessages', model.id, [systemMessage, userMessage]) + + const lastUserMessage = messages[messages.length - 1] + console.log('lastUserMessage?.id', lastUserMessage?.id) + const { abortController, cleanup } = this.createAbortController(lastUserMessage?.id) + const { signal } = abortController + + const response = await this.sdk.chat.completions + // @ts-ignore key is not typed + .create( + { + model: model.id, + messages: [systemMessage, userMessage] as ChatCompletionMessageParam[], + stream: false, + keep_alive: this.keepAliveTime, + max_tokens: 1000 + }, + { + timeout: 20 * 1000, + signal: signal + } + ) + .finally(cleanup) + + // 针对思考类模型的返回,总结仅截取之后的内容 + let content = response.choices[0].message?.content || '' + content = content.replace(/^(.*?)<\/think>/s, '') + + return content + } + + /** + * Generate text + * @param prompt - The prompt + * @param content - The content + * @returns The generated text + */ + public async generateText({ prompt, content }: { prompt: string; content: string }): Promise { + const model = getDefaultModel() + + await this.checkIsCopilot() + + const response = await this.sdk.chat.completions.create({ + model: model.id, + stream: false, + messages: [ + { role: 'system', content: prompt }, + { role: 'user', content } + ] + }) + + return response.choices[0].message?.content || '' + } + + /** + * Generate suggestions + * @param messages - The messages + * @param assistant - The assistant + * @returns The suggestions + */ + async suggestions(messages: Message[], assistant: Assistant): Promise { + const { model } = assistant + + if (!model) { + return [] + } + + await this.checkIsCopilot() + + const userMessagesForApi = messages + .filter((m) => m.role === 'user') + .map((m) => ({ + role: m.role, + 
content: getMainTextContent(m) + })) + + const response: any = await this.sdk.request({ + method: 'post', + path: '/advice_questions', + body: { + messages: userMessagesForApi, + model: model.id, + max_tokens: 0, + temperature: 0, + n: 0 + } + }) + + return response?.questions?.filter(Boolean)?.map((q: any) => ({ content: q })) || [] + } + + /** + * Check if the model is valid + * @param model - The model + * @param stream - Whether to use streaming interface + * @returns The validity of the model + */ + public async check(model: Model, stream: boolean = false): Promise<{ valid: boolean; error: Error | null }> { + if (!model) { + return { valid: false, error: new Error('No model found') } + } + if (model.provider === 'aihubmix' && isOpenAILLMModel(model)) { + return await super.check(model, stream) + } + const body = { + model: model.id, + messages: [{ role: 'user', content: 'hi' }], + stream + } + + try { + await this.checkIsCopilot() + console.debug('[checkModel] body', model.id, body) + if (!stream) { + const response = await this.sdk.chat.completions.create(body as ChatCompletionCreateParamsNonStreaming) + if (!response?.choices[0].message) { + throw new Error('Empty response') + } + return { valid: true, error: null } + } else { + const response: any = await this.sdk.chat.completions.create(body as any) + // 等待整个流式响应结束 + let hasContent = false + for await (const chunk of response) { + if (chunk.choices?.[0]?.delta?.content) { + hasContent = true + } + } + if (hasContent) { + return { valid: true, error: null } + } + throw new Error('Empty streaming response') + } + } catch (error: any) { + return { + valid: false, + error + } + } + } + + /** + * Get the models + * @returns The models + */ + public async models(): Promise { + try { + await this.checkIsCopilot() + + const response = await this.sdk.models.list() + + if (this.provider.id === 'github') { + // @ts-ignore key is not typed + return response.body + .map((model) => ({ + id: model.name, + description: model.summary, + object: 'model', + owned_by: model.publisher + })) + .filter(isSupportedModel) + } + + if (this.provider.id === 'together') { + // @ts-ignore key is not typed + return response?.body + .map((model: any) => ({ + id: model.id, + description: model.display_name, + object: 'model', + owned_by: model.organization + })) + .filter(isSupportedModel) + } + + const models = response.data || [] + models.forEach((model) => { + model.id = model.id.trim() + }) + + return models.filter(isSupportedModel) + } catch (error) { + return [] + } + } + + /** + * Get the embedding dimensions + * @param model - The model + * @returns The embedding dimensions + */ + public async getEmbeddingDimensions(model: Model): Promise { + await this.checkIsCopilot() + + const data = await this.sdk.embeddings.create({ + model: model.id, + input: model?.provider === 'baidu-cloud' ? 
['hi'] : 'hi' + }) + return data.data[0].embedding.length + } + + public async checkIsCopilot() { + if (this.provider.id !== 'copilot') { + return + } + const defaultHeaders = store.getState().copilot.defaultHeaders + // copilot每次请求前需要重新获取token,因为token中附带时间戳 + const { token } = await window.api.copilot.getToken(defaultHeaders) + this.sdk.apiKey = token + } +} diff --git a/src/renderer/src/providers/AiProvider/OpenAIProvider.ts b/src/renderer/src/providers/AiProvider/OpenAIProvider.ts index 980058ebb2..38b9708cad 100644 --- a/src/renderer/src/providers/AiProvider/OpenAIProvider.ts +++ b/src/renderer/src/providers/AiProvider/OpenAIProvider.ts @@ -1,19 +1,11 @@ -import { DEFAULT_MAX_TOKENS } from '@renderer/config/constant' import { - findTokenLimit, getOpenAIWebSearchParams, - isHunyuanSearchModel, + isOpenAILLMModel, + isOpenAIReasoningModel, isOpenAIWebSearch, - isReasoningModel, isSupportedModel, - isSupportedReasoningEffortGrokModel, - isSupportedReasoningEffortModel, isSupportedReasoningEffortOpenAIModel, - isSupportedThinkingTokenClaudeModel, - isSupportedThinkingTokenModel, - isSupportedThinkingTokenQwenModel, - isVisionModel, - isZhipuModel + isVisionModel } from '@renderer/config/models' import { getStoreSetting } from '@renderer/hooks/useSettings' import i18n from '@renderer/i18n' @@ -25,11 +17,8 @@ import { filterEmptyMessages, filterUserRoleStartMessages } from '@renderer/services/MessagesService' -import { processReqMessages } from '@renderer/services/ModelMessageService' -import store from '@renderer/store' import { Assistant, - EFFORT_RATIO, FileTypes, GenerateImageParams, MCPToolResponse, @@ -39,79 +28,45 @@ import { Usage, WebSearchSource } from '@renderer/types' -import { ChunkType, LLMWebSearchCompleteChunk } from '@renderer/types/chunk' +import { ChunkType } from '@renderer/types/chunk' import { Message } from '@renderer/types/newMessage' import { removeSpecialCharactersForTopicName } from '@renderer/utils' import { addImageFileToContents } from '@renderer/utils/formats' -import { - convertLinks, - convertLinksToHunyuan, - convertLinksToOpenRouter, - convertLinksToZhipu -} from '@renderer/utils/linkConverter' +import { convertLinks } from '@renderer/utils/linkConverter' import { mcpToolCallResponseToOpenAIMessage, parseAndCallTools } from '@renderer/utils/mcp-tools' import { findFileBlocks, findImageBlocks, getMainTextContent } from '@renderer/utils/messageUtils/find' import { buildSystemPrompt } from '@renderer/utils/prompt' import { isEmpty, takeRight } from 'lodash' -import OpenAI, { AzureOpenAI, toFile } from 'openai' -import { - ChatCompletionContentPart, - ChatCompletionCreateParamsNonStreaming, - ChatCompletionMessageParam -} from 'openai/resources' -import { FileLike } from 'openai/uploads' +import OpenAI from 'openai' +import { ChatCompletionContentPart, ChatCompletionMessageParam } from 'openai/resources/chat/completions' +import { Stream } from 'openai/streaming' +import { FileLike, toFile } from 'openai/uploads' import { CompletionsParams } from '.' 
import BaseProvider from './BaseProvider' export default class OpenAIProvider extends BaseProvider { - private sdk: OpenAI + protected sdk: OpenAI constructor(provider: Provider) { super(provider) - if (provider.id === 'azure-openai' || provider.type === 'azure-openai') { - this.sdk = new AzureOpenAI({ - dangerouslyAllowBrowser: true, - apiKey: this.apiKey, - apiVersion: provider.apiVersion, - endpoint: provider.apiHost - }) - return - } - this.sdk = new OpenAI({ dangerouslyAllowBrowser: true, apiKey: this.apiKey, baseURL: this.getBaseURL(), defaultHeaders: { - ...this.defaultHeaders(), - ...(this.provider.id === 'copilot' ? { 'editor-version': 'vscode/1.97.2' } : {}), - ...(this.provider.id === 'copilot' ? { 'copilot-vision-request': 'true' } : {}) + ...this.defaultHeaders() } }) } - /** - * Check if the provider does not support files - * @returns True if the provider does not support files, false otherwise - */ - private get isNotSupportFiles() { - if (this.provider?.isNotSupportArrayContent) { - return true - } - - const providers = ['deepseek', 'baichuan', 'minimax', 'xirang'] - - return providers.includes(this.provider.id) - } - /** * Extract the file content from the message * @param message - The message * @returns The file content */ - private async extractFileContent(message: Message) { + protected async extractFileContent(message: Message) { const fileBlocks = findFileBlocks(message) if (fileBlocks.length > 0) { const textFileBlocks = fileBlocks.filter( @@ -136,13 +91,131 @@ export default class OpenAIProvider extends BaseProvider { return '' } + private async getReponseMessageParam(message: Message, model: Model): Promise { + const isVision = isVisionModel(model) + const content = await this.getMessageContent(message) + const fileBlocks = findFileBlocks(message) + const imageBlocks = findImageBlocks(message) + + if (fileBlocks.length === 0 && imageBlocks.length === 0) { + return { + role: message.role === 'system' ? 'user' : message.role, + content: content ? [{ type: 'input_text', text: content }] : [] + } + } + + const parts: OpenAI.Responses.ResponseInputContent[] = [] + if (content) { + parts.push({ + type: 'input_text', + text: content + }) + } + + for (const imageBlock of imageBlocks) { + if (isVision) { + if (imageBlock.file) { + const image = await window.api.file.base64Image(imageBlock.file.id + imageBlock.file.ext) + parts.push({ + detail: 'auto', + type: 'input_image', + image_url: image.data as string + }) + } else if (imageBlock.url && imageBlock.url.startsWith('data:')) { + parts.push({ + detail: 'auto', + type: 'input_image', + image_url: imageBlock.url + }) + } + } + } + + for (const fileBlock of fileBlocks) { + const file = fileBlock.file + if (!file) continue + + if ([FileTypes.TEXT, FileTypes.DOCUMENT].includes(file.type)) { + const fileContent = (await window.api.file.read(file.id + file.ext)).trim() + parts.push({ + type: 'input_text', + text: file.origin_name + '\n' + fileContent + }) + } + } + + return { + role: message.role === 'system' ? 
'user' : message.role, + content: parts + } + } + + protected getServiceTier(model: Model) { + if ((model.id.includes('o3') && !model.id.includes('o3-mini')) || model.id.includes('o4-mini')) { + return 'flex' + } + if (isOpenAILLMModel(model)) { + return 'auto' + } + return undefined + } + + protected getTimeout(model: Model) { + if ((model.id.includes('o3') && !model.id.includes('o3-mini')) || model.id.includes('o4-mini')) { + return 15 * 1000 * 60 + } + return undefined + } + + /** + * Get the temperature for the assistant + * @param assistant - The assistant + * @param model - The model + * @returns The temperature + */ + protected getTemperature(assistant: Assistant, model: Model) { + return isOpenAIReasoningModel(model) || isOpenAILLMModel(model) ? undefined : assistant?.settings?.temperature + } + + /** + * Get the top P for the assistant + * @param assistant - The assistant + * @param model - The model + * @returns The top P + */ + protected getTopP(assistant: Assistant, model: Model) { + return isOpenAIReasoningModel(model) || isOpenAILLMModel(model) ? undefined : assistant?.settings?.topP + } + + private getResponseReasoningEffort(assistant: Assistant, model: Model) { + if (!isSupportedReasoningEffortOpenAIModel(model)) { + return {} + } + + const reasoningEffort = assistant?.settings?.reasoning_effort + if (!reasoningEffort) { + return {} + } + + if (isSupportedReasoningEffortOpenAIModel(model)) { + return { + reasoning: { + effort: reasoningEffort as OpenAI.ReasoningEffort, + summary: 'detailed' + } as OpenAI.Reasoning + } + } + + return {} + } + /** * Get the message parameter * @param message - The message * @param model - The model * @returns The message parameter */ - private async getMessageParam( + protected async getMessageParam( message: Message, model: Model ): Promise { @@ -158,17 +231,6 @@ export default class OpenAIProvider extends BaseProvider { } } - // If the model does not support files, extract the file content - if (this.isNotSupportFiles) { - const fileContent = await this.extractFileContent(message) - - return { - role: message.role === 'system' ? 'user' : message.role, - content: content + '\n\n---\n\n' + fileContent - } - } - - // If the model supports files, add the file content to the message const parts: ChatCompletionContentPart[] = [] if (content) { @@ -187,8 +249,10 @@ export default class OpenAIProvider extends BaseProvider { } for (const fileBlock of fileBlocks) { - const file = fileBlock.file - if (!file) continue + const { file } = fileBlock + if (!file) { + continue + } if ([FileTypes.TEXT, FileTypes.DOCUMENT].includes(file.type)) { const fileContent = await (await window.api.file.read(file.id + file.ext)).trim() @@ -206,151 +270,10 @@ export default class OpenAIProvider extends BaseProvider { } /** - * Get the temperature for the assistant - * @param assistant - The assistant - * @param model - The model - * @returns The temperature - */ - private getTemperature(assistant: Assistant, model: Model) { - return isReasoningModel(model) || isOpenAIWebSearch(model) ? 
undefined : assistant?.settings?.temperature - } - - /** - * Get the provider specific parameters for the assistant - * @param assistant - The assistant - * @param model - The model - * @returns The provider specific parameters - */ - private getProviderSpecificParameters(assistant: Assistant, model: Model) { - const { maxTokens } = getAssistantSettings(assistant) - - if (this.provider.id === 'openrouter') { - if (model.id.includes('deepseek-r1')) { - return { - include_reasoning: true - } - } - } - - if (this.isOpenAIReasoning(model)) { - return { - max_tokens: undefined, - max_completion_tokens: maxTokens - } - } - - return {} - } - - /** - * Get the top P for the assistant - * @param assistant - The assistant - * @param model - The model - * @returns The top P - */ - private getTopP(assistant: Assistant, model: Model) { - if (isReasoningModel(model) || isOpenAIWebSearch(model)) return undefined - - return assistant?.settings?.topP - } - - /** - * Get the reasoning effort for the assistant - * @param assistant - The assistant - * @param model - The model - * @returns The reasoning effort - */ - private getReasoningEffort(assistant: Assistant, model: Model) { - if (this.provider.id === 'groq') { - return {} - } - - if (!isReasoningModel(model)) { - return {} - } - const reasoningEffort = assistant?.settings?.reasoning_effort - if (!reasoningEffort) { - if (isSupportedThinkingTokenQwenModel(model)) { - return { enable_thinking: false } - } - - if (isSupportedThinkingTokenClaudeModel(model)) { - return { thinking: { type: 'disabled' } } - } - - return {} - } - const effortRatio = EFFORT_RATIO[reasoningEffort] - const budgetTokens = Math.floor((findTokenLimit(model.id)?.max || 0) * effortRatio) - // OpenRouter models - if (model.provider === 'openrouter') { - if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenClaudeModel(model)) { - return { - reasoning: { - effort: assistant?.settings?.reasoning_effort - } - } - } - if (isSupportedThinkingTokenModel(model)) { - return { - reasoning: { - max_tokens: budgetTokens - } - } - } - } - - // Qwen models - if (isSupportedThinkingTokenQwenModel(model)) { - return { - enable_thinking: true, - thinking_budget: budgetTokens - } - } - - // Grok models - if (isSupportedReasoningEffortGrokModel(model)) { - return { - reasoning_effort: assistant?.settings?.reasoning_effort - } - } - - // OpenAI models - if (isSupportedReasoningEffortOpenAIModel(model)) { - return { - reasoning_effort: assistant?.settings?.reasoning_effort - } - } - - // Claude models - const { maxTokens } = getAssistantSettings(assistant) - if (isSupportedThinkingTokenClaudeModel(model)) { - return { - thinking: { - type: 'enabled', - budget_tokens: Math.floor(Math.max(Math.min(budgetTokens, maxTokens || DEFAULT_MAX_TOKENS), 1024)) - } - } - } - - // Default case: no special thinking settings - return {} - } - - /** - * Check if the model is an OpenAI reasoning model - * @param model - The model - * @returns True if the model is an OpenAI reasoning model, false otherwise - */ - private isOpenAIReasoning(model: Model) { - return model.id.startsWith('o1') || model.id.startsWith('o3') || model.id.startsWith('o4') - } - - /** - * Generate completions for the assistant + * Generate completions for the assistant use Response API * @param messages - The messages * @param assistant - The assistant - * @param mcpTools - The MCP tools + * @param mcpTools * @param onChunk - The onChunk callback * @param onFilterMessages - The onFilterMessages callback * @returns The completions @@ 
-363,93 +286,156 @@ export default class OpenAIProvider extends BaseProvider { const defaultModel = getDefaultModel() const model = assistant.model || defaultModel const { contextCount, maxTokens, streamOutput } = getAssistantSettings(assistant) + const isEnabledWebSearch = assistant.enableWebSearch || !!assistant.webSearchProviderId - messages = addImageFileToContents(messages) - let systemMessage = { role: 'system', content: assistant.prompt || '' } - if (isSupportedReasoningEffortOpenAIModel(model)) { - systemMessage = { - role: 'developer', - content: `Formatting re-enabled${systemMessage ? '\n' + systemMessage.content : ''}` + onChunk({ type: ChunkType.LLM_RESPONSE_CREATED }) + // 退回到 OpenAI 兼容模式 + if (isOpenAIWebSearch(model)) { + const systemMessage = { role: 'system', content: assistant.prompt || '' } + const userMessages: ChatCompletionMessageParam[] = [] + const _messages = filterUserRoleStartMessages( + filterEmptyMessages(filterContextMessages(takeRight(messages, contextCount + 1))) + ) + onFilterMessages(_messages) + + for (const message of _messages) { + userMessages.push(await this.getMessageParam(message, model)) } + //当 systemMessage 内容为空时不发送 systemMessage + let reqMessages: ChatCompletionMessageParam[] + if (!systemMessage.content) { + reqMessages = [...userMessages] + } else { + reqMessages = [systemMessage, ...userMessages].filter(Boolean) as ChatCompletionMessageParam[] + } + const lastUserMessage = _messages.findLast((m) => m.role === 'user') + const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true) + const { signal } = abortController + let time_first_token_millsec_delta = 0 + const start_time_millsec = new Date().getTime() + const response = await this.sdk.chat.completions + // @ts-ignore key is not typed + .create( + { + model: model.id, + messages: reqMessages, + stream: true, + temperature: this.getTemperature(assistant, model), + top_p: this.getTopP(assistant, model), + max_tokens: maxTokens, + ...getOpenAIWebSearchParams(assistant, model), + ...this.getCustomParameters(assistant) + }, + { + signal + } + ) + const processStream = async (stream: any) => { + let content = '' + let isFirstChunk = true + let final_time_completion_millsec_delta = 0 + let lastUsage: Usage | undefined = undefined + for await (const chunk of stream as any) { + if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) { + break + } + const delta = chunk.choices[0]?.delta + const finishReason = chunk.choices[0]?.finish_reason + if (delta?.content) { + if (delta?.annotations) { + delta.content = convertLinks(delta.content || '', isFirstChunk) + } + if (isFirstChunk) { + isFirstChunk = false + time_first_token_millsec_delta = new Date().getTime() - start_time_millsec + } + content += delta.content + onChunk({ type: ChunkType.TEXT_DELTA, text: delta.content }) + } + if (!isEmpty(finishReason) || chunk?.annotations) { + onChunk({ type: ChunkType.TEXT_COMPLETE, text: content }) + final_time_completion_millsec_delta = new Date().getTime() - start_time_millsec + if (chunk.usage) { + lastUsage = chunk.usage + } + } + if (delta?.annotations) { + onChunk({ + type: ChunkType.LLM_WEB_SEARCH_COMPLETE, + llm_web_search: { + results: delta.annotations, + source: WebSearchSource.OPENAI_COMPATIBLE + } + }) + } + } + onChunk({ + type: ChunkType.BLOCK_COMPLETE, + response: { + usage: lastUsage, + metrics: { + completion_tokens: lastUsage?.completion_tokens, + time_completion_millsec: final_time_completion_millsec_delta, + time_first_token_millsec: 
time_first_token_millsec_delta + } + } + }) + } + await processStream(response).finally(cleanup) + await signalPromise?.promise?.catch((error) => { + throw error + }) + return } - if (mcpTools && mcpTools.length > 0) { - systemMessage.content = buildSystemPrompt(systemMessage.content || '', mcpTools) + const tools: OpenAI.Responses.Tool[] = [] + if (isEnabledWebSearch) { + tools.push({ + type: 'web_search_preview' + }) + } + messages = addImageFileToContents(messages) + const systemMessage: OpenAI.Responses.EasyInputMessage = { + role: 'system', + content: [] + } + const systemMessageContent: OpenAI.Responses.ResponseInputMessageContentList = [] + const systemMessageInput: OpenAI.Responses.ResponseInputText = { + text: assistant.prompt || '', + type: 'input_text' + } + if (isSupportedReasoningEffortOpenAIModel(model)) { + systemMessageInput.text = `Formatting re-enabled${systemMessageInput.text ? '\n' + systemMessageInput.text : ''}` + systemMessage.role = 'developer' } - const userMessages: ChatCompletionMessageParam[] = [] + if (mcpTools && mcpTools.length > 0) { + systemMessageInput.text = buildSystemPrompt(systemMessageInput.text || '', mcpTools) + } + systemMessageContent.push(systemMessageInput) + systemMessage.content = systemMessageContent const _messages = filterUserRoleStartMessages( filterEmptyMessages(filterContextMessages(takeRight(messages, contextCount + 1))) ) onFilterMessages(_messages) - + const userMessage: OpenAI.Responses.EasyInputMessage[] = [] for (const message of _messages) { - userMessages.push(await this.getMessageParam(message, model)) - } - - const isSupportStreamOutput = () => { - return streamOutput - } - - let hasReasoningContent = false - let lastChunk = '' - const isReasoningJustDone = ( - delta: OpenAI.Chat.Completions.ChatCompletionChunk.Choice.Delta & { - reasoning_content?: string - reasoning?: string - thinking?: string - } - ) => { - if (!delta?.content) return false - - // 检查当前chunk和上一个chunk的组合是否形成###Response标记 - const combinedChunks = lastChunk + delta.content - lastChunk = delta.content - - // 检测思考结束 - if (combinedChunks.includes('###Response') || delta.content === '') { - return true - } - - // 如果有reasoning_content或reasoning,说明是在思考中 - if (delta?.reasoning_content || delta?.reasoning || delta?.thinking) { - hasReasoningContent = true - } - - // 如果之前有reasoning_content或reasoning,现在有普通content,说明思考结束 - if (hasReasoningContent && delta.content) { - return true - } - - return false + userMessage.push(await this.getReponseMessageParam(message, model)) } let time_first_token_millsec = 0 - let time_first_token_millsec_delta = 0 - let time_first_content_millsec = 0 const start_time_millsec = new Date().getTime() - console.log( - `completions start_time_millsec ${new Date(start_time_millsec).toLocaleString(undefined, { - year: 'numeric', - month: 'numeric', - day: 'numeric', - hour: 'numeric', - minute: 'numeric', - second: 'numeric', - fractionalSecondDigits: 3 - })}` - ) + const lastUserMessage = _messages.findLast((m) => m.role === 'user') const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true) const { signal } = abortController - await this.checkIsCopilot() - //当 systemMessage 内容为空时不发送 systemMessage - let reqMessages: ChatCompletionMessageParam[] + // 当 systemMessage 内容为空时不发送 systemMessage + let reqMessages: OpenAI.Responses.EasyInputMessage[] if (!systemMessage.content) { - reqMessages = [...userMessages] + reqMessages = [...userMessage] } else { - reqMessages = [systemMessage, 
...userMessages].filter(Boolean) as ChatCompletionMessageParam[] + reqMessages = [systemMessage, ...userMessage].filter(Boolean) as OpenAI.Responses.EasyInputMessage[] } const toolResponses: MCPToolResponse[] = [] @@ -469,280 +455,193 @@ export default class OpenAIProvider extends BaseProvider { reqMessages.push({ role: 'assistant', content: content - } as ChatCompletionMessageParam) - toolResults.forEach((ts) => reqMessages.push(ts as ChatCompletionMessageParam)) - - console.debug('[tool] reqMessages before processing', model.id, reqMessages) - reqMessages = processReqMessages(model, reqMessages) - console.debug('[tool] reqMessages', model.id, reqMessages) - const newStream = await this.sdk.chat.completions - // @ts-ignore key is not typed - .create( - { - model: model.id, - messages: reqMessages, - temperature: this.getTemperature(assistant, model), - top_p: this.getTopP(assistant, model), - max_tokens: maxTokens, - keep_alive: this.keepAliveTime, - stream: isSupportStreamOutput(), - // tools: tools, - ...getOpenAIWebSearchParams(assistant, model), - ...this.getReasoningEffort(assistant, model), - ...this.getProviderSpecificParameters(assistant, model), - ...this.getCustomParameters(assistant) - }, - { - signal - } - ) + }) + toolResults.forEach((ts) => reqMessages.push(ts as OpenAI.Responses.EasyInputMessage)) + const newStream = await this.sdk.responses.create( + { + model: model.id, + input: reqMessages, + temperature: this.getTemperature(assistant, model), + top_p: this.getTopP(assistant, model), + max_output_tokens: maxTokens, + stream: true, + service_tier: this.getServiceTier(model), + ...this.getResponseReasoningEffort(assistant, model), + ...this.getCustomParameters(assistant) + }, + { + signal, + timeout: this.getTimeout(model) + } + ) await processStream(newStream, idx + 1) } } - const processStream = async (stream: any, idx: number) => { - // Handle non-streaming case (already returns early, no change needed here) - if (!isSupportStreamOutput()) { + const processStream = async ( + stream: Stream | OpenAI.Responses.Response, + idx: number + ) => { + if (!streamOutput) { + const nonStream = stream as OpenAI.Responses.Response const time_completion_millsec = new Date().getTime() - start_time_millsec - // Calculate final metrics once + const completion_tokens = + (nonStream.usage?.output_tokens || 0) + (nonStream.usage?.output_tokens_details.reasoning_tokens ?? 0) + const total_tokens = + (nonStream.usage?.total_tokens || 0) + (nonStream.usage?.output_tokens_details.reasoning_tokens ?? 
0) const finalMetrics = { - completion_tokens: stream.usage?.completion_tokens, + completion_tokens, time_completion_millsec, - time_first_token_millsec: 0 // Non-streaming, first token time is not relevant + time_first_token_millsec: 0 } - - // Create a synthetic usage object if stream.usage is undefined - const finalUsage = stream.usage - // Separate onChunk calls for text and usage/metrics - if (stream.choices[0].message?.content) { - onChunk({ type: ChunkType.TEXT_COMPLETE, text: stream.choices[0].message.content }) + const finalUsage = { + completion_tokens, + prompt_tokens: nonStream.usage?.input_tokens || 0, + total_tokens } - - // Always send usage and metrics data - onChunk({ type: ChunkType.BLOCK_COMPLETE, response: { usage: finalUsage, metrics: finalMetrics } }) + for (const output of nonStream.output) { + switch (output.type) { + case 'message': + if (output.content[0].type === 'output_text') { + onChunk({ type: ChunkType.TEXT_COMPLETE, text: output.content[0].text }) + if (output.content[0].annotations && output.content[0].annotations.length > 0) { + onChunk({ + type: ChunkType.LLM_WEB_SEARCH_COMPLETE, + llm_web_search: { + source: WebSearchSource.OPENAI, + results: output.content[0].annotations + } + }) + } + } + break + case 'reasoning': + onChunk({ + type: ChunkType.THINKING_COMPLETE, + text: output.summary.map((s) => s.text).join('\n'), + thinking_millsec: new Date().getTime() - start_time_millsec + }) + break + } + } + onChunk({ + type: ChunkType.BLOCK_COMPLETE, + response: { + usage: finalUsage, + metrics: finalMetrics + } + }) return } - - let content = '' // Accumulate content for tool processing if needed - let thinkingContent = '' - // 记录最终的完成时间差 - let final_time_completion_millsec_delta = 0 - let final_time_thinking_millsec_delta = 0 - // Variable to store the last received usage object + let content = '' let lastUsage: Usage | undefined = undefined - // let isThinkingInContent: ThoughtProcessor | undefined = undefined - // const processThinkingChunk = this.handleThinkingTags() - let isFirstChunk = true - let isFirstThinkingChunk = true - for await (const chunk of stream) { + let final_time_completion_millsec_delta = 0 + for await (const chunk of stream as Stream) { if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) { break } - - const delta = chunk.choices[0]?.delta - const finishReason = chunk.choices[0]?.finish_reason - - // --- Incremental onChunk calls --- - - // 1. 
Reasoning Content - const reasoningContent = delta?.reasoning_content || delta?.reasoning - const currentTime = new Date().getTime() // Get current time for each chunk - - if (time_first_token_millsec === 0 && isFirstThinkingChunk && reasoningContent) { - // 记录第一个token的时间 - time_first_token_millsec = currentTime - // 记录第一个token的时间差 - time_first_token_millsec_delta = currentTime - start_time_millsec - console.log( - `completions time_first_token_millsec ${new Date(currentTime).toLocaleString(undefined, { - year: 'numeric', - month: 'numeric', - day: 'numeric', - hour: 'numeric', - minute: 'numeric', - second: 'numeric', - fractionalSecondDigits: 3 - })}` - ) - isFirstThinkingChunk = false - } - if (reasoningContent) { - thinkingContent += reasoningContent - hasReasoningContent = true // Keep track if reasoning occurred - - // Calculate thinking time as time elapsed since start until this chunk - const thinking_time = currentTime - time_first_token_millsec - onChunk({ type: ChunkType.THINKING_DELTA, text: reasoningContent, thinking_millsec: thinking_time }) - } - - if (isReasoningJustDone(delta)) { - if (time_first_content_millsec === 0) { - time_first_content_millsec = currentTime - final_time_thinking_millsec_delta = time_first_content_millsec - time_first_token_millsec + switch (chunk.type) { + case 'response.created': + time_first_token_millsec = new Date().getTime() + break + case 'response.reasoning_summary_text.delta': + onChunk({ + type: ChunkType.THINKING_DELTA, + text: chunk.delta, + thinking_millsec: new Date().getTime() - time_first_token_millsec + }) + break + case 'response.reasoning_summary_text.done': onChunk({ type: ChunkType.THINKING_COMPLETE, - text: thinkingContent, - thinking_millsec: final_time_thinking_millsec_delta + text: chunk.text, + thinking_millsec: new Date().getTime() - time_first_token_millsec }) - - thinkingContent = '' - isFirstThinkingChunk = true - hasReasoningContent = false - } - } - - // 2. 
Text Content - if (delta?.content) { - if (assistant.enableWebSearch) { - if (delta?.annotations) { - delta.content = convertLinks(delta.content || '', isFirstChunk) - } else if (assistant.model?.provider === 'openrouter') { - delta.content = convertLinksToOpenRouter(delta.content || '', isFirstChunk) - } else if (isZhipuModel(assistant.model)) { - delta.content = convertLinksToZhipu(delta.content || '', isFirstChunk) - } else if (isHunyuanSearchModel(assistant.model)) { - delta.content = convertLinksToHunyuan( - delta.content || '', - chunk.search_info.search_results || [], - isFirstChunk - ) - } - } - // 说明前面没有思考内容 - if (isFirstChunk && time_first_token_millsec === 0 && time_first_token_millsec_delta === 0) { - isFirstChunk = false - time_first_token_millsec = currentTime - time_first_token_millsec_delta = time_first_token_millsec - start_time_millsec - } - content += delta.content // Still accumulate for processToolUses - - // isThinkingInContent = this.findThinkingProcessor(content, model) - // if (isThinkingInContent) { - // processThinkingChunk(content, isThinkingInContent, onChunk) - onChunk({ type: ChunkType.TEXT_DELTA, text: delta.content }) - // } else { - // } - } - // console.log('delta?.finish_reason', delta?.finish_reason) - if (!isEmpty(finishReason)) { - onChunk({ type: ChunkType.TEXT_COMPLETE, text: content }) - final_time_completion_millsec_delta = currentTime - start_time_millsec - console.log( - `completions final_time_completion_millsec ${new Date(currentTime).toLocaleString(undefined, { - year: 'numeric', - month: 'numeric', - day: 'numeric', - hour: 'numeric', - minute: 'numeric', - second: 'numeric', - fractionalSecondDigits: 3 - })}` - ) - // 6. Usage (If provided per chunk) - Capture the last known usage - if (chunk.usage) { - // console.log('chunk.usage', chunk.usage) - lastUsage = chunk.usage // Update with the latest usage info - // Send incremental usage update if needed by UI (optional, keep if useful) - // onChunk({ type: 'block_in_progress', response: { usage: chunk.usage } }) - } - - // 3. Web Search - if (delta?.annotations) { + break + case 'response.output_text.delta': onChunk({ - type: ChunkType.LLM_WEB_SEARCH_COMPLETE, - llm_web_search: { - results: delta.annotations, - source: WebSearchSource.OPENAI - } - } as LLMWebSearchCompleteChunk) - } - - if (assistant.model?.provider === 'perplexity') { - const citations = chunk.citations - if (citations) { + type: ChunkType.TEXT_DELTA, + text: chunk.delta + }) + content += chunk.delta + break + case 'response.output_text.done': + onChunk({ + type: ChunkType.TEXT_COMPLETE, + text: chunk.text + }) + break + case 'response.content_part.done': + if (chunk.part.type === 'output_text' && chunk.part.annotations && chunk.part.annotations.length > 0) { onChunk({ type: ChunkType.LLM_WEB_SEARCH_COMPLETE, llm_web_search: { - results: citations, - source: WebSearchSource.PERPLEXITY + source: WebSearchSource.OPENAI, + results: chunk.part.annotations } - } as LLMWebSearchCompleteChunk) + }) } + break + case 'response.completed': { + final_time_completion_millsec_delta = new Date().getTime() - start_time_millsec + const completion_tokens = + (chunk.response.usage?.output_tokens || 0) + + (chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 0) + const total_tokens = + (chunk.response.usage?.total_tokens || 0) + + (chunk.response.usage?.output_tokens_details.reasoning_tokens ?? 
0) + lastUsage = { + completion_tokens, + prompt_tokens: chunk.response.usage?.input_tokens || 0, + total_tokens + } + break } - if (isEnabledWebSearch && isZhipuModel(model) && finishReason === 'stop' && chunk?.web_search) { + case 'error': onChunk({ - type: ChunkType.LLM_WEB_SEARCH_COMPLETE, - llm_web_search: { - results: chunk.web_search, - source: WebSearchSource.ZHIPU + type: ChunkType.ERROR, + error: { + message: chunk.message, + code: chunk.code } - } as LLMWebSearchCompleteChunk) - } - if (isEnabledWebSearch && isHunyuanSearchModel(model) && chunk?.search_info?.search_results) { - onChunk({ - type: ChunkType.LLM_WEB_SEARCH_COMPLETE, - llm_web_search: { - results: chunk.search_info.search_results, - source: WebSearchSource.HUNYUAN - } - } as LLMWebSearchCompleteChunk) - } + }) + break } + } - // --- End of Incremental onChunk calls --- - } // End of for await loop - - // Call processToolUses AFTER the loop finishes processing the main stream content - // Note: parseAndCallTools inside processToolUses should handle its own onChunk for tool responses await processToolUses(content, idx) - // Send the final block_complete chunk with accumulated data onChunk({ type: ChunkType.BLOCK_COMPLETE, response: { - // Use the enhanced usage object usage: lastUsage, metrics: { - // Get completion tokens from the last usage object if available completion_tokens: lastUsage?.completion_tokens, time_completion_millsec: final_time_completion_millsec_delta, - time_first_token_millsec: time_first_token_millsec_delta, - time_thinking_millsec: final_time_thinking_millsec_delta + time_first_token_millsec: time_first_token_millsec - start_time_millsec } } }) - - // FIXME: 临时方案,重置时间戳和思考内容 - time_first_token_millsec = 0 - time_first_content_millsec = 0 } - console.debug('[completions] reqMessages before processing', model.id, reqMessages) - reqMessages = processReqMessages(model, reqMessages) - console.debug('[completions] reqMessages', model.id, reqMessages) - // 等待接口返回流 - onChunk({ type: ChunkType.LLM_RESPONSE_CREATED }) - const stream = await this.sdk.chat.completions - // @ts-ignore key is not typed - .create( - { - model: model.id, - messages: reqMessages, - temperature: this.getTemperature(assistant, model), - top_p: this.getTopP(assistant, model), - max_tokens: maxTokens, - keep_alive: this.keepAliveTime, - stream: isSupportStreamOutput(), - // tools: tools, - ...getOpenAIWebSearchParams(assistant, model), - ...this.getReasoningEffort(assistant, model), - ...this.getProviderSpecificParameters(assistant, model), - ...this.getCustomParameters(assistant) - }, - { - signal - } - ) + const stream = await this.sdk.responses.create( + { + model: model.id, + input: reqMessages, + temperature: this.getTemperature(assistant, model), + top_p: this.getTopP(assistant, model), + max_output_tokens: maxTokens, + stream: streamOutput, + tools: tools.length > 0 ? 
tools : undefined, + service_tier: this.getServiceTier(model), + ...this.getResponseReasoningEffort(assistant, model), + ...this.getCustomParameters(assistant) + }, + { + signal, + timeout: this.getTimeout(model) + } + ) await processStream(stream, 0).finally(cleanup) @@ -753,210 +652,152 @@ export default class OpenAIProvider extends BaseProvider { } /** - * Translate a message - * @param message - The message + * Translate the content + * @param content - The content * @param assistant - The assistant * @param onResponse - The onResponse callback - * @returns The translated message + * @returns The translated content */ - async translate(content: string, assistant: Assistant, onResponse?: (text: string, isComplete: boolean) => void) { + async translate( + content: string, + assistant: Assistant, + onResponse?: (text: string, isComplete: boolean) => void + ): Promise<string> { const defaultModel = getDefaultModel() const model = assistant.model || defaultModel - const messagesForApi = content + const messageForApi: OpenAI.Responses.EasyInputMessage[] = content ? [ - { role: 'system', content: assistant.prompt }, - { role: 'user', content } + { + role: 'system', + content: assistant.prompt + }, + { + role: 'user', + content + } ] : [{ role: 'user', content: assistant.prompt }] - const isOpenAIReasoning = this.isOpenAIReasoning(model) - + const isOpenAIReasoning = isOpenAIReasoningModel(model) const isSupportedStreamOutput = () => { if (!onResponse) { return false } - if (isOpenAIReasoning) { - return false - } - return true + return !isOpenAIReasoning } const stream = isSupportedStreamOutput() - - await this.checkIsCopilot() - - // console.debug('[translate] reqMessages', model.id, message) - // @ts-ignore key is not typed - const response = await this.sdk.chat.completions.create({ - model: model.id, - messages: messagesForApi as ChatCompletionMessageParam[], - stream, - keep_alive: this.keepAliveTime, - temperature: assistant?.settings?.temperature - }) - - if (!stream) { - return response.choices[0].message?.content || '' - } - let text = '' - let isThinking = false - const isReasoning = isReasoningModel(model) + if (stream) { + const response = await this.sdk.responses.create({ + model: model.id, + input: messageForApi, + stream: true, + temperature: this.getTemperature(assistant, model), + top_p: this.getTopP(assistant, model), + ...this.getResponseReasoningEffort(assistant, model) + }) - for await (const chunk of response) { - const deltaContent = chunk.choices[0]?.delta?.content || '' - - if (isReasoning) { - if (deltaContent.includes('<think>')) { - isThinking = true + for await (const chunk of response) { + switch (chunk.type) { + case 'response.output_text.delta': + text += chunk.delta + onResponse?.(text, false) + break + case 'response.output_text.done': + onResponse?.(chunk.text, true) + break } - - if (!isThinking) { - text += deltaContent - onResponse?.(text, false) - } - - if (deltaContent.includes('</think>')) { - isThinking = false - } - } else { - text += deltaContent - onResponse?.(text, false) } + } else { + const response = await this.sdk.responses.create({ + model: model.id, + input: messageForApi, + stream: false, + temperature: this.getTemperature(assistant, model), + top_p: this.getTopP(assistant, model), + ...this.getResponseReasoningEffort(assistant, model) + }) + return response.output_text } - onResponse?.(text, true) - return text } /** - * Summarize a message + * Summarize the messages * @param messages - The messages * @param assistant - The assistant * @returns The summary */ 
public async summaries(messages: Message[], assistant: Assistant): Promise<string> { const model = getTopNamingModel() || assistant.model || getDefaultModel() - const userMessages = takeRight(messages, 5) .filter((message) => !message.isPreset) .map((message) => ({ role: message.role, content: getMainTextContent(message) })) - const userMessageContent = userMessages.reduce((prev, curr) => { const content = curr.role === 'user' ? `User: ${curr.content}` : `Assistant: ${curr.content}` return prev + (prev ? '\n' : '') + content }, '') - const systemMessage = { + const systemMessage: OpenAI.Responses.EasyInputMessage = { role: 'system', - content: getStoreSetting('topicNamingPrompt') || i18n.t('prompts.title') + content: (getStoreSetting('topicNamingPrompt') as string) || i18n.t('prompts.title') } - const userMessage = { + const userMessage: OpenAI.Responses.EasyInputMessage = { role: 'user', content: userMessageContent } - await this.checkIsCopilot() - - console.debug('[summaries] reqMessages', model.id, [systemMessage, userMessage]) - // @ts-ignore key is not typed - const response = await this.sdk.chat.completions.create({ + const response = await this.sdk.responses.create({ model: model.id, - messages: [systemMessage, userMessage] as ChatCompletionMessageParam[], + input: [systemMessage, userMessage], stream: false, - keep_alive: this.keepAliveTime, - max_tokens: 1000 + max_output_tokens: 1000 }) - - // 针对思考类模型的返回,总结仅截取</think>之后的内容 - let content = response.choices[0].message?.content || '' - content = content.replace(/^<think>(.*?)<\/think>/s, '') - - return removeSpecialCharactersForTopicName(content.substring(0, 50)) + return removeSpecialCharactersForTopicName(response.output_text.substring(0, 50)) } - /** - * Summarize a message for search - * @param messages - The messages - * @param assistant - The assistant - * @returns The summary - */ public async summaryForSearch(messages: Message[], assistant: Assistant): Promise<string> { - const model = assistant.model || getDefaultModel() - - const systemMessage = { + const model = getTopNamingModel() || assistant.model || getDefaultModel() + const systemMessage: OpenAI.Responses.EasyInputMessage = { role: 'system', content: assistant.prompt } - const messageContents = messages.map((m) => getMainTextContent(m)) const userMessageContent = messageContents.join('\n') - - const userMessage = { + const userMessage: OpenAI.Responses.EasyInputMessage = { role: 'user', content: userMessageContent } - console.debug('[summaryForSearch] reqMessages', model.id, [systemMessage, userMessage]) - const lastUserMessage = messages[messages.length - 1] - console.log('lastUserMessage?.id', lastUserMessage?.id) const { abortController, cleanup } = this.createAbortController(lastUserMessage?.id) const { signal } = abortController - const response = await this.sdk.chat.completions - // @ts-ignore key is not typed + const response = await this.sdk.responses .create( { model: model.id, - messages: [systemMessage, userMessage] as ChatCompletionMessageParam[], + input: [systemMessage, userMessage], stream: false, - keep_alive: this.keepAliveTime, - max_tokens: 1000 + max_output_tokens: 1000 }, { - timeout: 20 * 1000, - signal: signal + signal, + timeout: 20 * 1000 } ) .finally(cleanup) - // 针对思考类模型的返回,总结仅截取</think>之后的内容 - let content = response.choices[0].message?.content || '' - content = content.replace(/^<think>(.*?)<\/think>/s, '') - - return content + return response.output_text } /** - * Generate text - * @param prompt - The prompt - * @param content - The content - * @returns The generated text - */ - public async generateText({ prompt, content }: { prompt: string; content: string }): Promise<string> { - const model = getDefaultModel() - - await this.checkIsCopilot() - - const response = await this.sdk.chat.completions.create({ - model: model.id, - stream: false, - messages: [ - { role: 'system', content: prompt }, - { role: 'user', content } - ] - }) - - return response.choices[0].message?.content || '' - } - - /** - * Generate suggestions + * Generate  suggestions * @param messages - The messages * @param assistant - The assistant * @returns The suggestions @@ -968,8 +809,6 @@ export default class OpenAIProvider extends BaseProvider { return [] } - await this.checkIsCopilot() - const userMessagesForApi = messages .filter((m) => m.role === 'user') .map((m) => ({ @@ -992,50 +831,61 @@ export default class OpenAIProvider extends BaseProvider { return response?.questions?.filter(Boolean)?.map((q: any) => ({ content: q })) || [] } + /** + * Generate text + * @param prompt - The prompt + * @param content - The content + * @returns The generated text + */ + public async generateText({ prompt, content }: { prompt: string; content: string }): Promise<string> { + const model = getDefaultModel() + const response = await this.sdk.responses.create({ + model: model.id, + stream: false, + input: [ + { role: 'system', content: prompt }, + { role: 'user', content } + ] + }) + return response.output_text + } + /** * Check if the model is valid * @param model - The model * @param stream - Whether to use streaming interface * @returns The validity of the model */ - public async check(model: Model, stream: boolean = false): Promise<{ valid: boolean; error: Error | null }> { + public async check(model: Model, stream: boolean): Promise<{ valid: boolean; error: Error | null }> { if (!model) { return { valid: false, error: new Error('No model found') } } - const body = { - model: model.id, - messages: [{ role: 'user', content: 'hi' }], - stream - } - - try { - await this.checkIsCopilot() - console.debug('[checkModel] body', model.id, body) - if (!stream) { - const response = await this.sdk.chat.completions.create(body as ChatCompletionCreateParamsNonStreaming) - if (!response?.choices[0].message) { - throw new Error('Empty response') + if (stream) { + const response = await this.sdk.responses.create({ + model: model.id, + input: [{ role: 'user', content: 'hi' }], + stream: true + }) + let hasContent = false + for await (const chunk of response) { + if (chunk.type === 'response.output_text.delta') { + hasContent = true } + } + if (hasContent) { return { valid: true, error: null } - } else { - const response: any = await this.sdk.chat.completions.create(body as any) - // 等待整个流式响应结束 - let hasContent = false - for await (const chunk of response) { - if (chunk.choices?.[0]?.delta?.content) { - hasContent = true - } - } - if (hasContent) { - return { valid: true, error: null } - } - throw new Error('Empty streaming response') } - } catch (error: any) { - return { - valid: false, - error + throw new Error('Empty streaming response') + } else { + const response = await this.sdk.responses.create({ + model: model.id, + input: [{ role: 'user', content: 'hi' }], + stream: false + }) + if (!response.output_text) { + throw new Error('Empty response') } + return { valid: true, error: null } } } @@ -1045,39 +895,11 @@ export default class OpenAIProvider extends BaseProvider { */ public async models(): Promise { try { - await this.checkIsCopilot() - const response = await this.sdk.models.list() - - if (this.provider.id === 'github') { - //
@ts-ignore key is not typed - return response.body - .map((model) => ({ - id: model.name, - description: model.summary, - object: 'model', - owned_by: model.publisher - })) - .filter(isSupportedModel) - } - - if (this.provider.id === 'together') { - // @ts-ignore key is not typed - return response?.body - .map((model: any) => ({ - id: model.id, - description: model.display_name, - object: 'model', - owned_by: model.organization - })) - .filter(isSupportedModel) - } - - const models = response?.data || [] + const models = response.data || [] models.forEach((model) => { model.id = model.id.trim() }) - return models.filter(isSupportedModel) } catch (error) { return [] @@ -1121,29 +943,6 @@ export default class OpenAIProvider extends BaseProvider { return response.data.map((item) => item.url) } - /** - * Get the embedding dimensions - * @param model - The model - * @returns The embedding dimensions - */ - public async getEmbeddingDimensions(model: Model): Promise { - await this.checkIsCopilot() - - const data = await this.sdk.embeddings.create({ - model: model.id, - input: model?.provider === 'baidu-cloud' ? ['hi'] : 'hi' - }) - return data.data[0].embedding.length - } - - public async checkIsCopilot() { - if (this.provider.id !== 'copilot') return - const defaultHeaders = store.getState().copilot.defaultHeaders - // copilot每次请求前需要重新获取token,因为token中附带时间戳 - const { token } = await window.api.copilot.getToken(defaultHeaders) - this.sdk.apiKey = token - } - public async generateImageByChat({ messages, assistant, onChunk }: CompletionsParams): Promise { const defaultModel = getDefaultModel() const model = assistant.model || defaultModel @@ -1170,10 +969,9 @@ export default class OpenAIProvider extends BaseProvider { // f.file is guaranteed to exist here due to the filter above const fileInfo = f.file! 
const binaryData = await FileManager.readBinaryImage(fileInfo) - const file = await toFile(binaryData, fileInfo.origin_name || 'image.png', { + return await toFile(binaryData, fileInfo.origin_name || 'image.png', { type: 'image/png' }) - return file }) ) images = images.concat(userImages) @@ -1190,10 +988,9 @@ export default class OpenAIProvider extends BaseProvider { for (let i = 0; i < binary.length; i++) { bytes[i] = binary.charCodeAt(i) } - const file = await toFile(bytes, 'assistant_image.png', { + return await toFile(bytes, 'assistant_image.png', { type: 'image/png' }) - return file }) ) images = images.concat(assistantImages.filter(Boolean) as FileLike[]) @@ -1261,4 +1058,17 @@ export default class OpenAIProvider extends BaseProvider { }) } } + + /** + * Get the embedding dimensions + * @param model - The model + * @returns The embedding dimensions + */ + public async getEmbeddingDimensions(model: Model): Promise { + const data = await this.sdk.embeddings.create({ + model: model.id, + input: 'hi' + }) + return data.data[0].embedding.length + } } diff --git a/src/renderer/src/providers/AiProvider/ProviderFactory.ts b/src/renderer/src/providers/AiProvider/ProviderFactory.ts index c730020a41..ff3515e119 100644 --- a/src/renderer/src/providers/AiProvider/ProviderFactory.ts +++ b/src/renderer/src/providers/AiProvider/ProviderFactory.ts @@ -3,17 +3,22 @@ import { Provider } from '@renderer/types' import AnthropicProvider from './AnthropicProvider' import BaseProvider from './BaseProvider' import GeminiProvider from './GeminiProvider' +import OpenAICompatibleProvider from './OpenAICompatibleProvider' import OpenAIProvider from './OpenAIProvider' export default class ProviderFactory { static create(provider: Provider): BaseProvider { switch (provider.type) { + case 'openai': + return new OpenAIProvider(provider) + case 'openai-compatible': + return new OpenAICompatibleProvider(provider) case 'anthropic': return new AnthropicProvider(provider) case 'gemini': return new GeminiProvider(provider) default: - return new OpenAIProvider(provider) + return new OpenAICompatibleProvider(provider) } } } diff --git a/src/renderer/src/store/index.ts b/src/renderer/src/store/index.ts index 8faad7884c..e8f4c2ac32 100644 --- a/src/renderer/src/store/index.ts +++ b/src/renderer/src/store/index.ts @@ -46,7 +46,7 @@ const persistedReducer = persistReducer( { key: 'cherry-studio', storage, - version: 97, + version: 98, blacklist: ['runtime', 'messages', 'messageBlocks'], migrate }, diff --git a/src/renderer/src/store/llm.ts b/src/renderer/src/store/llm.ts index 09407d535a..805acaf941 100644 --- a/src/renderer/src/store/llm.ts +++ b/src/renderer/src/store/llm.ts @@ -28,7 +28,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'silicon', name: 'Silicon', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.siliconflow.cn', models: SYSTEM_MODELS.silicon, @@ -48,7 +48,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'ocoolai', name: 'ocoolAI', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.ocoolai.com', models: SYSTEM_MODELS.ocoolai, @@ -58,7 +58,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'deepseek', name: 'deepseek', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.deepseek.com', models: SYSTEM_MODELS.deepseek, @@ -78,7 +78,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'ppio', name: 'PPIO', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 
'https://api.ppinfra.com/v3/openai', models: SYSTEM_MODELS.ppio, @@ -88,7 +88,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'alayanew', name: 'AlayaNew', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://deepseek.alayanew.com', models: SYSTEM_MODELS.alayanew, @@ -98,7 +98,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'infini', name: 'Infini', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://cloud.infini-ai.com/maas', models: SYSTEM_MODELS.infini, @@ -108,7 +108,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'qiniu', name: 'Qiniu', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.qnaigc.com', models: SYSTEM_MODELS.qiniu, @@ -118,7 +118,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'dmxapi', name: 'DMXAPI', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://www.dmxapi.cn', models: SYSTEM_MODELS.dmxapi, @@ -128,7 +128,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'o3', name: 'O3', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.o3.fan', models: SYSTEM_MODELS.o3, @@ -138,7 +138,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'ollama', name: 'Ollama', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'http://localhost:11434', models: SYSTEM_MODELS.ollama, @@ -148,7 +148,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'lmstudio', name: 'LM Studio', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'http://localhost:1234', models: SYSTEM_MODELS.lmstudio, @@ -178,7 +178,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'azure-openai', name: 'Azure OpenAI', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: '', apiVersion: '', @@ -199,7 +199,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'zhipu', name: 'ZhiPu', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://open.bigmodel.cn/api/paas/v4/', models: SYSTEM_MODELS.zhipu, @@ -209,7 +209,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'github', name: 'Github Models', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://models.inference.ai.azure.com/', models: SYSTEM_MODELS.github, @@ -219,7 +219,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'copilot', name: 'Github Copilot', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.githubcopilot.com/', models: SYSTEM_MODELS.copilot, @@ -230,7 +230,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'yi', name: 'Yi', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.lingyiwanwu.com', models: SYSTEM_MODELS.yi, @@ -240,7 +240,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'moonshot', name: 'Moonshot AI', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.moonshot.cn', models: SYSTEM_MODELS.moonshot, @@ -250,7 +250,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'baichuan', name: 'BAICHUAN AI', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.baichuan-ai.com', models: SYSTEM_MODELS.baichuan, @@ -260,7 +260,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'dashscope', name: 'Bailian', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://dashscope.aliyuncs.com/compatible-mode/v1/', models: SYSTEM_MODELS.bailian, @@ -270,7 +270,7 @@ export const INITIAL_PROVIDERS: 
Provider[] = [ { id: 'stepfun', name: 'StepFun', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.stepfun.com', models: SYSTEM_MODELS.stepfun, @@ -280,7 +280,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'doubao', name: 'doubao', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://ark.cn-beijing.volces.com/api/v3/', models: SYSTEM_MODELS.doubao, @@ -290,7 +290,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'minimax', name: 'MiniMax', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.minimax.chat/v1/', models: SYSTEM_MODELS.minimax, @@ -300,7 +300,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'groq', name: 'Groq', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.groq.com/openai', models: SYSTEM_MODELS.groq, @@ -310,7 +310,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'together', name: 'Together', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.together.xyz', models: SYSTEM_MODELS.together, @@ -320,7 +320,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'fireworks', name: 'Fireworks', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.fireworks.ai/inference', models: SYSTEM_MODELS.fireworks, @@ -330,7 +330,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'zhinao', name: 'zhinao', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.360.cn', models: SYSTEM_MODELS.zhinao, @@ -340,7 +340,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'hunyuan', name: 'hunyuan', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.hunyuan.cloud.tencent.com', models: SYSTEM_MODELS.hunyuan, @@ -350,7 +350,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'nvidia', name: 'nvidia', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://integrate.api.nvidia.com', models: SYSTEM_MODELS.nvidia, @@ -360,7 +360,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'grok', name: 'Grok', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.x.ai', models: SYSTEM_MODELS.grok, @@ -370,7 +370,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'hyperbolic', name: 'Hyperbolic', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.hyperbolic.xyz', models: SYSTEM_MODELS.hyperbolic, @@ -380,7 +380,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'mistral', name: 'Mistral', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.mistral.ai', models: SYSTEM_MODELS.mistral, @@ -390,7 +390,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'jina', name: 'Jina', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.jina.ai', models: SYSTEM_MODELS.jina, @@ -400,7 +400,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'gitee-ai', name: 'gitee ai', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://ai.gitee.com', models: SYSTEM_MODELS['gitee-ai'], @@ -410,7 +410,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'perplexity', name: 'Perplexity', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.perplexity.ai/', models: SYSTEM_MODELS.perplexity, @@ -420,7 +420,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'modelscope', name: 'ModelScope', - type: 'openai', + type: 'openai-compatible', 
apiKey: '', apiHost: 'https://api-inference.modelscope.cn/v1/', models: SYSTEM_MODELS.modelscope, @@ -430,7 +430,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'xirang', name: 'Xirang', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://wishub-x1.ctyun.cn', models: SYSTEM_MODELS.xirang, @@ -440,7 +440,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'tencent-cloud-ti', name: 'Tencent Cloud TI', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.lkeap.cloud.tencent.com', models: SYSTEM_MODELS['tencent-cloud-ti'], @@ -450,7 +450,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'baidu-cloud', name: 'Baidu Cloud', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://qianfan.baidubce.com/v2/', models: SYSTEM_MODELS['baidu-cloud'], @@ -460,7 +460,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'gpustack', name: 'GPUStack', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: '', models: SYSTEM_MODELS.gpustack, @@ -470,7 +470,7 @@ export const INITIAL_PROVIDERS: Provider[] = [ { id: 'voyageai', name: 'VoyageAI', - type: 'openai', + type: 'openai-compatible', apiKey: '', apiHost: 'https://api.voyageai.com', models: SYSTEM_MODELS.voyageai, diff --git a/src/renderer/src/store/messageBlock.ts b/src/renderer/src/store/messageBlock.ts index 354e91a4c2..aa484e0bf3 100644 --- a/src/renderer/src/store/messageBlock.ts +++ b/src/renderer/src/store/messageBlock.ts @@ -98,6 +98,25 @@ const formatCitationsFromBlock = (block: CitationMessageBlock | undefined): Cita })) || [] break case WebSearchSource.OPENAI: + formattedCitations = + (block.response.results as OpenAI.Responses.ResponseOutputText.URLCitation[])?.map((result, index) => { + let hostname: string | undefined + try { + hostname = result.title ? 
undefined : new URL(result.url).hostname + } catch { + hostname = result.url + } + return { + number: index + 1, + url: result.url, + title: result.title, + hostname: hostname, + showFavicon: true, + type: 'websearch' + } + }) || [] + break + case WebSearchSource.OPENAI_COMPATIBLE: formattedCitations = (block.response.results as OpenAI.Chat.Completions.ChatCompletionMessage.Annotation[])?.map((url, index) => { const urlCitation = url.url_citation diff --git a/src/renderer/src/store/migrate.ts b/src/renderer/src/store/migrate.ts index 995e462805..fa412c55ba 100644 --- a/src/renderer/src/store/migrate.ts +++ b/src/renderer/src/store/migrate.ts @@ -1240,6 +1240,18 @@ const migrateConfig = { } catch (error) { return state } + }, + '98': (state: RootState) => { + try { + state.llm.providers.forEach((provider) => { + if (provider.type === 'openai' && provider.id !== 'openai') { + provider.type = 'openai-compatible' + } + }) + return state + } catch (error) { + return state + } } } diff --git a/src/renderer/src/types/index.ts b/src/renderer/src/types/index.ts index 8e1b7ecab9..fc122d1da0 100644 --- a/src/renderer/src/types/index.ts +++ b/src/renderer/src/types/index.ts @@ -157,7 +157,7 @@ export type Provider = { notes?: string } -export type ProviderType = 'openai' | 'anthropic' | 'gemini' | 'qwenlm' | 'azure-openai' +export type ProviderType = 'openai' | 'openai-compatible' | 'anthropic' | 'gemini' | 'qwenlm' | 'azure-openai' export type ModelType = 'text' | 'vision' | 'embedding' | 'reasoning' | 'function_calling' | 'web_search' @@ -448,11 +448,13 @@ export type WebSearchResults = | WebSearchProviderResponse | GroundingMetadata | OpenAI.Chat.Completions.ChatCompletionMessage.Annotation.URLCitation[] + | OpenAI.Responses.ResponseOutputText.URLCitation[] | any[] export enum WebSearchSource { WEBSEARCH = 'websearch', OPENAI = 'openai', + OPENAI_COMPATIBLE = 'openai-compatible', OPENROUTER = 'openrouter', GEMINI = 'gemini', PERPLEXITY = 'perplexity', diff --git a/src/renderer/src/utils/mcp-tools.ts b/src/renderer/src/utils/mcp-tools.ts index 5eeb7201c0..95495b7d45 100644 --- a/src/renderer/src/utils/mcp-tools.ts +++ b/src/renderer/src/utils/mcp-tools.ts @@ -5,6 +5,7 @@ import store from '@renderer/store' import { MCPCallToolResponse, MCPServer, MCPTool, MCPToolResponse } from '@renderer/types' import type { MCPToolCompleteChunk, MCPToolInProgressChunk } from '@renderer/types/chunk' import { ChunkType } from '@renderer/types/chunk' +import OpenAI from 'openai' import { ChatCompletionContentPart, ChatCompletionMessageParam, ChatCompletionMessageToolCall } from 'openai/resources' import { CompletionsParams } from '../providers/AiProvider' @@ -401,11 +402,11 @@ export async function parseAndCallTools( toolCallId: string, resp: MCPCallToolResponse, isVisionModel: boolean - ) => ChatCompletionMessageParam | MessageParam | Content, + ) => ChatCompletionMessageParam | MessageParam | Content | OpenAI.Responses.EasyInputMessage, mcpTools?: MCPTool[], isVisionModel: boolean = false -): Promise<(ChatCompletionMessageParam | MessageParam | Content)[]> { - const toolResults: (ChatCompletionMessageParam | MessageParam | Content)[] = [] +): Promise<(ChatCompletionMessageParam | MessageParam | Content | OpenAI.Responses.EasyInputMessage)[]> { + const toolResults: (ChatCompletionMessageParam | MessageParam | Content | OpenAI.Responses.EasyInputMessage)[] = [] // process tool use const tools = parseToolUse(content, mcpTools || []) if (!tools || tools.length === 0) { @@ -448,7 +449,7 @@ export async function 
parseAndCallTools( return toolResults } -export function mcpToolCallResponseToOpenAIMessage( +export function mcpToolCallResponseToOpenAICompatibleMessage( toolCallId: string, resp: MCPCallToolResponse, isVisionModel: boolean = false @@ -515,6 +516,62 @@ export function mcpToolCallResponseToOpenAIMessage( return message } +export function mcpToolCallResponseToOpenAIMessage( + toolCallId: string, + resp: MCPCallToolResponse, + isVisionModel: boolean = false +): OpenAI.Responses.EasyInputMessage { + const message = { + role: 'user' + } as OpenAI.Responses.EasyInputMessage + + if (resp.isError) { + message.content = JSON.stringify(resp.content) + } else { + const content: OpenAI.Responses.ResponseInputContent[] = [ + { + type: 'input_text', + text: `Here is the result of tool call ${toolCallId}:` + } + ] + + if (isVisionModel) { + for (const item of resp.content) { + switch (item.type) { + case 'text': + content.push({ + type: 'input_text', + text: item.text || 'no content' + }) + break + case 'image': + content.push({ + type: 'input_image', + image_url: `data:${item.mimeType};base64,${item.data}`, + detail: 'auto' + }) + break + default: + content.push({ + type: 'input_text', + text: `Unsupported type: ${item.type}` + }) + break + } + } + } else { + content.push({ + type: 'input_text', + text: JSON.stringify(resp.content) + }) + } + + message.content = content + } + + return message +} + export function mcpToolCallResponseToAnthropicMessage( toolCallId: string, resp: MCPCallToolResponse,