diff --git a/src/renderer/src/components/DraggableVoiceCallWindow.tsx b/src/renderer/src/components/DraggableVoiceCallWindow.tsx
new file mode 100644
index 0000000000..8e030a1920
--- /dev/null
+++ b/src/renderer/src/components/DraggableVoiceCallWindow.tsx
@@ -0,0 +1,388 @@
+import React, { useState, useEffect, useRef } from 'react';
+import { Button, Space, Tooltip } from 'antd';
+import {
+  AudioMutedOutlined,
+  AudioOutlined,
+  CloseOutlined,
+  PauseCircleOutlined,
+  PlayCircleOutlined,
+  SoundOutlined,
+  DragOutlined
+} from '@ant-design/icons';
+import styled from 'styled-components';
+import { useTranslation } from 'react-i18next';
+import VoiceVisualizer from './VoiceVisualizer';
+import { VoiceCallService } from '../services/VoiceCallService';
+
+interface Props {
+  visible: boolean; // when false the component renders nothing
+  onClose: () => void; // invoked by the close button and when starting the call fails
+  position?: { x: number, y: number }; // initial window offset in px; defaults to { x: 20, y: 20 }
+  onPositionChange?: (position: { x: number, y: number }) => void; // called with each new position while dragging
+}
+
+const DraggableVoiceCallWindow: React.FC<Props> = ({
+  visible,
+  onClose,
+  position = { x: 20, y: 20 },
+  onPositionChange
+}) => {
+  const { t } = useTranslation();
+  const [isDragging, setIsDragging] = useState(false);
+  const [currentPosition, setCurrentPosition] = useState(position); // seeded from the prop once; later prop changes are not synced
+  const [dragOffset, setDragOffset] = useState({ x: 0, y: 0 }); // pointer offset inside the window, captured at drag start
+  const containerRef = useRef<HTMLDivElement>(null);
+
+  // Voice call state
+  const [transcript, setTranscript] = useState('');
+  const [isListening, setIsListening] = useState(false);
+  const [isSpeaking, setIsSpeaking] = useState(false);
+  const [isRecording, setIsRecording] = useState(false);
+  const [isProcessing, setIsProcessing] = useState(false);
+  const [isPaused, setIsPaused] = useState(false);
+  const [isMuted, setIsMuted] = useState(false);
+
+  useEffect(() => {
+    const startVoiceCall = async () => {
+      try {
+        // Show a loading toast while the call is being set up
+        window.message.loading({ content: t('voice_call.initializing'), key: 'voice-call-init' });
+
+        // Initialize the speech recognition service up front
+        try {
+          await VoiceCallService.initialize();
+        } catch (initError) {
+          console.warn('语音识别服务初始化警告:', initError);
+          // Deliberately not rethrown so that startCall below is still attempted
+        }
+
+        // Start the voice call
+        await VoiceCallService.startCall({
+          onTranscript: (text) => setTranscript(text),
+          onResponse: (text) => {
+            // Intentionally empty: the response is displayed in the chat view instead
+          },
+          onListeningStateChange: setIsListening,
+          onSpeakingStateChange: setIsSpeaking,
+        });
+
+        // Dismiss the loading toast
+        window.message.success({ content: t('voice_call.ready'), key: 'voice-call-init' });
+      } catch (error) {
+        console.error('Voice call error:', error);
+        window.message.error({ content: t('voice_call.error'), key: 'voice-call-init' });
+        onClose();
+      }
+    };
+
+    // Listener that mirrors TTS state-change events into isSpeaking
+    const handleTTSStateChange = (event: CustomEvent) => {
+      const { isPlaying } = event.detail;
+      console.log('TTS状态变化事件:', isPlaying);
+      setIsSpeaking(isPlaying);
+    };
+
+    if (visible) {
+      startVoiceCall();
+      // Register the event listener
+      window.addEventListener('tts-state-change', handleTTSStateChange as EventListener);
+    }
+
+    return () => {
+      VoiceCallService.endCall(); // NOTE(review): runs on every re-run, including when `visible` was false or only `t` changed
+      // Remove the event listener
+      window.removeEventListener('tts-state-change', handleTTSStateChange as EventListener);
+    };
+  }, [visible, t]);
+
+  // Drag handling
+  const handleDragStart = (e: React.MouseEvent) => {
+    const windowEl = containerRef.current;
+    // Nothing to drag until the container element exists.
+    if (!windowEl) return;
+    setIsDragging(true);
+    // Remember where inside the window the pointer grabbed it.
+    const { left, top } = windowEl.getBoundingClientRect();
+    const grabPoint = { x: e.clientX - left, y: e.clientY - top };
+    setDragOffset(grabPoint);
+  };
+
+  const handleDrag = (e: MouseEvent) => {
+    // Pointer moves only matter while a drag is in progress.
+    if (!isDragging) return;
+    // Subtract the stored grab offset so the grabbed point stays under the cursor.
+    const left = e.clientX - dragOffset.x;
+    const top = e.clientY - dragOffset.y;
+    const moved = { x: left, y: top };
+    setCurrentPosition(moved);
+    onPositionChange?.(moved);
+  };
+
+  const handleDragEnd = () => {
+    setIsDragging(false); // registered as the document mouseup handler while dragging
+  };
+
+  useEffect(() => {
+    if (isDragging) { // document-level listeners are attached only while a drag is active
+      document.addEventListener('mousemove', handleDrag);
+      document.addEventListener('mouseup', handleDragEnd);
+    }
+    return () => { // cleanup is returned unconditionally, even if nothing was attached
+      document.removeEventListener('mousemove', handleDrag);
+      document.removeEventListener('mouseup', handleDragEnd);
+    };
+  }, [isDragging]); // NOTE(review): handleDrag/handleDragEnd (and what they close over) are not listed as dependencies
+
+  const toggleMute = () => { // voice-call control: flip mute in state and in the service
+    const nextMuted = !isMuted;
+    setIsMuted(nextMuted);
+    VoiceCallService.setMuted(nextMuted);
+  };
+
+  const togglePause = () => {
+    const nextPaused = !isPaused; // derived once so state and service receive the same value
+    setIsPaused(nextPaused);
+    VoiceCallService.setPaused(nextPaused);
+  };
+
+  // Press-and-hold-to-talk handling
+  const handleRecordStart = async (e: React.MouseEvent | React.TouchEvent) => {
+    e.preventDefault(); // suppress the default behaviour of the touch event
+
+    if (isProcessing || isPaused) return;
+
+    // Clear the previous recognition result first
+    setTranscript('');
+
+    // Force-stop TTS regardless of whether it is currently playing
+    VoiceCallService.stopTTS();
+    setIsSpeaking(false);
+
+    // Update UI state
+    setIsRecording(true);
+    setIsProcessing(true); // mark as processing to block repeated presses
+
+    // Start recording
+    try {
+      await VoiceCallService.startRecording();
+      console.log('开始录音');
+      setIsProcessing(false); // recording has started, so clear the processing flag
+    } catch (error) {
+      console.error('开始录音出错:', error);
+      window.message.error({ content: '启动语音识别失败，请确保语音识别服务已启动', key: 'voice-call-error' });
+      setIsRecording(false);
+      setIsProcessing(false);
+    }
+  };
+
+  const handleRecordEnd = async (e: React.MouseEvent | React.TouchEvent) => {
+    e.preventDefault(); // suppress the default behaviour of the touch event
+
+    if (!isRecording) return;
+
+    // Update UI state immediately
+    setIsRecording(false);
+    setIsProcessing(true);
+
+    // Force-stop TTS regardless of whether it is currently playing
+    VoiceCallService.stopTTS();
+    setIsSpeaking(false);
+
+    // Make sure recording is fully stopped
+    try {
+      // Stops recording and forwards the result to the chat view (the call takes no arguments)
+      const success = await VoiceCallService.stopRecordingAndSendToChat();
+      console.log('录音已停止，结果已发送到聊天界面', success ? '成功' : '失败');
+
+      if (success) {
+        // Show a success toast
+        window.message.success({ content: '语音识别已完成，正在发送消息...', key: 'voice-call-send' });
+      } else {
+        // Show a failure toast
+        window.message.error({ content: '发送语音识别结果失败', key: 'voice-call-error' });
+      }
+    } catch (error) {
+      console.error('停止录音出错:', error);
+      window.message.error({ content: '停止录音出错', key: 'voice-call-error' });
+    } finally {
+      // Whatever the outcome, clear the processing flag after a short delay
+      setTimeout(() => {
+        setIsProcessing(false);
+      }, 1000); // 1 s delay to leave time for the result to be handled
+    }
+  };
+
+  // Handles the pointer/touch leaving the button while recording
+  const handleRecordCancel = async (e: React.MouseEvent | React.TouchEvent) => {
+    e.preventDefault();
+
+    if (isRecording) {
+      // Update UI state immediately
+      setIsRecording(false);
+      setIsProcessing(true);
+
+      // Force-stop TTS regardless of whether it is currently playing
+      VoiceCallService.stopTTS();
+      setIsSpeaking(false);
+
+      // Cancel the recording without sending anything to the AI
+      try {
+        await VoiceCallService.cancelRecording();
+        console.log('录音已取消');
+
+        // Clear the displayed transcript
+        setTranscript('');
+      } catch (error) {
+        console.error('取消录音出错:', error);
+      } finally {
+        // Whatever the outcome, clear the processing flag after a short delay
+        setTimeout(() => {
+          setIsProcessing(false);
+        }, 1000);
+      }
+    }
+  };
+
+  if (!visible) return null; // render nothing while hidden; the hooks above have already run
+
+  return (
+    <Container
+      ref={containerRef}
+      style={{
+        left: `${currentPosition.x}px`, // offsets come from the drag state
+        top: `${currentPosition.y}px`,
+        position: 'fixed',
+        zIndex: 1000
+      }}
+    >
+      <Header onMouseDown={handleDragStart}>
+        <DragOutlined style={{ cursor: 'move', marginRight: 8 }} />
+        {t('voice_call.title')}
+        <CloseButton onClick={onClose}>
+          <CloseOutlined />
+        </CloseButton>
+      </Header>
+
+      <Content>
+        <VisualizerContainer>
+          <VoiceVisualizer isActive={isListening || isRecording} type="input" />
+          <VoiceVisualizer isActive={isSpeaking} type="output" />
+        </VisualizerContainer>
+
+        <TranscriptContainer>
+          {transcript && (
+            <TranscriptText>
+              <UserLabel>{t('voice_call.you')}:</UserLabel> {transcript}
+            </TranscriptText>
+          )}
+        </TranscriptContainer>
+
+        <ControlsContainer>
+          <Space>
+            <Button
+              type="text"
+              icon={isMuted ? <AudioMutedOutlined /> : <AudioOutlined />}
+              onClick={toggleMute}
+              size="large"
+              title={isMuted ? t('voice_call.unmute') : t('voice_call.mute')}
+            />
+            <Button
+              type="text"
+              icon={isPaused ? <PlayCircleOutlined /> : <PauseCircleOutlined />}
+              onClick={togglePause}
+              size="large"
+              title={isPaused ? t('voice_call.resume') : t('voice_call.pause')}
+            />
+            <Tooltip title={t('voice_call.press_to_talk')}>
+              <RecordButton
+                type={isRecording ? "primary" : "default"}
+                icon={<SoundOutlined />}
+                onMouseDown={handleRecordStart} // press and hold to record
+                onMouseUp={handleRecordEnd} // release to stop and send
+                onMouseLeave={handleRecordCancel} // leaving the button while recording discards it
+                onTouchStart={handleRecordStart} // touch counterparts of the three mouse handlers
+                onTouchEnd={handleRecordEnd}
+                onTouchCancel={handleRecordCancel}
+                size="large"
+                disabled={isProcessing || isPaused} // NOTE(review): confirm a release while disabled cannot leave recording stuck on
+              >
+                {isRecording ? t('voice_call.release_to_send') : t('voice_call.press_to_talk')}
+              </RecordButton>
+            </Tooltip>
+          </Space>
+        </ControlsContainer>
+      </Content>
+    </Container>
+  );
+};
+
+// Styled components
+const Container = styled.div`
+  width: 300px;
+  background-color: var(--color-background);
+  border-radius: 8px;
+  box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
+  overflow: hidden;
+  display: flex;
+  flex-direction: column;
+`; // window shell; left/top/position/z-index are supplied inline by the component
+
+const Header = styled.div`
+  padding: 8px 12px;
+  background-color: var(--color-primary);
+  color: white;
+  font-weight: bold;
+  display: flex;
+  align-items: center;
+  cursor: move;
+`; // title bar; its onMouseDown starts a drag
+
+const CloseButton = styled.div`
+  margin-left: auto;
+  cursor: pointer;
+`; // wraps the close icon inside the header
+
+const Content = styled.div`
+  display: flex;
+  flex-direction: column;
+  gap: 10px;
+  padding: 12px;
+`; // body below the header: visualizers, transcript, controls
+
+const VisualizerContainer = styled.div`
+  display: flex;
+  justify-content: space-between;
+  height: 60px;
+`; // holds the input and output VoiceVisualizer instances
+
+const TranscriptContainer = styled.div`
+  flex: 1;
+  min-height: 60px;
+  max-height: 100px;
+  overflow-y: auto;
+  border: 1px solid var(--color-border);
+  border-radius: 8px;
+  padding: 8px;
+  background-color: var(--color-background-2);
+`; // scrollable box for the recognized text
+
+const TranscriptText = styled.div`
+  margin-bottom: 8px;
+`; // one transcript entry
+
+const UserLabel = styled.span`
+  font-weight: bold;
+  color: var(--color-primary);
+`; // speaker label rendered before the transcript
+
+const ControlsContainer = styled.div`
+  display: flex;
+  justify-content: center;
+  padding: 8px 0;
+`; // row with the mute, pause and record buttons
+
+const RecordButton = styled(Button)`
+  min-width: 120px;
+`; // push-to-talk button
+
+export default DraggableVoiceCallWindow;
diff --git a/src/renderer/src/components/VoiceCallButton.tsx b/src/renderer/src/components/VoiceCallButton.tsx
index 00d784b4ed..ded74af2f8 100644
--- a/src/renderer/src/components/VoiceCallButton.tsx
+++ b/src/renderer/src/components/VoiceCallButton.tsx
@@ -2,8 +2,8 @@ import React, { useState } from 'react';
 import { Button, Tooltip } from 'antd';
 import { PhoneOutlined, LoadingOutlined } from '@ant-design/icons';
 import { useTranslation } from 'react-i18next';
-import VoiceCallModal from './VoiceCallModal';
 import { VoiceCallService } from '../services/VoiceCallService';
+import DraggableVoiceCallWindow from './DraggableVoiceCallWindow';
 
 interface Props {
   disabled?: boolean;
@@ -12,17 +12,18 @@ interface Props {
 
 const VoiceCallButton: React.FC<Props> = ({ disabled = false, style }) => {
   const { t } = useTranslation();
-  const [isModalVisible, setIsModalVisible] = useState(false);
+  const [isWindowVisible, setIsWindowVisible] = useState(false);
   const [isLoading, setIsLoading] = useState(false);
+  const [windowPosition, setWindowPosition] = useState({ x: 20, y: 20 });
 
   const handleClick = async () => {
     if (disabled || isLoading) return;
-    
+
     setIsLoading(true);
     try {
       // 初始化语音服务
       await VoiceCallService.initialize();
-      setIsModalVisible(true);
+      setIsWindowVisible(true);
     } catch (error) {
       console.error('Failed to initialize voice call:', error);
       window.message.error(t('voice_call.initialization_failed'));
@@ -42,12 +43,12 @@ const VoiceCallButton: React.FC<Props> = ({ disabled = false, style }) => {
           style={style}
         />
       </Tooltip>
-      {isModalVisible && (
-        <VoiceCallModal
-          visible={isModalVisible}
-          onClose={() => setIsModalVisible(false)}
-        />
-      )}
+      <DraggableVoiceCallWindow
+        visible={isWindowVisible}
+        onClose={() => setIsWindowVisible(false)}
+        position={windowPosition}
+        onPositionChange={setWindowPosition}
+      />
     </>
   );
 };
diff --git a/src/renderer/src/pages/home/Inputbar/Inputbar.tsx b/src/renderer/src/pages/home/Inputbar/Inputbar.tsx
index dd74864781..1971245e08 100644
--- a/src/renderer/src/pages/home/Inputbar/Inputbar.tsx
+++ b/src/renderer/src/pages/home/Inputbar/Inputbar.tsx
@@ -712,10 +712,52 @@ const Inputbar: FC<Props> = ({ assistant: _assistant, setActiveTopic, topic }) =
           return newText
         })
         textareaRef.current?.focus()
+      }),
+      // Listen for voice-call messages. NOTE(review): payload is typed `model: string`, but VoiceCallService emits `model: assistant.model` - confirm the types agree
+      EventEmitter.on(EVENT_NAMES.VOICE_CALL_MESSAGE, (data: { text: string, model: string }) => {
+        console.log('收到语音通话消息:', data);
+
+        // Put the recognized text into the input box first
+        setText(data.text);
+
+        // If a model was specified, switch to it
+        if (data.model && data.model !== model) {
+          setModel(data.model);
+        }
+
+        // Delay so the text has been applied to the input box before sending
+        setTimeout(() => {
+          // Send directly, without checking inputEmpty
+          console.log('准备自动发送语音识别消息:', data.text);
+
+          // Build and dispatch the message here rather than going through the input box
+          // Create the user message
+          const userMessage = getUserMessage({
+            assistant,
+            topic,
+            type: 'text',
+            content: data.text
+          });
+
+          // If a model was specified, attach it to the message
+          if (data.model && data.model !== model) {
+            userMessage.model = { id: data.model };
+          }
+
+          // Dispatch the send-message action
+          dispatch(
+            _sendMessage(userMessage, assistant, topic, {})
+          );
+
+          // Clear the input box
+          setText('');
+
+          console.log('已触发发送消息事件');
+        }, 300);
       })
     ]
     return () => unsubscribes.forEach((unsub) => unsub())
-  }, [addNewTopic, resizeTextArea])
+  }, [addNewTopic, resizeTextArea, sendMessage, model, inputEmpty, loading, dispatch, assistant, topic, setText, getUserMessage, _sendMessage])
 
   useEffect(() => {
     textareaRef.current?.focus()
diff --git a/src/renderer/src/pages/home/Messages/Message.tsx b/src/renderer/src/pages/home/Messages/Message.tsx
index 9da880f17b..bfcd546371 100644
--- a/src/renderer/src/pages/home/Messages/Message.tsx
+++ b/src/renderer/src/pages/home/Messages/Message.tsx
@@ -2,11 +2,15 @@ import { FONT_FAMILY } from '@renderer/config/constant'
 import { useAssistant } from '@renderer/hooks/useAssistant'
 import { useModel } from '@renderer/hooks/useModel'
 import { useMessageStyle, useSettings } from '@renderer/hooks/useSettings'
+import { useRuntime } from '@renderer/hooks/useRuntime'
 import { EVENT_NAMES, EventEmitter } from '@renderer/services/EventService'
 import { getMessageModelId } from '@renderer/services/MessagesService'
 import { getModelUniqId } from '@renderer/services/ModelService'
+import TTSService from '@renderer/services/TTSService'
+import { RootState } from '@renderer/store'
 import { Assistant, Message, Topic } from '@renderer/types'
 import { classNames } from '@renderer/utils'
+import { useSelector } from 'react-redux'
 import { Divider, Dropdown } from 'antd'
 import { Dispatch, FC, memo, SetStateAction, useCallback, useEffect, useMemo, useRef, useState } from 'react'
 import { useTranslation } from 'react-i18next'
@@ -46,10 +50,14 @@ const MessageItem: FC<Props> = ({
   const model = useModel(getMessageModelId(message), message.model?.provider) || message.model
   const { isBubbleStyle } = useMessageStyle()
   const { showMessageDivider, messageFont, fontSize } = useSettings()
+  const { generating } = useRuntime()
   const messageContainerRef = useRef<HTMLDivElement>(null)
   // const topic = useTopic(assistant, _topic?.id)
   const [contextMenuPosition, setContextMenuPosition] = useState<{ x: number; y: number } | null>(null)
   const [selectedQuoteText, setSelectedQuoteText] = useState<string>('')
+
+  // 获取TTS设置
+  const ttsEnabled = useSelector((state: RootState) => state.settings.ttsEnabled)
   const [selectedText, setSelectedText] = useState<string>('')
 
   const isLastMessage = index === 0
@@ -88,6 +96,21 @@ const MessageItem: FC<Props> = ({
     }
   }, [])
 
+  // Auto-play TTS for the newest assistant message.
+  useEffect(() => {
+    // Only for the last assistant message that finished successfully, while not generating, with TTS enabled.
+    if (!(isLastMessage && isAssistantMessage && message.status === 'success' && !generating && ttsEnabled)) return
+    // Skip messages that have no content to speak.
+    if (!message.content || !message.content.trim()) return
+    console.log('自动播放最新助手消息的TTS:', message.id)
+    // Short delay so the message is fully loaded. The timer is cleared when the effect
+    // re-runs or the component unmounts, so a stale message is never spoken.
+    const timer = setTimeout(() => {
+      TTSService.speakFromMessage(message)
+    }, 500)
+    return () => clearTimeout(timer)
+  }, [isLastMessage, isAssistantMessage, message, generating, ttsEnabled])
+
   const messageHighlightHandler = useCallback((highlight: boolean = true) => {
     if (messageContainerRef.current) {
       messageContainerRef.current.scrollIntoView({ behavior: 'smooth' })
diff --git a/src/renderer/src/services/EventService.ts b/src/renderer/src/services/EventService.ts
index 39fc59c531..afc6a7212d 100644
--- a/src/renderer/src/services/EventService.ts
+++ b/src/renderer/src/services/EventService.ts
@@ -25,5 +25,6 @@ export const EVENT_NAMES = {
   ADD_NEW_TOPIC: 'ADD_NEW_TOPIC',
   RESEND_MESSAGE: 'RESEND_MESSAGE',
   SHOW_MODEL_SELECTOR: 'SHOW_MODEL_SELECTOR',
-  QUOTE_TEXT: 'QUOTE_TEXT'
+  QUOTE_TEXT: 'QUOTE_TEXT',
+  VOICE_CALL_MESSAGE: 'VOICE_CALL_MESSAGE'
 }
diff --git a/src/renderer/src/services/VoiceCallService.ts b/src/renderer/src/services/VoiceCallService.ts
index 2c8ad94be7..6a20dd0bb3 100644
--- a/src/renderer/src/services/VoiceCallService.ts
+++ b/src/renderer/src/services/VoiceCallService.ts
@@ -2,6 +2,7 @@ import { fetchChatCompletion } from '@renderer/services/ApiService'
 import ASRService from '@renderer/services/ASRService'
 import { getDefaultAssistant } from '@renderer/services/AssistantService'
 import { getAssistantMessage, getUserMessage } from '@renderer/services/MessagesService'
+import { EVENT_NAMES, EventEmitter } from '@renderer/services/EventService'
 import TTSService from '@renderer/services/TTSService'
 import store from '@renderer/store'
 // 导入类型
@@ -445,7 +446,12 @@ class VoiceCallServiceClass {
     }
   }
 
-  async handleUserSpeech(text: string) {
+  /**
+   * 处理用户语音输入
+   * @param text 语音识别结果文本
+   * @param sendToChat 是否将结果发送到聊天界面
+   */
+  async handleUserSpeech(text: string, sendToChat: boolean = false) {
     if (!this.isCallActive || this.isProcessingResponse || this.isPaused) return
 
     // 暂停语音识别，避免在AI回复时继续识别
@@ -469,6 +475,33 @@ class VoiceCallServiceClass {
         assistant.model = voiceCallModel
       }
 
+      // 如果需要发送到聊天界面，触发事件
+      if (sendToChat) {
+        console.log('将语音识别结果发送到聊天界面:', text)
+
+        try {
+          // 直接触发事件，将语音识别结果发送到聊天界面
+          EventEmitter.emit(EVENT_NAMES.VOICE_CALL_MESSAGE, {
+            text,
+            model: assistant.model
+          })
+
+          // 打印日志确认事件已触发
+          console.log('事件已触发，消息内容:', text, '模型:', assistant.model)
+
+          // 使用消息通知用户
+          window.message.success({ content: '语音识别已完成，正在发送消息...', key: 'voice-call-send' })
+        } catch (error) {
+          console.error('发送语音识别结果到聊天界面时出错:', error)
+          window.message.error({ content: '发送语音识别结果失败', key: 'voice-call-error' })
+        }
+
+        // 不在这里处理响应，因为聊天界面会处理
+        this.isProcessingResponse = false
+        return
+      }
+
+      // 以下是原有的处理逻辑，用于独立的语音通话窗口
       // 创建一个简单的Topic对象
       const topic = {
         id: 'voice-call',
@@ -610,6 +643,167 @@ class VoiceCallServiceClass {
     }
   }
 
+  /**
+   * Stop recording and forward the recognized text to the chat view.
+   * @returns true when recording was stopped without an error (even if no text was recognized); false when no call/recording was active or stopping failed
+   */
+  async stopRecordingAndSendToChat(): Promise<boolean> {
+    if (!this.isCallActive || !this.isRecording) {
+      return false
+    }
+
+    // Clear the recording timeout timer
+    if (this.recordingTimeout) {
+      clearTimeout(this.recordingTimeout)
+      this.recordingTimeout = null
+    }
+
+    // Read the configured ASR service type
+    const { asrServiceType } = store.getState().settings
+
+    try {
+      // Clear the recording flag right away so a second call returns early
+      this.isRecording = false
+      this.callbacks?.onListeningStateChange(false)
+
+      // Snapshot the current recognition result, to be sent once the button is released
+      const currentTranscript = this._currentTranscript
+      // Snapshot the accumulated recognition result
+      const accumulatedTranscript = this._accumulatedTranscript
+
+      if (asrServiceType === 'browser') {
+        // Browser ASR
+        if (!this.recognition) {
+          throw new Error('Browser speech recognition not initialized')
+        }
+
+        this.recognition.stop()
+
+        // Prefer the accumulated text when there is any
+        if (accumulatedTranscript && accumulatedTranscript.trim()) {
+          console.log('发送累积的语音识别结果给聊天界面:', accumulatedTranscript)
+          this.handleUserSpeech(accumulatedTranscript, true)
+        } else if (currentTranscript && currentTranscript.trim()) {
+          // No accumulated text: fall back to the current result
+          console.log('没有累积结果，使用当前结果发送给聊天界面:', currentTranscript)
+          this.handleUserSpeech(currentTranscript, true)
+        } else {
+          console.log('没有有效的语音识别结果，不发送消息')
+          window.message.info({ content: '没有收到语音输入', key: 'voice-call-empty' })
+        }
+
+        // Reset transcript state
+        this._currentTranscript = ''
+        this._accumulatedTranscript = ''
+      } else if (asrServiceType === 'local') {
+        // Local-server ASR
+        // Build a promise that resolves with the final result
+        const finalResultPromise = new Promise<string>((resolve) => {
+          // Timeout so we never wait forever
+          const timeoutId = setTimeout(() => {
+            console.log('等待最终结果超时，使用当前结果')
+            resolve(this._currentTranscript)
+          }, 1500) // 1.5 s timeout
+
+          // Callback that receives recognition results after stopping
+          const resultCallback = (text: string, isFinal?: boolean) => {
+            // An empty string only signals a state reset; ignore it
+            if (text === '') return
+
+            if (text) {
+              // Only a final result resolves the promise; interim results do not
+              if (isFinal) {
+                clearTimeout(timeoutId)
+                console.log('收到最终语音识别结果:', text)
+                this._currentTranscript = text
+                this.callbacks?.onTranscript(text)
+                resolve(text)
+              } else {
+                // Interim result: update the display only, do not resolve the promise
+                console.log('收到中间语音识别结果:', text)
+                this.callbacks?.onTranscript(text)
+              }
+            }
+          }
+
+          // Stop (not cancel) recording so the final result can still be delivered
+          ASRService.stopRecording(resultCallback)
+
+          // Safety net: force a reset 2 s after stopping in case results keep arriving
+          setTimeout(() => {
+            // If a new recording has started in the meantime, leave it alone: cancelling here
+            // would abort it and drop its callback (assumes startRecording sets isRecording).
+            if (this.isRecording) return
+            ASRService.cancelRecording()
+            ASRService.resultCallback = null // ignore any results that arrive later
+          }, 2000)
+        })
+
+        // Wait for the final result; the 1.5 s timeout above bounds the wait
+        const finalText = await finalResultPromise
+
+        // Prefer the accumulated text when there is any
+        if (accumulatedTranscript && accumulatedTranscript.trim()) {
+          console.log('发送累积的语音识别结果给聊天界面:', accumulatedTranscript)
+          this.handleUserSpeech(accumulatedTranscript, true)
+        } else if (finalText && finalText.trim()) {
+          // No accumulated text: use the final result
+          console.log('发送最终语音识别结果给聊天界面:', finalText)
+          this.handleUserSpeech(finalText, true)
+        } else if (currentTranscript && currentTranscript.trim()) {
+          // No final result either: use the snapshot taken before stopping
+          console.log('没有最终结果，使用当前结果发送给聊天界面:', currentTranscript)
+          this.handleUserSpeech(currentTranscript, true)
+        } else {
+          console.log('没有有效的语音识别结果，不发送消息')
+          window.message.info({ content: '没有收到语音输入', key: 'voice-call-empty' })
+        }
+
+        // Reset transcript state again
+        this._currentTranscript = ''
+        this._accumulatedTranscript = ''
+      } else if (asrServiceType === 'openai') {
+        // OpenAI ASR
+        await ASRService.stopRecording((text) => {
+          // Store the final recognition result
+          if (text) {
+            this._currentTranscript = text
+            this.callbacks?.onTranscript(text)
+          }
+        })
+
+        // Use the latest recognition result
+        const finalTranscript = this._currentTranscript
+        if (finalTranscript && finalTranscript.trim()) {
+          console.log('发送OpenAI语音识别结果给聊天界面:', finalTranscript)
+          this.handleUserSpeech(finalTranscript, true)
+        } else {
+          console.log('没有有效的OpenAI语音识别结果，不发送消息')
+          window.message.info({ content: '没有收到语音输入', key: 'voice-call-empty' })
+        }
+
+        // Reset transcript state
+        this._currentTranscript = ''
+        this._accumulatedTranscript = ''
+      }
+
+      return true
+    } catch (error) {
+      console.error('Failed to stop recording:', error)
+      this.isRecording = false
+      this.callbacks?.onListeningStateChange(false)
+
+      // Reset transcript state on failure as well
+      this._currentTranscript = ''
+      this._accumulatedTranscript = ''
+
+      // Force-cancel the recording
+      ASRService.cancelRecording()
+
+      return false
+    }
+  }
+
   /**
    * 取消录音，不发送给AI
    * @returns Promise<boolean> 是否成功取消录音
diff --git a/yarn.lock b/yarn.lock
index 996d2b3487..82585346f5 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -3991,6 +3991,7 @@ __metadata:
     rc-virtual-list: "npm:^3.18.5"
     react: "npm:^19.0.0"
     react-dom: "npm:^19.0.0"
+    react-draggable: "npm:^4.4.6"
     react-hotkeys-hook: "npm:^4.6.1"
     react-i18next: "npm:^14.1.2"
     react-infinite-scroll-component: "npm:^6.1.0"
@@ -5290,6 +5291,13 @@ __metadata:
   languageName: node
   linkType: hard
 
+"clsx@npm:^1.1.1":
+  version: 1.2.1
+  resolution: "clsx@npm:1.2.1"
+  checksum: 10c0/34dead8bee24f5e96f6e7937d711978380647e936a22e76380290e35486afd8634966ce300fc4b74a32f3762c7d4c0303f442c3e259f4ce02374eb0c82834f27
+  languageName: node
+  linkType: hard
+
 "code-point-at@npm:^1.0.0":
   version: 1.1.0
   resolution: "code-point-at@npm:1.1.0"
@@ -9511,7 +9519,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"js-tokens@npm:^4.0.0":
+"js-tokens@npm:^3.0.0 || ^4.0.0, js-tokens@npm:^4.0.0":
   version: 4.0.0
   resolution: "js-tokens@npm:4.0.0"
   checksum: 10c0/e248708d377aa058eacf2037b07ded847790e6de892bbad3dac0abba2e759cb9f121b00099a65195616badcb6eca8d14d975cb3e89eb1cfda644756402c8aeed
@@ -10186,6 +10194,17 @@ __metadata:
   languageName: node
   linkType: hard
 
+"loose-envify@npm:^1.4.0":
+  version: 1.4.0
+  resolution: "loose-envify@npm:1.4.0"
+  dependencies:
+    js-tokens: "npm:^3.0.0 || ^4.0.0"
+  bin:
+    loose-envify: cli.js
+  checksum: 10c0/655d110220983c1a4b9c0c679a2e8016d4b67f6e9c7b5435ff5979ecdb20d0813f4dec0a08674fcbdd4846a3f07edbb50a36811fd37930b94aaa0d9daceb017e
+  languageName: node
+  linkType: hard
+
 "lop@npm:^0.4.1":
   version: 0.4.2
   resolution: "lop@npm:0.4.2"
@@ -11985,7 +12004,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"object-assign@npm:^4, object-assign@npm:^4.0.1, object-assign@npm:^4.1.0":
+"object-assign@npm:^4, object-assign@npm:^4.0.1, object-assign@npm:^4.1.0, object-assign@npm:^4.1.1":
   version: 4.1.1
   resolution: "object-assign@npm:4.1.1"
   checksum: 10c0/1f4df9945120325d041ccf7b86f31e8bcc14e73d29171e37a7903050e96b81323784ec59f93f102ec635bcf6fa8034ba3ea0a8c7e69fa202b87ae3b6cec5a414
@@ -12998,6 +13017,17 @@ __metadata:
   languageName: node
   linkType: hard
 
+"prop-types@npm:^15.8.1":
+  version: 15.8.1
+  resolution: "prop-types@npm:15.8.1"
+  dependencies:
+    loose-envify: "npm:^1.4.0"
+    object-assign: "npm:^4.1.1"
+    react-is: "npm:^16.13.1"
+  checksum: 10c0/59ece7ca2fb9838031d73a48d4becb9a7cc1ed10e610517c7d8f19a1e02fa47f7c27d557d8a5702bec3cfeccddc853579832b43f449e54635803f277b1c78077
+  languageName: node
+  linkType: hard
+
 "property-information@npm:^6.0.0":
   version: 6.5.0
   resolution: "property-information@npm:6.5.0"
@@ -13716,6 +13746,19 @@ __metadata:
   languageName: node
   linkType: hard
 
+"react-draggable@npm:^4.4.6":
+  version: 4.4.6
+  resolution: "react-draggable@npm:4.4.6"
+  dependencies:
+    clsx: "npm:^1.1.1"
+    prop-types: "npm:^15.8.1"
+  peerDependencies:
+    react: ">= 16.3.0"
+    react-dom: ">= 16.3.0"
+  checksum: 10c0/1e8cf47414a8554caa68447e5f27749bc40e1eabb4806e2dadcb39ab081d263f517d6aaec5231677e6b425603037c7e3386d1549898f9ffcc98a86cabafb2b9a
+  languageName: node
+  linkType: hard
+
 "react-hotkeys-hook@npm:^4.6.1":
   version: 4.6.1
   resolution: "react-hotkeys-hook@npm:4.6.1"
@@ -13755,7 +13798,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"react-is@npm:^16.7.0":
+"react-is@npm:^16.13.1, react-is@npm:^16.7.0":
   version: 16.13.1
   resolution: "react-is@npm:16.13.1"
   checksum: 10c0/33977da7a5f1a287936a0c85639fec6ca74f4f15ef1e59a6bc20338fc73dc69555381e211f7a3529b8150a1f71e4225525b41b60b52965bda53ce7d47377ada1