From b2a0a029d2ea6c10e95d27f927b3880e726d84d3 Mon Sep 17 00:00:00 2001
From: 1600822305 <1600822305@qq.com>
Date: Fri, 11 Apr 2025 21:49:47 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86=20TTS=20=E7=9B=B8?=
 =?UTF-8?q?=E5=85=B3=E6=9C=8D=E5=8A=A1=E5=B9=B6=E6=9B=B4=E6=96=B0=E4=BA=86?=
 =?UTF-8?q?=E8=AE=BE=E7=BD=AE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../components/DraggableVoiceCallWindow.tsx   | 836 ++++++++++--------
 src/renderer/src/i18n/locales/zh-cn.json      |   7 +-
 .../src/pages/home/Messages/Message.tsx       |  63 +-
 3 files changed, 544 insertions(+), 362 deletions(-)
diff --git a/src/renderer/src/components/DraggableVoiceCallWindow.tsx b/src/renderer/src/components/DraggableVoiceCallWindow.tsx
index 0194f31a1b..beae254619 100644
--- a/src/renderer/src/components/DraggableVoiceCallWindow.tsx
+++ b/src/renderer/src/components/DraggableVoiceCallWindow.tsx
@@ -5,10 +5,11 @@ import {
   DragOutlined,
   PauseCircleOutlined,
   PlayCircleOutlined,
+  SettingOutlined,
   SoundOutlined
 } from '@ant-design/icons'
 import { Button, Space, Tooltip } from 'antd'
-import React, { useEffect, useRef, useState } from 'react'
+import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react'
 import { useTranslation } from 'react-i18next'
 import { useDispatch } from 'react-redux'
 import styled from 'styled-components'
@@ -24,349 +25,7 @@ interface Props {
   onPositionChange?: (position: { x: number; y: number }) => void
 }
 
-const DraggableVoiceCallWindow: React.FC<Props> = ({
-  visible,
-  onClose,
-  position = { x: 20, y: 20 },
-  onPositionChange
-}) => {
-  const { t } = useTranslation()
-  const dispatch = useDispatch()
-  const [isDragging, setIsDragging] = useState(false)
-  const [currentPosition, setCurrentPosition] = useState(position)
-  const [dragOffset, setDragOffset] = useState({ x: 0, y: 0 })
-  const containerRef = useRef<HTMLDivElement>(null)
-
-  // 语音通话状态
-  const [transcript, setTranscript] = useState('')
-  const [isListening, setIsListening] = useState(false)
-  const [isSpeaking, setIsSpeaking] = useState(false)
-  const [isRecording, setIsRecording] = useState(false)
-  const [isProcessing, setIsProcessing] = useState(false)
-  const [isPaused, setIsPaused] = useState(false)
-  const [isMuted, setIsMuted] = useState(false)
-
-  // 使用useRef跟踪窗口是否已经初始化，避免重复初始化
-  const isInitializedRef = useRef(false)
-
-  // 使用useRef跟踪是否已经设置了状态，避免重复设置
-  const stateSetRef = useRef(false)
-
-  // 单独处理状态设置，只在visible变化时执行一次
-  useEffect(() => {
-    if (visible) {
-      // 只有在状态没有设置过的情况下，才设置其他状态
-      if (!stateSetRef.current) {
-        // 设置状态标记为已完成
-        stateSetRef.current = true
-
-        // 更新语音通话窗口状态
-        dispatch(setIsVoiceCallActive(true))
-        // 重置最后播放的消息ID
-        dispatch(setLastPlayedMessageId(null))
-      }
-    } else if (!visible) {
-      // 当窗口关闭时重置状态标记
-      stateSetRef.current = false
-      isInitializedRef.current = false
-
-      // 更新语音通话窗口状态
-      dispatch(setIsVoiceCallActive(false))
-    }
-  }, [visible, dispatch])
-
-  // 单独的 useEffect 来设置 skipNextAutoTTS，确保每次窗口可见时都设置
-  useEffect(() => {
-    if (visible) {
-      // 每次窗口可见时，都设置跳过下一次自动TTS
-      console.log('设置 skipNextAutoTTS 为 true，确保打开窗口时不会自动播放最后一条消息')
-      // 使用 setTimeout 确保在所有消息组件渲染后设置
-      setTimeout(() => {
-        dispatch(setSkipNextAutoTTS(true))
-      }, 100)
-    }
-  }, [visible, dispatch])
-
-  // 处理语音通话初始化和清理
-  useEffect(() => {
-    // 添加TTS状态变化事件监听器
-    const handleTTSStateChange = (event: CustomEvent) => {
-      const { isPlaying } = event.detail
-      console.log('TTS状态变化事件:', isPlaying)
-      setIsSpeaking(isPlaying)
-    }
-
-    const startVoiceCall = async () => {
-      try {
-        // 显示加载中提示
-        window.message.loading({ content: t('voice_call.initializing'), key: 'voice-call-init' })
-
-        // 预先初始化语音识别服务
-        try {
-          await VoiceCallService.initialize()
-        } catch (initError) {
-          console.warn('语音识别服务初始化警告:', initError)
-          // 不抛出异常，允许程序继续运行
-        }
-
-        // 启动语音通话
-        await VoiceCallService.startCall({
-          onTranscript: (text) => setTranscript(text),
-          onResponse: () => {
-            // 这里不设置response，因为响应会显示在聊天界面中
-          },
-          onListeningStateChange: setIsListening,
-          onSpeakingStateChange: setIsSpeaking
-        })
-
-        // 关闭加载中提示
-        window.message.success({ content: t('voice_call.ready'), key: 'voice-call-init' })
-      } catch (error) {
-        console.error('Voice call error:', error)
-        window.message.error({ content: t('voice_call.error'), key: 'voice-call-init' })
-        onClose()
-      }
-    }
-
-    if (visible && !isInitializedRef.current) {
-      // 设置初始化标记为已完成
-      isInitializedRef.current = true
-
-      // 启动语音通话
-      startVoiceCall()
-      // 添加事件监听器
-      window.addEventListener('tts-state-change', handleTTSStateChange as EventListener)
-    }
-
-    return () => {
-      if (!visible) {
-        VoiceCallService.endCall()
-      }
-      // 移除事件监听器
-      window.removeEventListener('tts-state-change', handleTTSStateChange as EventListener)
-    }
-  }, [visible, t, onClose])
-
-  // 拖拽相关处理
-  const handleDragStart = (e: React.MouseEvent) => {
-    if (containerRef.current) {
-      setIsDragging(true)
-      const rect = containerRef.current.getBoundingClientRect()
-      setDragOffset({
-        x: e.clientX - rect.left,
-        y: e.clientY - rect.top
-      })
-    }
-  }
-
-  const handleDrag = (e: MouseEvent) => {
-    if (isDragging) {
-      const newPosition = {
-        x: e.clientX - dragOffset.x,
-        y: e.clientY - dragOffset.y
-      }
-      setCurrentPosition(newPosition)
-      onPositionChange?.(newPosition)
-    }
-  }
-
-  const handleDragEnd = () => {
-    setIsDragging(false)
-  }
-
-  useEffect(() => {
-    if (isDragging) {
-      document.addEventListener('mousemove', handleDrag)
-      document.addEventListener('mouseup', handleDragEnd)
-    }
-    return () => {
-      document.removeEventListener('mousemove', handleDrag)
-      document.removeEventListener('mouseup', handleDragEnd)
-    }
-  }, [isDragging, handleDrag])
-
-  // 语音通话相关处理
-  const toggleMute = () => {
-    setIsMuted(!isMuted)
-    VoiceCallService.setMuted(!isMuted)
-  }
-
-  const togglePause = () => {
-    const newPauseState = !isPaused
-    setIsPaused(newPauseState)
-    VoiceCallService.setPaused(newPauseState)
-  }
-
-  // 长按说话相关处理
-  const handleRecordStart = async (e: React.MouseEvent | React.TouchEvent) => {
-    e.preventDefault() // 防止触摸事件的默认行为
-
-    if (isProcessing || isPaused) return
-
-    // 先清除之前的语音识别结果
-    setTranscript('')
-
-    // 无论是否正在播放，都强制停止TTS
-    VoiceCallService.stopTTS()
-    setIsSpeaking(false)
-
-    // 更新UI状态
-    setIsRecording(true)
-    setIsProcessing(true) // 设置处理状态，防止重复点击
-
-    // 开始录音
-    try {
-      await VoiceCallService.startRecording()
-      console.log('开始录音')
-      setIsProcessing(false) // 录音开始后取消处理状态
-    } catch (error) {
-      console.error('开始录音出错:', error)
-      window.message.error({ content: '启动语音识别失败，请确保语音识别服务已启动', key: 'voice-call-error' })
-      setIsRecording(false)
-      setIsProcessing(false)
-    }
-  }
-
-  const handleRecordEnd = async (e: React.MouseEvent | React.TouchEvent) => {
-    e.preventDefault() // 防止触摸事件的默认行为
-
-    if (!isRecording) return
-
-    // 立即更新UI状态
-    setIsRecording(false)
-    setIsProcessing(true)
-
-    // 无论是否正在播放，都强制停止TTS
-    VoiceCallService.stopTTS()
-    setIsSpeaking(false)
-
-    // 确保录音完全停止
-    try {
-      // 传递 true 参数，表示将结果发送到聊天界面
-      const success = await VoiceCallService.stopRecordingAndSendToChat()
-      console.log('录音已停止，结果已发送到聊天界面', success ? '成功' : '失败')
-
-      if (success) {
-        // 显示成功消息
-        window.message.success({ content: '语音识别已完成，正在发送消息...', key: 'voice-call-send' })
-      } else {
-        // 显示失败消息
-        window.message.error({ content: '发送语音识别结果失败', key: 'voice-call-error' })
-      }
-    } catch (error) {
-      console.error('停止录音出错:', error)
-      window.message.error({ content: '停止录音出错', key: 'voice-call-error' })
-    } finally {
-      // 无论成功与否，都确保在一定时间后重置处理状态
-      setTimeout(() => {
-        setIsProcessing(false)
-      }, 1000) // 增加延迟时间，确保有足够时间处理结果
-    }
-  }
-
-  // 处理鼠标/触摸离开按钮的情况
-  const handleRecordCancel = async (e: React.MouseEvent | React.TouchEvent) => {
-    e.preventDefault()
-
-    if (isRecording) {
-      // 立即更新UI状态
-      setIsRecording(false)
-      setIsProcessing(true)
-
-      // 无论是否正在播放，都强制停止TTS
-      VoiceCallService.stopTTS()
-      setIsSpeaking(false)
-
-      // 取消录音，不发送给AI
-      try {
-        await VoiceCallService.cancelRecording()
-        console.log('录音已取消')
-
-        // 清除输入文本
-        setTranscript('')
-      } catch (error) {
-        console.error('取消录音出错:', error)
-      } finally {
-        // 无论成功与否，都确保在一定时间后重置处理状态
-        setTimeout(() => {
-          setIsProcessing(false)
-        }, 1000)
-      }
-    }
-  }
-
-  if (!visible) return null
-
-  return (
-    <Container
-      ref={containerRef}
-      style={{
-        left: `${currentPosition.x}px`,
-        top: `${currentPosition.y}px`,
-        position: 'fixed',
-        zIndex: 1000
-      }}>
-      <Header onMouseDown={handleDragStart}>
-        <DragOutlined style={{ cursor: 'move', marginRight: 8 }} />
-        {t('voice_call.title')}
-        <CloseButton onClick={onClose}>
-          <CloseOutlined />
-        </CloseButton>
-      </Header>
-
-      <Content>
-        <VisualizerContainer>
-          <VoiceVisualizer isActive={isListening || isRecording} type="input" />
-          <VoiceVisualizer isActive={isSpeaking} type="output" />
-        </VisualizerContainer>
-
-        <TranscriptContainer>
-          {transcript && (
-            <TranscriptText>
-              <UserLabel>{t('voice_call.you')}:</UserLabel> {transcript}
-            </TranscriptText>
-          )}
-        </TranscriptContainer>
-
-        <ControlsContainer>
-          <Space>
-            <Button
-              type="text"
-              icon={isMuted ? <AudioMutedOutlined /> : <AudioOutlined />}
-              onClick={toggleMute}
-              size="large"
-              title={isMuted ? t('voice_call.unmute') : t('voice_call.mute')}
-            />
-            <Button
-              type="text"
-              icon={isPaused ? <PlayCircleOutlined /> : <PauseCircleOutlined />}
-              onClick={togglePause}
-              size="large"
-              title={isPaused ? t('voice_call.resume') : t('voice_call.pause')}
-            />
-            <Tooltip title={t('voice_call.press_to_talk')}>
-              <RecordButton
-                type={isRecording ? 'primary' : 'default'}
-                icon={<SoundOutlined />}
-                onMouseDown={handleRecordStart}
-                onMouseUp={handleRecordEnd}
-                onMouseLeave={handleRecordCancel}
-                onTouchStart={handleRecordStart}
-                onTouchEnd={handleRecordEnd}
-                onTouchCancel={handleRecordCancel}
-                size="large"
-                disabled={isProcessing || isPaused}>
-                {isRecording ? t('voice_call.release_to_send') : t('voice_call.press_to_talk')}
-              </RecordButton>
-            </Tooltip>
-          </Space>
-        </ControlsContainer>
-      </Content>
-    </Container>
-  )
-}
-
-// 样式组件
+// --- 样式组件 ---
 const Container = styled.div`
   width: 300px;
   background-color: var(--color-background);
@@ -375,6 +34,13 @@ const Container = styled.div`
   overflow: hidden;
   display: flex;
   flex-direction: column;
+  transform-origin: top left;
+  will-change: transform;
+  position: fixed;
+  z-index: 1000;
+  left: 0;
+  top: 0;
+  cursor: default;
 `
 
 const Header = styled.div`
@@ -385,10 +51,35 @@ const Header = styled.div`
   display: flex;
   align-items: center;
   cursor: move;
+  user-select: none;
+  position: relative;
+
+  &::before {
+    content: '';
+    position: absolute;
+    top: 0;
+    left: 0;
+    right: 0;
+    height: 4px;
+    background-color: rgba(255, 255, 255, 0.2);
+  }
+
+  &:hover::before {
+    background-color: rgba(255, 255, 255, 0.4);
+  }
+
+  .drag-icon {
+    margin-right: 8px; // DragOutlined 的样式
+  }
+
+  .settings-button {
+    margin-left: auto; // 推到最右边
+    color: white; // 设置按钮颜色
+  }
 `
 
 const CloseButton = styled.div`
-  margin-left: auto;
+  margin-left: 8px; // 与设置按钮保持间距
   cursor: pointer;
 `
 
@@ -435,4 +126,457 @@ const RecordButton = styled(Button)`
   min-width: 120px;
 `
 
-export default DraggableVoiceCallWindow
+// 设置面板的样式
+const SettingsPanel = styled.div`
+  margin-bottom: 10px;
+  padding: 10px;
+  border: 1px solid var(--color-border);
+  border-radius: 8px;
+`
+
+const SettingsTitle = styled.div`
+  margin-bottom: 8px;
+`
+
+const ShortcutKeyButton = styled(Button)`
+  min-width: 120px;
+`
+
+const SettingsTip = styled.div`
+  margin-top: 8px;
+  font-size: 12px;
+  color: var(--color-text-secondary);
+`
+// --- 样式组件结束 ---
+
+const DraggableVoiceCallWindow: React.FC<Props> = ({
+  visible,
+  onClose,
+  position = { x: 20, y: 20 },
+  onPositionChange
+}) => {
+  const { t } = useTranslation()
+  const dispatch = useDispatch()
+  const [isDragging, setIsDragging] = useState(false)
+  const [currentPosition, setCurrentPosition] = useState(position)
+  const dragStartRef = useRef<{ startX: number; startY: number; initialX: number; initialY: number } | null>(null)
+  const containerRef = useRef<HTMLDivElement>(null)
+
+  // --- 语音通话状态 ---
+  const [transcript, setTranscript] = useState('')
+  const [isListening, setIsListening] = useState(false)
+  const [isSpeaking, setIsSpeaking] = useState(false)
+  const [isRecording, setIsRecording] = useState(false)
+  const [isProcessing, setIsProcessing] = useState(false)
+  const [isPaused, setIsPaused] = useState(false)
+  const [isMuted, setIsMuted] = useState(false)
+  // --- 语音通话状态结束 ---
+
+  // --- 快捷键相关状态 ---
+  const [shortcutKey, setShortcutKey] = useState('Space')
+  const [isShortcutPressed, setIsShortcutPressed] = useState(false)
+  const [isSettingsVisible, setIsSettingsVisible] = useState(false)
+  const [tempShortcutKey, setTempShortcutKey] = useState(shortcutKey)
+  const [isRecordingShortcut, setIsRecordingShortcut] = useState(false)
+  // --- 快捷键相关状态结束 ---
+
+  const isInitializedRef = useRef(false)
+
+  // --- 拖拽逻辑 ---
+  const handleDragStart = useCallback(
+    (e: React.MouseEvent) => {
+      if ((e.target as HTMLElement).closest('button, input, a')) {
+        return
+      }
+      e.preventDefault()
+      setIsDragging(true)
+      dragStartRef.current = {
+        startX: e.clientX,
+        startY: e.clientY,
+        initialX: currentPosition.x,
+        initialY: currentPosition.y
+      }
+    },
+    [currentPosition]
+  )
+
+  const handleDrag = useCallback(
+    (e: MouseEvent) => {
+      if (!isDragging || !dragStartRef.current) return
+      e.preventDefault()
+
+      const deltaX = e.clientX - dragStartRef.current.startX
+      const deltaY = e.clientY - dragStartRef.current.startY
+
+      let newX = dragStartRef.current.initialX + deltaX
+      let newY = dragStartRef.current.initialY + deltaY
+
+      const windowWidth = window.innerWidth
+      const windowHeight = window.innerHeight
+      const containerWidth = containerRef.current?.offsetWidth || 300
+      const containerHeight = containerRef.current?.offsetHeight || 300
+
+      newX = Math.max(0, Math.min(newX, windowWidth - containerWidth))
+      newY = Math.max(0, Math.min(newY, windowHeight - containerHeight))
+
+      const newPosition = { x: newX, y: newY }
+      setCurrentPosition(newPosition)
+      onPositionChange?.(newPosition)
+    },
+    [isDragging, onPositionChange]
+  )
+
+  const handleDragEnd = useCallback(
+    (e: MouseEvent) => {
+      if (isDragging) {
+        e.preventDefault()
+        setIsDragging(false)
+        dragStartRef.current = null
+      }
+    },
+    [isDragging] // 移除了 currentPosition 依赖，因为它只在 handleDragStart 中读取一次
+  )
+
+  const throttle = useMemo(() => {
+    let lastCall = 0
+    const delay = 16 // ~60fps
+    return (func: (e: MouseEvent) => void) => {
+      return (e: MouseEvent) => {
+        const now = new Date().getTime()
+        if (now - lastCall < delay) {
+          return
+        }
+        lastCall = now
+        func(e)
+      }
+    }
+  }, [])
+
+  const throttledHandleDrag = useMemo(() => throttle(handleDrag), [handleDrag, throttle])
+
+  useEffect(() => {
+    if (isDragging) {
+      document.addEventListener('mousemove', throttledHandleDrag)
+      document.addEventListener('mouseup', handleDragEnd)
+      document.body.style.cursor = 'move'
+    } else {
+      document.removeEventListener('mousemove', throttledHandleDrag)
+      document.removeEventListener('mouseup', handleDragEnd)
+      document.body.style.cursor = 'default'
+    }
+    return () => {
+      document.removeEventListener('mousemove', throttledHandleDrag)
+      document.removeEventListener('mouseup', handleDragEnd)
+      document.body.style.cursor = 'default'
+    }
+  }, [isDragging, throttledHandleDrag, handleDragEnd])
+  // --- 拖拽逻辑结束 ---
+
+  // --- 状态和副作用管理 ---
+  useEffect(() => {
+    const handleTTSStateChange = (event: CustomEvent) => {
+      const { isPlaying } = event.detail
+      setIsSpeaking(isPlaying)
+    }
+
+    const startVoiceCall = async () => {
+      try {
+        window.message.loading({ content: t('voice_call.initializing'), key: 'voice-call-init' })
+        try {
+          await VoiceCallService.initialize()
+        } catch (initError) {
+          console.warn('语音识别服务初始化警告:', initError)
+        }
+        await VoiceCallService.startCall({
+          onTranscript: setTranscript,
+          onResponse: () => { /* 响应在聊天界面处理 */ },
+          onListeningStateChange: setIsListening,
+          onSpeakingStateChange: setIsSpeaking
+        })
+        window.message.success({ content: t('voice_call.ready'), key: 'voice-call-init' })
+        isInitializedRef.current = true
+      } catch (error) {
+        console.error('Voice call error:', error)
+        window.message.error({ content: t('voice_call.error'), key: 'voice-call-init' })
+        onClose()
+      }
+    }
+
+    if (visible) {
+      dispatch(setIsVoiceCallActive(true))
+      dispatch(setLastPlayedMessageId(null))
+      dispatch(setSkipNextAutoTTS(true))
+      if (!isInitializedRef.current) {
+        startVoiceCall()
+      }
+      window.addEventListener('tts-state-change', handleTTSStateChange as EventListener)
+    } else if (!visible && isInitializedRef.current) {
+      dispatch(setIsVoiceCallActive(false))
+      dispatch(setSkipNextAutoTTS(false))
+      VoiceCallService.endCall()
+      setTranscript('')
+      setIsListening(false)
+      setIsSpeaking(false)
+      setIsRecording(false)
+      setIsProcessing(false)
+      setIsPaused(false)
+      setIsMuted(false)
+      isInitializedRef.current = false
+      window.removeEventListener('tts-state-change', handleTTSStateChange as EventListener)
+    }
+
+    return () => {
+      window.removeEventListener('tts-state-change', handleTTSStateChange as EventListener)
+    }
+  }, [visible, dispatch, t, onClose])
+  // --- 状态和副作用管理结束 ---
+
+  // --- 语音通话控制函数 ---
+  const toggleMute = useCallback(() => {
+    const newMuteState = !isMuted
+    setIsMuted(newMuteState)
+    VoiceCallService.setMuted(newMuteState)
+  }, [isMuted]) // 添加依赖
+
+  const togglePause = useCallback(() => {
+    const newPauseState = !isPaused
+    setIsPaused(newPauseState)
+    VoiceCallService.setPaused(newPauseState)
+  }, [isPaused]) // 添加依赖
+
+  // !! 将这些函数定义移到 handleKeyDown/handleKeyUp 之前 !!
+  const handleRecordStart = useCallback(
+    async (e: React.MouseEvent | React.TouchEvent | KeyboardEvent) => {
+      e.preventDefault()
+      if (isProcessing || isPaused) return
+      setTranscript('')
+      VoiceCallService.stopTTS()
+      setIsSpeaking(false)
+      setIsRecording(true)
+      setIsProcessing(true)
+      try {
+        await VoiceCallService.startRecording()
+        setIsProcessing(false)
+      } catch (error) {
+        window.message.error({ content: '启动语音识别失败，请确保语音识别服务已启动', key: 'voice-call-error' })
+        setIsRecording(false)
+        setIsProcessing(false)
+      }
+    },
+    [isProcessing, isPaused]
+  )
+
+  const handleRecordEnd = useCallback(
+    async (e: React.MouseEvent | React.TouchEvent | KeyboardEvent) => {
+      e.preventDefault()
+      if (!isRecording) return
+      setIsRecording(false)
+      setIsProcessing(true)
+      VoiceCallService.stopTTS()
+      setIsSpeaking(false)
+      try {
+        const success = await VoiceCallService.stopRecordingAndSendToChat()
+        if (success) {
+          window.message.success({ content: '语音识别已完成，正在发送消息...', key: 'voice-call-send' })
+        } else {
+          window.message.error({ content: '发送语音识别结果失败', key: 'voice-call-error' })
+        }
+      } catch (error) {
+        window.message.error({ content: '停止录音出错', key: 'voice-call-error' })
+      } finally {
+        setTimeout(() => setIsProcessing(false), 500)
+      }
+    },
+    [isRecording]
+  )
+
+  const handleRecordCancel = useCallback(
+    async (e: React.MouseEvent | React.TouchEvent | KeyboardEvent) => {
+      e.preventDefault()
+      if (isRecording) {
+        setIsRecording(false)
+        setIsProcessing(true)
+        VoiceCallService.stopTTS()
+        setIsSpeaking(false)
+        try {
+          await VoiceCallService.cancelRecording()
+          setTranscript('')
+        } catch (error) {
+          console.error('取消录音出错:', error);
+        } finally {
+          setTimeout(() => setIsProcessing(false), 500)
+        }
+      }
+    },
+    [isRecording]
+  )
+  // --- 语音通话控制函数结束 ---
+
+
+  // --- 快捷键相关函数 ---
+  const getKeyDisplayName = (keyCode: string) => {
+    const keyMap: Record<string, string> = {
+      Space: '空格键', Enter: '回车键', ShiftLeft: '左Shift键', ShiftRight: '右Shift键',
+      ControlLeft: '左Ctrl键', ControlRight: '右Ctrl键', AltLeft: '左Alt键', AltRight: '右Alt键'
+    }
+    return keyMap[keyCode] || keyCode
+  }
+
+  const handleShortcutKeyChange = useCallback((e: KeyboardEvent) => {
+    e.preventDefault()
+    if (isRecordingShortcut) {
+      setTempShortcutKey(e.code)
+      setIsRecordingShortcut(false)
+    }
+  }, [isRecordingShortcut])
+
+  const saveShortcutKey = useCallback(() => {
+    setShortcutKey(tempShortcutKey)
+    localStorage.setItem('voiceCallShortcutKey', tempShortcutKey)
+    setIsSettingsVisible(false)
+  }, [tempShortcutKey])
+
+  // 现在可以安全地使用 handleRecordStart/End
+  const handleKeyDown = useCallback((e: KeyboardEvent) => {
+    if (isRecordingShortcut) {
+      handleShortcutKeyChange(e)
+      return
+    }
+    if (e.code === shortcutKey && !isProcessing && !isPaused && visible && !isShortcutPressed) {
+      e.preventDefault()
+      setIsShortcutPressed(true)
+      const mockEvent = new MouseEvent('mousedown') as unknown as React.MouseEvent // 类型断言
+      handleRecordStart(mockEvent) // 现在 handleRecordStart 已经定义
+    }
+  }, [
+    shortcutKey, isProcessing, isPaused, visible, isShortcutPressed,
+    handleRecordStart, // 依赖项
+    isRecordingShortcut, handleShortcutKeyChange
+  ])
+
+  const handleKeyUp = useCallback((e: KeyboardEvent) => {
+    if (e.code === shortcutKey && isShortcutPressed && visible) {
+      e.preventDefault()
+      setIsShortcutPressed(false)
+      const mockEvent = new MouseEvent('mouseup') as unknown as React.MouseEvent // 类型断言
+      handleRecordEnd(mockEvent) // 现在 handleRecordEnd 已经定义
+    }
+  }, [shortcutKey, isShortcutPressed, visible, handleRecordEnd]) // 依赖项
+
+  useEffect(() => {
+    const savedShortcut = localStorage.getItem('voiceCallShortcutKey')
+    if (savedShortcut) {
+      setShortcutKey(savedShortcut)
+      setTempShortcutKey(savedShortcut)
+    }
+  }, [])
+
+  useEffect(() => {
+    if (visible) {
+      window.addEventListener('keydown', handleKeyDown)
+      window.addEventListener('keyup', handleKeyUp)
+    }
+    return () => {
+      window.removeEventListener('keydown', handleKeyDown)
+      window.removeEventListener('keyup', handleKeyUp)
+    }
+  }, [visible, handleKeyDown, handleKeyUp])
+  // --- 快捷键相关函数结束 ---
+
+
+  // 如果不可见，直接返回 null
+  if (!visible) return null
+
+  // --- JSX 渲染 ---
+  return (
+    <Container
+      ref={containerRef}
+      style={{
+        transform: `translate(${currentPosition.x}px, ${currentPosition.y}px)` // 使用 transform 定位
+      }}>
+      {/* 将 onMouseDown 移到 Header 上 */}
+      <Header onMouseDown={handleDragStart}>
+        <DragOutlined className="drag-icon" /> {/* 应用样式类 */}
+        {t('voice_call.title')}
+        <Button
+          type="text"
+          icon={<SettingOutlined />}
+          onClick={() => setIsSettingsVisible(!isSettingsVisible)}
+          className="settings-button" // 应用样式类
+        />
+        <CloseButton onClick={onClose}>
+          <CloseOutlined />
+        </CloseButton>
+      </Header>
+
+      <Content>
+        {isSettingsVisible && (
+          <SettingsPanel> {/* 使用 styled-component */}
+            <SettingsTitle>{t('voice_call.shortcut_key_setting')}</SettingsTitle> {/* 使用 styled-component */}
+            <Space>
+              <ShortcutKeyButton onClick={() => setIsRecordingShortcut(true)}> {/* 使用 styled-component */}
+                {isRecordingShortcut ? t('voice_call.press_any_key') : getKeyDisplayName(tempShortcutKey)}
+              </ShortcutKeyButton>
+              <Button type="primary" onClick={saveShortcutKey}>
+                {t('voice_call.save')}
+              </Button>
+              <Button onClick={() => setIsSettingsVisible(false)}>{t('voice_call.cancel')}</Button>
+            </Space>
+            <SettingsTip> {/* 使用 styled-component */}
+              {t('voice_call.shortcut_key_tip')}
+            </SettingsTip>
+          </SettingsPanel>
+        )}
+        <VisualizerContainer>
+          <VoiceVisualizer isActive={isListening || isRecording} type="input" />
+          <VoiceVisualizer isActive={isSpeaking} type="output" />
+        </VisualizerContainer>
+
+        <TranscriptContainer>
+          {transcript && (
+            <TranscriptText>
+              <UserLabel>{t('voice_call.you')}:</UserLabel> {transcript}
+            </TranscriptText>
+          )}
+          {/* 可以在这里添加 AI 回复的显示 */}
+        </TranscriptContainer>
+
+        <ControlsContainer>
+          <Space>
+            <Button
+              type="text"
+              icon={isMuted ? <AudioMutedOutlined /> : <AudioOutlined />}
+              onClick={toggleMute}
+              size="large"
+              title={isMuted ? t('voice_call.unmute') : t('voice_call.mute')}
+            />
+            <Button
+              type="text"
+              icon={isPaused ? <PlayCircleOutlined /> : <PauseCircleOutlined />}
+              onClick={togglePause}
+              size="large"
+              title={isPaused ? t('voice_call.resume') : t('voice_call.pause')}
+            />
+            <Tooltip title={`${t('voice_call.press_to_talk')} (${getKeyDisplayName(shortcutKey)})`}>
+              <RecordButton
+                type={isRecording ? 'primary' : 'default'}
+                icon={<SoundOutlined />}
+                onMouseDown={handleRecordStart}
+                onMouseUp={handleRecordEnd}
+                onMouseLeave={handleRecordCancel}
+                onTouchStart={handleRecordStart}
+                onTouchEnd={handleRecordEnd}
+                onTouchCancel={handleRecordCancel}
+                size="large"
+                disabled={isProcessing || isPaused}>
+                {isRecording ? t('voice_call.release_to_send') : t('voice_call.press_to_talk')}
+              </RecordButton>
+            </Tooltip>
+          </Space>
+        </ControlsContainer>
+      </Content>
+    </Container>
+  )
+}
+
+export default DraggableVoiceCallWindow
\ No newline at end of file
diff --git a/src/renderer/src/i18n/locales/zh-cn.json b/src/renderer/src/i18n/locales/zh-cn.json
index 516b237a7c..0e78497cc8 100644
--- a/src/renderer/src/i18n/locales/zh-cn.json
+++ b/src/renderer/src/i18n/locales/zh-cn.json
@@ -15,7 +15,12 @@
       "initialization_failed": "初始化语音通话失败",
       "error": "语音通话出错",
       "initializing": "正在初始化语音通话...",
-      "ready": "语音通话已就绪"
+      "ready": "语音通话已就绪",
+      "shortcut_key_setting": "语音识别快捷键设置",
+      "press_any_key": "请按任意键...",
+      "save": "保存",
+      "cancel": "取消",
+      "shortcut_key_tip": "按下此快捷键开始录音，松开快捷键结束录音并发送"
     },
     "agents": {
       "add.button": "添加到助手",
diff --git a/src/renderer/src/pages/home/Messages/Message.tsx b/src/renderer/src/pages/home/Messages/Message.tsx
index b675789b77..565e4b3723 100644
--- a/src/renderer/src/pages/home/Messages/Message.tsx
+++ b/src/renderer/src/pages/home/Messages/Message.tsx
@@ -121,22 +121,35 @@ const MessageItem: FC<Props> = ({
     ) {
       console.log('新消息生成完成，消息ID:', message.id)
 
-      // 如果 skipNextAutoTTS 为 true，重置为 false，以便下一条消息可以自动播放
-      if (skipNextAutoTTS) {
-        console.log('重置 skipNextAutoTTS 为 false，以便下一条消息可以自动播放')
+      // 当新消息生成完成时，始终重置 skipNextAutoTTS 为 false
+      // 这样确保新生成的消息可以自动播放
+      console.log('新消息生成完成，重置 skipNextAutoTTS 为 false')
+      dispatch(setSkipNextAutoTTS(false))
+    }
+  }, [isLastMessage, isAssistantMessage, message.status, message.id, generating, dispatch, prevGeneratingRef])
+
+  // 当消息内容变化时，重置 skipNextAutoTTS
+  useEffect(() => {
+    // 如果是最后一条助手消息，且消息状态为成功，且消息内容不为空
+    if (
+      isLastMessage &&
+      isAssistantMessage &&
+      message.status === 'success' &&
+      message.content &&
+      message.content.trim()
+    ) {
+      // 如果是新生成的消息，重置 skipNextAutoTTS 为 false
+      if (message.id !== lastPlayedMessageId) {
+        console.log(
+          '检测到新消息，重置 skipNextAutoTTS 为 false，消息ID:',
+          message.id,
+          '消息内容前20个字符:',
+          message.content?.substring(0, 20)
+        )
         dispatch(setSkipNextAutoTTS(false))
       }
     }
-  }, [
-    isLastMessage,
-    isAssistantMessage,
-    message.status,
-    message.id,
-    generating,
-    skipNextAutoTTS,
-    dispatch,
-    prevGeneratingRef
-  ])
+  }, [isLastMessage, isAssistantMessage, message.status, message.content, message.id, lastPlayedMessageId, dispatch])
 
   // 自动播放TTS的逻辑
   useEffect(() => {
@@ -151,12 +164,32 @@ const MessageItem: FC<Props> = ({
     ) {
       // 检查是否需要跳过自动TTS
       if (skipNextAutoTTS) {
-        console.log('跳过自动TTS，因为 skipNextAutoTTS 为 true，消息ID:', message.id)
+        console.log(
+          '跳过自动TTS，因为 skipNextAutoTTS 为 true，消息ID:',
+          message.id,
+          '消息内容前20个字符:',
+          message.content?.substring(0, 20),
+          '消息状态:',
+          message.status,
+          '是否最后一条消息:',
+          isLastMessage,
+          '是否助手消息:',
+          isAssistantMessage,
+          '是否正在生成中:',
+          generating,
+          '语音通话窗口状态:',
+          isVoiceCallActive
+        )
         // 注意：不在这里重置 skipNextAutoTTS，而是在新消息生成时重置
         return
       }
 
-      console.log('准备自动播放TTS，因为 skipNextAutoTTS 为 false，消息ID:', message.id)
+      console.log(
+        '准备自动播放TTS，因为 skipNextAutoTTS 为 false，消息ID:',
+        message.id,
+        '消息内容前20个字符:',
+        message.content?.substring(0, 20)
+      )
 
       // 检查消息是否有内容，且消息是新的（不是上次播放过的消息）
       if (message.content && message.content.trim() && message.id !== lastPlayedMessageId) {