TTS语音通话功能

This commit is contained in:
1600822305 2025-04-11 16:02:17 +08:00
parent a8f18caf0e
commit 120282ea87
9 changed files with 465 additions and 71 deletions

View File

@ -32,25 +32,51 @@ const VoiceCallModal: React.FC<Props> = ({ visible, onClose }) => {
useEffect(() => {
const startVoiceCall = async () => {
try {
// 显示加载中提示
window.message.loading({ content: t('voice_call.initializing'), key: 'voice-call-init' });
// 预先初始化语音识别服务
try {
await VoiceCallService.initialize();
} catch (initError) {
console.warn('语音识别服务初始化警告:', initError);
// 不抛出异常,允许程序继续运行
}
// 启动语音通话
await VoiceCallService.startCall({
onTranscript: (text) => setTranscript(text),
onResponse: (text) => setResponse(text),
onListeningStateChange: setIsListening,
onSpeakingStateChange: setIsSpeaking,
});
// 关闭加载中提示
window.message.success({ content: t('voice_call.ready'), key: 'voice-call-init' });
} catch (error) {
console.error('Voice call error:', error);
window.message.error(t('voice_call.error'));
window.message.error({ content: t('voice_call.error'), key: 'voice-call-init' });
handleClose();
}
};
// 添加TTS状态变化事件监听器
const handleTTSStateChange = (event: CustomEvent) => {
const { isPlaying } = event.detail;
console.log('TTS状态变化事件:', isPlaying);
setIsSpeaking(isPlaying);
};
if (visible) {
startVoiceCall();
// 添加事件监听器
window.addEventListener('tts-state-change', handleTTSStateChange as EventListener);
}
return () => {
VoiceCallService.endCall();
// 移除事件监听器
window.removeEventListener('tts-state-change', handleTTSStateChange as EventListener);
};
}, [visible, t]);
@ -77,8 +103,28 @@ const VoiceCallModal: React.FC<Props> = ({ visible, onClose }) => {
if (isProcessing || isPaused) return;
// 先清除之前的语音识别结果
setTranscript('');
// 无论是否正在播放都强制停止TTS
VoiceCallService.stopTTS();
setIsSpeaking(false);
// 更新UI状态
setIsRecording(true);
await VoiceCallService.startRecording();
setIsProcessing(true); // 设置处理状态,防止重复点击
// 开始录音
try {
await VoiceCallService.startRecording();
console.log('开始录音');
setIsProcessing(false); // 录音开始后取消处理状态
} catch (error) {
console.error('开始录音出错:', error);
window.message.error({ content: '启动语音识别失败,请确保语音识别服务已启动', key: 'voice-call-error' });
setIsRecording(false);
setIsProcessing(false);
}
};
const handleRecordEnd = async (e: React.MouseEvent | React.TouchEvent) => {
@ -90,18 +136,22 @@ const VoiceCallModal: React.FC<Props> = ({ visible, onClose }) => {
setIsRecording(false);
setIsProcessing(true);
// 无论是否正在播放都强制停止TTS
VoiceCallService.stopTTS();
setIsSpeaking(false);
// 确保录音完全停止
try {
await VoiceCallService.stopRecording();
console.log('录音已停止');
} catch (error) {
console.error('停止录音出错:', error);
} finally {
// 无论成功与否,都确保在一定时间后重置处理状态
setTimeout(() => {
setIsProcessing(false);
}, 1000); // 增加延迟时间,确保有足够时间处理结果
}
// 处理结果会通过回调函数返回,不需要在这里处理
setTimeout(() => {
setIsProcessing(false);
}, 500); // 添加短暂延迟,防止用户立即再次点击
};
// 处理鼠标/触摸离开按钮的情况
@ -113,17 +163,25 @@ const VoiceCallModal: React.FC<Props> = ({ visible, onClose }) => {
setIsRecording(false);
setIsProcessing(true);
// 无论是否正在播放都强制停止TTS
VoiceCallService.stopTTS();
setIsSpeaking(false);
// 取消录音不发送给AI
try {
await VoiceCallService.cancelRecording();
console.log('录音已取消');
// 清除输入文本
setTranscript('');
} catch (error) {
console.error('取消录音出错:', error);
} finally {
// 无论成功与否,都确保在一定时间后重置处理状态
setTimeout(() => {
setIsProcessing(false);
}, 1000);
}
setTimeout(() => {
setIsProcessing(false);
}, 500);
}
};

View File

@ -13,7 +13,9 @@
"press_to_talk": "Press to Talk",
"release_to_send": "Release to Send",
"initialization_failed": "Failed to initialize voice call",
"error": "Voice call error"
"error": "Voice call error",
"initializing": "Initializing voice call...",
"ready": "Voice call ready"
},
"agents": {
"add.button": "Add to Assistant",
@ -1479,9 +1481,21 @@
}
},
"voice": {
"title": "[to be translated]:语音功能",
"help": "[to be translated]:语音功能包括文本转语音(TTS)和语音识别(ASR)。",
"learn_more": "[to be translated]:了解更多"
"title": "Voice Features",
"help": "Voice features include Text-to-Speech (TTS), Automatic Speech Recognition (ASR), and Voice Call.",
"learn_more": "Learn More"
},
"voice_call": {
"tab_title": "Voice Call",
"enable": "Enable Voice Call",
"enable.help": "Enable to use voice call feature to talk with AI",
"model": "Call Model",
"model.select": "Select Model",
"model.current": "Current Model: {{model}}",
"model.info": "Select the AI model for voice calls. Different models may provide different voice interaction experiences",
"asr_tts_info": "Voice call uses the Speech Recognition (ASR) and Text-to-Speech (TTS) settings above",
"test": "Test Voice Call",
"test_info": "Please use the voice call button on the right side of the input box to test"
}
},
"translate": {

View File

@ -13,7 +13,9 @@
"press_to_talk": "长按说话",
"release_to_send": "松开发送",
"initialization_failed": "初始化语音通话失败",
"error": "语音通话出错"
"error": "语音通话出错",
"initializing": "正在初始化语音通话...",
"ready": "语音通话已就绪"
},
"agents": {
"add.button": "添加到助手",
@ -1354,7 +1356,7 @@
},
"voice": {
"title": "语音功能",
"help": "语音功能包括文本转语音(TTS)和语音识别(ASR)。",
"help": "语音功能包括文本转语音(TTS)、语音识别(ASR)和语音通话。",
"learn_more": "了解更多"
},
"tts": {
@ -1482,6 +1484,18 @@
"start_failed": "开始录音失败",
"transcribe_failed": "语音识别失败"
}
},
"voice_call": {
"tab_title": "通话功能",
"enable": "启用语音通话",
"enable.help": "启用后可以使用语音通话功能与AI进行对话",
"model": "通话模型",
"model.select": "选择模型",
"model.current": "当前模型: {{model}}",
"model.info": "选择用于语音通话的AI模型不同模型可能有不同的语音交互体验",
"asr_tts_info": "语音通话使用上面的语音识别(ASR)和语音合成(TTS)设置",
"test": "测试通话",
"test_info": "请使用输入框右侧的语音通话按钮进行测试"
}
},
"translate": {

View File

@ -1,4 +1,4 @@
import { AudioOutlined, PlusOutlined, ReloadOutlined, SoundOutlined } from '@ant-design/icons'
import { AudioOutlined, PhoneOutlined, PlusOutlined, ReloadOutlined, SoundOutlined } from '@ant-design/icons'
import { useTheme } from '@renderer/context/ThemeProvider'
import TTSService from '@renderer/services/TTSService'
import store, { useAppDispatch } from '@renderer/store'
@ -41,6 +41,7 @@ import {
SettingTitle
} from '..'
import ASRSettings from './ASRSettings'
import VoiceCallSettings from './VoiceCallSettings'
// 预定义的浏览器 TTS音色列表
const PREDEFINED_VOICES = [
@ -986,6 +987,15 @@ const TTSSettings: FC = () => {
</span>
),
children: <ASRSettings />
},
{
key: 'voice_call',
label: (
<span>
<PhoneOutlined /> {t('settings.voice_call.tab_title')}
</span>
),
children: <VoiceCallSettings />
}
]}
/>

View File

@ -0,0 +1,140 @@
import { InfoCircleOutlined, PhoneOutlined } from '@ant-design/icons'
import { getModelLogo } from '@renderer/config/models'
import SelectModelPopup from '@renderer/components/Popups/SelectModelPopup'
import { useAppDispatch } from '@renderer/store'
import { setVoiceCallEnabled, setVoiceCallModel } from '@renderer/store/settings'
import { Button, Form, Space, Switch, Tooltip as AntTooltip } from 'antd'
import { FC, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useSelector } from 'react-redux'
import styled from 'styled-components'
/**
 * Settings panel for the voice-call feature: an enable switch, an AI-model
 * picker, a hint that ASR/TTS settings are shared, and a test button.
 *
 * Fix vs. original: the original kept a `useState` flag whose value was
 * discarded (`const [, setIsSelectingModel]`), triggering extra re-renders
 * on every model selection for no observable effect; it has been removed.
 */
const VoiceCallSettings: FC = () => {
  const { t } = useTranslation()
  const dispatch = useAppDispatch()

  // Voice-call settings from Redux; `enabled` defaults to true when not yet persisted.
  // NOTE(review): `state: any` because the typed RootState selector is not imported
  // here — consider switching to the project's typed `useAppSelector` if available.
  const voiceCallEnabled = useSelector((state: any) => state.settings.voiceCallEnabled ?? true)
  const voiceCallModel = useSelector((state: any) => state.settings.voiceCallModel)

  // Open the model picker and persist the selection; popup errors are logged only.
  const handleSelectModel = async () => {
    try {
      const model = await SelectModelPopup.show({})
      if (model) {
        dispatch(setVoiceCallModel(model))
      }
    } catch (error) {
      console.error('选择模型失败:', error)
    }
  }

  return (
    <Container>
      <Form layout="vertical">
        {/* Voice-call enable switch */}
        <Form.Item>
          <Space>
            <Switch checked={voiceCallEnabled} onChange={(checked) => dispatch(setVoiceCallEnabled(checked))} />
            <span>{t('settings.voice_call.enable')}</span>
            <AntTooltip title={t('settings.voice_call.enable.help')}>
              <InfoCircleOutlined style={{ color: 'var(--color-text-3)' }} />
            </AntTooltip>
          </Space>
        </Form.Item>
        {/* Model selection (disabled while the feature itself is off) */}
        <Form.Item label={t('settings.voice_call.model')} style={{ marginBottom: 16 }}>
          <Space>
            <Button
              onClick={handleSelectModel}
              disabled={!voiceCallEnabled}
              icon={
                voiceCallModel ? (
                  <ModelIcon src={getModelLogo(voiceCallModel.id)} alt="Model logo" />
                ) : (
                  <PhoneOutlined style={{ marginRight: 8 }} />
                )
              }>
              {voiceCallModel ? voiceCallModel.name : t('settings.voice_call.model.select')}
            </Button>
            {voiceCallModel && (
              <InfoText>{t('settings.voice_call.model.current', { model: voiceCallModel.name })}</InfoText>
            )}
          </Space>
          <InfoText>{t('settings.voice_call.model.info')}</InfoText>
        </Form.Item>
        {/* Reminder that voice calls reuse the ASR/TTS settings configured above */}
        <Form.Item>
          <Alert type="info">{t('settings.voice_call.asr_tts_info')}</Alert>
        </Form.Item>
        {/* Test button: only shows a hint pointing at the input-box call button */}
        <Form.Item>
          <Button
            type="primary"
            icon={<PhoneOutlined />}
            disabled={!voiceCallEnabled}
            onClick={() => window.message.info({ content: t('settings.voice_call.test_info'), key: 'voice-call-test' })}>
            {t('settings.voice_call.test')}
          </Button>
        </Form.Item>
      </Form>
    </Container>
  )
}
// Outer wrapper for the settings form; bottom padding only.
const Container = styled.div`
padding: 0 0 20px 0;
`
// Small secondary hint text rendered under form controls.
const InfoText = styled.div`
color: var(--color-text-3);
font-size: 12px;
margin-top: 4px;
`
// 16x16 model logo shown inside the select-model button.
const ModelIcon = styled.img`
width: 16px;
height: 16px;
margin-right: 8px;
`
// Allowed alert variants; each maps to a set of theme CSS variables.
type AlertVariant = 'info' | 'warning' | 'error' | 'success'

// One lookup table instead of three duplicated four-way ternary chains.
// Values are byte-identical to the original interpolations, so rendered CSS
// is unchanged.
const ALERT_COLORS: Record<AlertVariant, { bg: string; border: string; text: string }> = {
  info: { bg: 'var(--color-info-bg)', border: 'var(--color-info-border)', text: 'var(--color-info-text)' },
  warning: { bg: 'var(--color-warning-bg)', border: 'var(--color-warning-border)', text: 'var(--color-warning-text)' },
  error: { bg: 'var(--color-error-bg)', border: 'var(--color-error-border)', text: 'var(--color-error-text)' },
  success: { bg: 'var(--color-success-bg)', border: 'var(--color-success-border)', text: 'var(--color-success-text)' }
}

// Lightweight in-house alert box (avoids pulling in antd's Alert component).
const Alert = styled.div<{ type: AlertVariant }>`
  padding: 8px 12px;
  border-radius: 4px;
  background-color: ${(props) => ALERT_COLORS[props.type].bg};
  border: 1px solid ${(props) => ALERT_COLORS[props.type].border};
  color: ${(props) => ALERT_COLORS[props.type].text};
`
export default VoiceCallSettings

View File

@ -130,15 +130,18 @@ class ASRService {
// 语音识别已重置
console.log('[ASRService] 语音识别已强制重置')
this.isRecording = false
// 保存当前回调函数并立即清除
const tempCallback = this.resultCallback
this.resultCallback = null
// 显示重置完成消息
window.message.info({ content: '语音识别已重置', key: 'asr-reset' })
// 如果有回调函数,调用一次空字符串,触发按钮状态重置
if (this.resultCallback && typeof this.resultCallback === 'function') {
if (tempCallback && typeof tempCallback === 'function') {
// 使用空字符串调用回调,不会影响输入框,但可以触发按钮状态重置
const callback = this.resultCallback as (text: string, isFinal?: boolean) => void // 明确指定类型
const callback = tempCallback as (text: string, isFinal?: boolean) => void // 明确指定类型
setTimeout(() => {
callback('', false)
}, 100)
@ -147,14 +150,27 @@ class ASRService {
} else if (data.type === 'result' && data.data) {
// 处理识别结果
console.log('[ASRService] 收到识别结果:', data.data)
// 如果已经停止录音但仍然收到结果,检查是否是最终结果
if (!this.isRecording && !data.data.isFinal) {
console.log('[ASRService] 已停止录音但收到非最终结果,忽略')
return
}
if (this.resultCallback && typeof this.resultCallback === 'function') {
// 将所有结果都传递给回调函数并包含isFinal状态
if (data.data.text && data.data.text.trim()) {
if (data.data.isFinal) {
console.log('[ASRService] 收到最终结果,调用回调函数,文本:', data.data.text)
this.resultCallback(data.data.text, true)
// 保存当前回调函数并立即清除,防止重复处理
const tempCallback = this.resultCallback
this.resultCallback = null
// 调用回调函数
tempCallback(data.data.text, true)
window.message.success({ content: i18n.t('settings.asr.success'), key: 'asr-processing' })
} else {
} else if (this.isRecording) { // 只在录音中才处理中间结果
// 非最终结果,也调用回调,但标记为非最终
console.log('[ASRService] 收到中间结果,调用回调函数,文本:', data.data.text)
this.resultCallback(data.data.text, false)
@ -234,6 +250,27 @@ class ASRService {
let waitAttempts = 0
const maxWaitAttempts = 5
// 尝试打开浏览器页面
try {
// 发送消息提示用户
window.message.info({
content: '正在准备语音识别服务...',
key: 'browser-status'
})
// 尝试自动打开浏览器页面
try {
// 使用ASRServerService获取服务器URL
const serverUrl = 'http://localhost:8080'
console.log('尝试打开语音识别服务器页面:', serverUrl)
window.open(serverUrl, '_blank')
} catch (error) {
console.error('获取服务器URL失败:', error)
}
} catch (error) {
console.error('打开语音识别浏览器页面失败:', error)
}
while (!this.browserReady && waitAttempts < maxWaitAttempts) {
window.message.loading({
content: `等待浏览器准备就绪 (${waitAttempts + 1}/${maxWaitAttempts})...`,
@ -337,6 +374,12 @@ class ASRService {
onTranscribed('', false)
}, 100)
}
// 添加额外的安全措施,确保在停止后也清除回调
setTimeout(() => {
// 在停止后的一段时间内清除回调,防止后续结果被处理
this.resultCallback = null
}, 3000) // 3秒后清除回调
} else {
throw new Error('WebSocket连接未就绪')
}
@ -501,6 +544,14 @@ class ASRService {
return this.isRecording
}
/**
 * Whether the local ASR pipeline is fully usable: the WebSocket to the
 * recognition server is connected AND the browser-side recognizer has
 * reported itself ready. Both flags are required before recording can start.
 * @returns boolean
 */
isWebSocketConnected = (): boolean => {
return this.wsConnected && this.browserReady
}
/**
*
*/
@ -509,7 +560,8 @@ class ASRService {
// 如果是使用本地服务器
if (asrServiceType === 'local') {
if (this.isRecording) {
// 修改条件,即使不在录音中也进行重置
if (this.isRecording || this.resultCallback) {
// 先重置状态和回调,确保不会处理后续结果
this.isRecording = false
this.resultCallback = null
@ -517,11 +569,13 @@ class ASRService {
// 发送停止命令
if (this.ws && this.wsConnected) {
this.ws.send(JSON.stringify({ type: 'stop' }))
console.log('发送停止命令到WebSocket服务器')
// 发送一个额外的命令,要求浏览器强制重置语音识别
setTimeout(() => {
if (this.ws && this.wsConnected) {
this.ws.send(JSON.stringify({ type: 'reset' }))
console.log('发送重置命令到WebSocket服务器')
}
}, 100)
}

View File

@ -65,13 +65,17 @@ class VoiceCallServiceClass {
// 如果使用本地服务器ASR检查连接
try {
// 尝试连接本地ASR服务器
console.log('初始化时尝试连接语音识别服务器')
const connected = await ASRService.connectToWebSocketServer()
if (!connected) {
throw new Error('无法连接到语音识别服务')
console.warn('无法连接到语音识别服务,将在需要时重试')
// 不抛出异常,允许程序继续运行,在需要时重试
} else {
console.log('语音识别服务器连接成功')
}
} catch (error) {
console.error('Failed to connect to ASR server:', error)
throw new Error('Failed to connect to ASR server')
console.error('连接语音识别服务器失败:', error)
// 不抛出异常,允许程序继续运行,在需要时重试
}
}
@ -86,6 +90,22 @@ class VoiceCallServiceClass {
// 获取当前ASR服务类型
const { asrServiceType } = store.getState().settings
// 如果是本地服务器ASR预先连接服务器
if (asrServiceType === 'local') {
try {
// 尝试连接WebSocket服务器
console.log('通话开始,预先连接语音识别服务器')
const connected = await ASRService.connectToWebSocketServer()
if (!connected) {
console.warn('无法连接到语音识别服务器,将在需要时重试')
} else {
console.log('语音识别服务器连接成功')
}
} catch (error) {
console.error('连接语音识别服务器失败:', error)
}
}
// 根据不同的ASR服务类型进行初始化
if (asrServiceType === 'browser') {
if (!this.recognition) {
@ -203,36 +223,54 @@ class VoiceCallServiceClass {
this.isRecording = true
} else if (asrServiceType === 'local') {
// 本地服务器ASR
await ASRService.startRecording((text, isFinal) => {
if (text) {
if (isFinal) {
// 如果是最终结果,累积到总结果中
if (this._accumulatedTranscript) {
// 如果已经有累积的文本,添加空格再追加
this._accumulatedTranscript += ' ' + text
} else {
// 如果是第一段文本,直接设置
this._accumulatedTranscript = text
}
// 更新当前的识别结果
this._currentTranscript = ''
// 显示累积的完整结果
this.callbacks?.onTranscript(this._accumulatedTranscript)
} else {
// 如果是临时结果,更新当前的识别结果
this._currentTranscript = text
// 显示累积结果 + 当前临时结果
this.callbacks?.onTranscript(this._accumulatedTranscript + ' ' + text)
try {
// 先检查连接状态,如果未连接则尝试重新连接
if (!ASRService.isWebSocketConnected()) {
console.log('语音识别服务器未连接,尝试重新连接')
const connected = await ASRService.connectToWebSocketServer()
if (!connected) {
throw new Error('无法连接到语音识别服务器')
}
// 在录音过程中只更新transcript不触发handleUserSpeech
// 松开按钮后才会处理完整的录音内容
// 等待一下,确保连接已建立
await new Promise(resolve => setTimeout(resolve, 500))
}
})
this.isRecording = true
this.callbacks?.onListeningStateChange(true)
// 开始录音
await ASRService.startRecording((text, isFinal) => {
if (text) {
if (isFinal) {
// 如果是最终结果,累积到总结果中
if (this._accumulatedTranscript) {
// 如果已经有累积的文本,添加空格再追加
this._accumulatedTranscript += ' ' + text
} else {
// 如果是第一段文本,直接设置
this._accumulatedTranscript = text
}
// 更新当前的识别结果
this._currentTranscript = ''
// 显示累积的完整结果
this.callbacks?.onTranscript(this._accumulatedTranscript)
} else {
// 如果是临时结果,更新当前的识别结果
this._currentTranscript = text
// 显示累积结果 + 当前临时结果
this.callbacks?.onTranscript(this._accumulatedTranscript + ' ' + text)
}
// 在录音过程中只更新transcript不触发handleUserSpeech
// 松开按钮后才会处理完整的录音内容
}
})
this.isRecording = true
this.callbacks?.onListeningStateChange(true)
} catch (error) {
console.error('启动语音识别失败:', error)
throw error
}
} else if (asrServiceType === 'openai') {
// OpenAI ASR
await ASRService.startRecording()
@ -275,6 +313,10 @@ class VoiceCallServiceClass {
const { asrServiceType } = store.getState().settings
try {
// 立即设置录音状态为false防止重复处理
this.isRecording = false
this.callbacks?.onListeningStateChange(false)
// 存储当前的语音识别结果用于松开按钮后发送给AI
const currentTranscript = this._currentTranscript
// 存储累积的语音识别结果
@ -287,9 +329,6 @@ class VoiceCallServiceClass {
}
this.recognition.stop()
// onend事件将设置isRecording = false
this.isRecording = false
this.callbacks?.onListeningStateChange(false)
// 优先使用累积的文本,如果有的话
if (accumulatedTranscript) {
@ -300,6 +339,10 @@ class VoiceCallServiceClass {
console.log('没有累积结果,使用当前结果:', currentTranscript)
this.handleUserSpeech(currentTranscript)
}
// 清除状态
this._currentTranscript = ''
this._accumulatedTranscript = ''
} else if (asrServiceType === 'local') {
// 本地服务器ASR
// 创建一个承诺,等待最终结果
@ -311,32 +354,40 @@ class VoiceCallServiceClass {
}, 1500) // 1.5秒超时
// 设置回调函数来接收最终结果
const resultCallback = (text: string) => {
const resultCallback = (text: string, isFinal?: boolean) => {
// 如果是空字符串,表示只是重置状态,不处理
if (text === '') return
if (text) {
clearTimeout(timeoutId)
console.log('收到最终语音识别结果:', text)
this._currentTranscript = text
this.callbacks?.onTranscript(text)
resolve(text)
// 只处理最终结果,忽略中间结果
if (isFinal) {
clearTimeout(timeoutId)
console.log('收到最终语音识别结果:', text)
this._currentTranscript = text
this.callbacks?.onTranscript(text)
resolve(text)
} else {
// 对于中间结果只更新显示不解析Promise
console.log('收到中间语音识别结果:', text)
this.callbacks?.onTranscript(text)
}
}
}
// 停止录音,但不取消,以获取最终结果
ASRService.stopRecording(resultCallback)
this.isRecording = false
this.callbacks?.onListeningStateChange(false)
// 添加额外的安全措施,在停止后立即发送重置命令
setTimeout(() => {
// 发送重置命令,确保浏览器不会继续发送结果
ASRService.cancelRecording()
// 清除ASRService中的回调函数防止后续结果被处理
ASRService.resultCallback = null
}, 2000) // 2秒后强制取消作为安全措施
})
// 等待最终结果
// 等待最终结果但最多等待3秒
const finalText = await finalResultPromise
// 优先使用累积的文本,如果有的话
@ -352,6 +403,10 @@ class VoiceCallServiceClass {
console.log('没有最终结果,使用当前结果:', currentTranscript)
this.handleUserSpeech(currentTranscript)
}
// 再次确保所有状态被重置
this._currentTranscript = ''
this._accumulatedTranscript = ''
} else if (asrServiceType === 'openai') {
// OpenAI ASR
await ASRService.stopRecording((text) => {
@ -362,14 +417,15 @@ class VoiceCallServiceClass {
}
})
this.isRecording = false
this.callbacks?.onListeningStateChange(false)
// 使用最新的语音识别结果
const finalTranscript = this._currentTranscript
if (finalTranscript) {
this.handleUserSpeech(finalTranscript)
}
// 清除状态
this._currentTranscript = ''
this._accumulatedTranscript = ''
}
return true
@ -377,6 +433,14 @@ class VoiceCallServiceClass {
console.error('Failed to stop recording:', error)
this.isRecording = false
this.callbacks?.onListeningStateChange(false)
// 确保在出错时也清除状态
this._currentTranscript = ''
this._accumulatedTranscript = ''
// 强制取消录音
ASRService.cancelRecording()
return false
}
}
@ -398,6 +462,13 @@ class VoiceCallServiceClass {
// 获取当前助手
const assistant = getDefaultAssistant()
// 检查是否有自定义模型
const { voiceCallModel } = store.getState().settings
if (voiceCallModel) {
// 如果有自定义模型,覆盖默认助手的模型
assistant.model = voiceCallModel
}
// 创建一个简单的Topic对象
const topic = {
id: 'voice-call',
@ -601,6 +672,20 @@ class VoiceCallServiceClass {
}
}
/**
 * Force-stop TTS playback, regardless of whether anything appears to be
 * playing, then broadcast the stopped state so UI listeners update.
 * @returns void
 */
stopTTS(): void {
// Stop unconditionally — do not gate on an "is playing" flag
this.ttsService.stop()
console.log('强制停止TTS播放')
// Manually dispatch the 'tts-state-change' event so components listening
// for TTS state (e.g. the speaking indicator) reset even if the underlying
// service did not emit one itself
const event = new CustomEvent('tts-state-change', { detail: { isPlaying: false } })
window.dispatchEvent(event)
}
setPaused(paused: boolean) {
this.isPaused = paused

View File

@ -206,11 +206,16 @@ export class TTSService {
*
*/
public stop(): void {
if (this.audioElement && this.isPlaying) {
// 无论是否正在播放,都强制停止
if (this.audioElement) {
this.audioElement.pause()
this.audioElement.currentTime = 0
this.isPlaying = false
console.log('停止TTS播放')
console.log('强制停止TTS播放')
// 触发自定义事件通知其他组件TTS已停止
const event = new CustomEvent('tts-state-change', { detail: { isPlaying: false } })
window.dispatchEvent(event)
}
}

View File

@ -1,6 +1,6 @@
import { createSlice, PayloadAction } from '@reduxjs/toolkit'
import { TRANSLATE_PROMPT } from '@renderer/config/prompts'
import { CodeStyleVarious, LanguageVarious, ThemeMode, TranslateLanguageVarious } from '@renderer/types'
import { CodeStyleVarious, LanguageVarious, Model, ThemeMode, TranslateLanguageVarious } from '@renderer/types'
import { IpcChannel } from '@shared/IpcChannel'
import { WebDAVSyncState } from './backup'
@ -145,6 +145,9 @@ export interface SettingsState {
asrApiKey: string
asrApiUrl: string
asrModel: string
// 语音通话配置
voiceCallEnabled: boolean
voiceCallModel: Model | null
// Quick Panel Triggers
enableQuickPanelTriggers: boolean
// Export Menu Options
@ -280,6 +283,9 @@ export const initialState: SettingsState = {
asrApiKey: '',
asrApiUrl: 'https://api.openai.com/v1/audio/transcriptions',
asrModel: 'whisper-1',
// 语音通话配置
voiceCallEnabled: true,
voiceCallModel: null,
// Quick Panel Triggers
enableQuickPanelTriggers: false,
// Export Menu Options
@ -702,6 +708,12 @@ const settingsSlice = createSlice({
setAsrModel: (state, action: PayloadAction<string>) => {
state.asrModel = action.payload
},
setVoiceCallEnabled: (state, action: PayloadAction<boolean>) => {
state.voiceCallEnabled = action.payload
},
setVoiceCallModel: (state, action: PayloadAction<Model | null>) => {
state.voiceCallModel = action.payload
},
// Quick Panel Triggers action
setEnableQuickPanelTriggers: (state, action: PayloadAction<boolean>) => {
state.enableQuickPanelTriggers = action.payload
@ -823,7 +835,9 @@ export const {
setAsrServiceType,
setAsrApiKey,
setAsrApiUrl,
setAsrModel
setAsrModel,
setVoiceCallEnabled,
setVoiceCallModel
} = settingsSlice.actions
export default settingsSlice.reducer