Added voice call services and updated settings

This commit is contained in:
1600822305 2025-04-11 03:37:16 +08:00
parent f6cc733421
commit a86b4ba404
10 changed files with 1173 additions and 17 deletions

View File

@@ -64,6 +64,9 @@
startRecognition();
} else if (data.type === 'stop') {
stopRecognition();
} else if (data.type === 'reset') {
// Force-reset speech recognition
forceResetRecognition();
} else {
console.warn('[Browser Page] Received unknown command type:', data.type);
}
@@ -362,6 +365,30 @@
updateStatus("识别未运行。");
}
}
function forceResetRecognition() {
console.log('[Browser Page] Force resetting recognition...');
updateStatus("强制重置语音识别...");
// First try to stop the current recognition
if (recognition) {
try {
recognition.stop();
} catch (e) {
console.error('[Browser Page] Error stopping recognition during reset:', e);
}
}
// Force the reference to null so any late results are discarded
recognition = null;
// Tell the server the reset is complete
if (ws.readyState === WebSocket.OPEN) {
ws.send(JSON.stringify({ type: 'status', message: 'reset_complete' }));
}
updateStatus("语音识别已重置,等待新指令。");
}
</script>
</body>
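
Note: setting the page-level variable to null in forceResetRecognition() drops the reference, but it does not detach the handlers already registered on the old recognition instance, so a late onresult event can still fire. A minimal guard (a sketch, not part of this commit; it assumes onresult is assigned as a plain function so that "this" is the recognition instance) makes the discard explicit:

recognition.onresult = function (event) {
// forceResetRecognition() nulled or replaced the page-level variable;
// drop any results still arriving from this stale instance.
if (recognition !== this) return;
// ... existing result handling ...
};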

View File

@@ -124,6 +124,13 @@ wss.on('connection', (ws) => {
} else {
console.log('[Server] Cannot relay STOP: Browser not connected')
}
} else if (data.type === 'reset' && ws === electronConnection) {
if (browserConnection && browserConnection.readyState === WebSocket.OPEN) {
console.log('[Server] Relaying RESET command to browser')
browserConnection.send(JSON.stringify({ type: 'reset' }))
} else {
console.log('[Server] Cannot relay RESET: Browser not connected')
}
}
// Browser sends a recognition result
else if (data.type === 'result' && ws === browserConnection) {
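
Taken together, the relay handlers above define a small WebSocket command protocol between the Electron client, the relay server, and the browser page. Spelled out as a TypeScript union for orientation (a sketch inferred from the handlers in this commit; no such type is declared anywhere in it):

type VoiceCallMessage =
| { type: 'start' } // Electron -> browser: begin recognition
| { type: 'stop' } // Electron -> browser: stop recognition
| { type: 'reset' } // Electron -> browser: force-reset recognition
| { type: 'status'; message: string } // browser -> Electron: e.g. 'reset_complete'
| { type: 'result'; data: { text: string; isFinal: boolean } }; // browser -> Electron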

View File

@@ -0,0 +1,55 @@
import React, { useState } from 'react';
import { Button, Tooltip } from 'antd';
import { PhoneOutlined, LoadingOutlined } from '@ant-design/icons';
import { useTranslation } from 'react-i18next';
import VoiceCallModal from './VoiceCallModal';
import { VoiceCallService } from '../services/VoiceCallService';
interface Props {
disabled?: boolean;
style?: React.CSSProperties;
}
const VoiceCallButton: React.FC<Props> = ({ disabled = false, style }) => {
const { t } = useTranslation();
const [isModalVisible, setIsModalVisible] = useState(false);
const [isLoading, setIsLoading] = useState(false);
const handleClick = async () => {
if (disabled || isLoading) return;
setIsLoading(true);
try {
// Initialize the voice service
await VoiceCallService.initialize();
setIsModalVisible(true);
} catch (error) {
console.error('Failed to initialize voice call:', error);
window.message.error(t('voice_call.initialization_failed'));
} finally {
setIsLoading(false);
}
};
return (
<>
<Tooltip title={t('voice_call.start')}>
<Button
type="text"
icon={isLoading ? <LoadingOutlined /> : <PhoneOutlined />}
onClick={handleClick}
disabled={disabled || isLoading}
style={style}
/>
</Tooltip>
{isModalVisible && (
<VoiceCallModal
visible={isModalVisible}
onClose={() => setIsModalVisible(false)}
/>
)}
</>
);
};
export default VoiceCallButton;

View File

@@ -0,0 +1,263 @@
import React, { useEffect, useState } from 'react';
import { Modal, Button, Space, Tooltip } from 'antd';
import {
AudioMutedOutlined,
AudioOutlined,
CloseOutlined,
PauseCircleOutlined,
PlayCircleOutlined,
SoundOutlined
} from '@ant-design/icons';
import styled from 'styled-components';
import { useTranslation } from 'react-i18next';
import VoiceVisualizer from './VoiceVisualizer';
import { VoiceCallService } from '../services/VoiceCallService';
interface Props {
visible: boolean;
onClose: () => void;
}
const VoiceCallModal: React.FC<Props> = ({ visible, onClose }) => {
const { t } = useTranslation();
const [isMuted, setIsMuted] = useState(false);
const [isPaused, setIsPaused] = useState(false);
const [transcript, setTranscript] = useState('');
const [response, setResponse] = useState('');
const [isListening, setIsListening] = useState(false);
const [isSpeaking, setIsSpeaking] = useState(false);
const [isRecording, setIsRecording] = useState(false);
const [isProcessing, setIsProcessing] = useState(false);
useEffect(() => {
const startVoiceCall = async () => {
try {
await VoiceCallService.startCall({
onTranscript: (text) => setTranscript(text),
onResponse: (text) => setResponse(text),
onListeningStateChange: setIsListening,
onSpeakingStateChange: setIsSpeaking,
});
} catch (error) {
console.error('Voice call error:', error);
window.message.error(t('voice_call.error'));
handleClose();
}
};
if (visible) {
startVoiceCall();
}
return () => {
VoiceCallService.endCall();
};
}, [visible, t]);
const handleClose = () => {
VoiceCallService.endCall();
onClose();
};
const toggleMute = () => {
const newMuteState = !isMuted;
setIsMuted(newMuteState);
VoiceCallService.setMuted(newMuteState);
};
const togglePause = () => {
const newPauseState = !isPaused;
setIsPaused(newPauseState);
VoiceCallService.setPaused(newPauseState);
};
// Press-to-talk handlers
const handleRecordStart = async (e: React.MouseEvent | React.TouchEvent) => {
e.preventDefault(); // Prevent the default touch behavior
if (isProcessing || isPaused) return;
setIsRecording(true);
await VoiceCallService.startRecording();
};
const handleRecordEnd = async (e: React.MouseEvent | React.TouchEvent) => {
e.preventDefault(); // Prevent the default touch behavior
if (!isRecording) return;
// Update the UI state immediately
setIsRecording(false);
setIsProcessing(true);
// Make sure recording has fully stopped
try {
await VoiceCallService.stopRecording();
console.log('Recording stopped');
} catch (error) {
console.error('Error stopping recording:', error);
}
// Results come back through the callbacks, so there is nothing to handle here
setTimeout(() => {
setIsProcessing(false);
}, 500); // Short delay to keep the user from immediately pressing again
};
// Handle the mouse/touch pointer leaving the button mid-press
const handleRecordCancel = async (e: React.MouseEvent | React.TouchEvent) => {
e.preventDefault();
if (isRecording) {
// Update the UI state immediately
setIsRecording(false);
setIsProcessing(true);
// Cancel the recording without sending it to the AI
try {
await VoiceCallService.cancelRecording();
console.log('Recording canceled');
} catch (error) {
console.error('Error canceling recording:', error);
}
setTimeout(() => {
setIsProcessing(false);
}, 500);
}
};
return (
<Modal
title={t('voice_call.title')}
open={visible}
onCancel={handleClose}
footer={null}
width={500}
centered
maskClosable={false}
>
<Container>
<VisualizerContainer>
<VoiceVisualizer isActive={isListening || isRecording} type="input" />
<VoiceVisualizer isActive={isSpeaking} type="output" />
</VisualizerContainer>
<TranscriptContainer>
{transcript && (
<TranscriptText>
<UserLabel>{t('voice_call.you')}:</UserLabel> {transcript}
</TranscriptText>
)}
{response && (
<ResponseText>
<AILabel>{t('voice_call.ai')}:</AILabel> {response}
</ResponseText>
)}
</TranscriptContainer>
<ControlsContainer>
<Space>
<Button
type="text"
icon={isMuted ? <AudioMutedOutlined /> : <AudioOutlined />}
onClick={toggleMute}
size="large"
title={isMuted ? t('voice_call.unmute') : t('voice_call.mute')}
/>
<Button
type="text"
icon={isPaused ? <PlayCircleOutlined /> : <PauseCircleOutlined />}
onClick={togglePause}
size="large"
title={isPaused ? t('voice_call.resume') : t('voice_call.pause')}
/>
<Tooltip title={t('voice_call.press_to_talk')}>
<RecordButton
type={isRecording ? "primary" : "default"}
icon={<SoundOutlined />}
onMouseDown={handleRecordStart}
onMouseUp={handleRecordEnd}
onMouseLeave={handleRecordCancel}
onTouchStart={handleRecordStart}
onTouchEnd={handleRecordEnd}
onTouchCancel={handleRecordCancel}
size="large"
disabled={isProcessing || isPaused}
>
{isRecording ? t('voice_call.release_to_send') : t('voice_call.press_to_talk')}
</RecordButton>
</Tooltip>
<Button
type="primary"
icon={<CloseOutlined />}
onClick={handleClose}
danger
size="large"
title={t('voice_call.end')}
/>
</Space>
</ControlsContainer>
</Container>
</Modal>
);
};
const Container = styled.div`
display: flex;
flex-direction: column;
gap: 20px;
height: 400px;
`;
const VisualizerContainer = styled.div`
display: flex;
justify-content: space-between;
height: 100px;
`;
const TranscriptContainer = styled.div`
flex: 1;
overflow-y: auto;
border: 1px solid var(--color-border);
border-radius: 8px;
padding: 16px;
background-color: var(--color-background-2);
`;
const TranscriptText = styled.p`
margin-bottom: 8px;
color: var(--color-text-1);
`;
const ResponseText = styled.p`
margin-bottom: 8px;
color: var(--color-primary);
`;
const UserLabel = styled.span`
font-weight: bold;
color: var(--color-text-1);
`;
const AILabel = styled.span`
font-weight: bold;
color: var(--color-primary);
`;
const ControlsContainer = styled.div`
display: flex;
justify-content: center;
padding: 10px 0;
`;
const RecordButton = styled(Button)`
min-width: 150px;
transition: all 0.2s;
&:active {
transform: scale(0.95);
}
`;
export default VoiceCallModal;
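
A side note on the press-to-talk wiring above: the record button registers parallel mouse and touch handlers (onMouseDown/onTouchStart, onMouseUp/onTouchEnd, and so on). The Pointer Events API, which React exposes as onPointerDown and friends, collapses both input types into one handler set. A sketch of the same wiring under that approach (illustrative only, not what this commit does):

<RecordButton
type={isRecording ? "primary" : "default"}
icon={<SoundOutlined />}
onPointerDown={handleRecordStart} // fires for mouse, touch, and pen alike
onPointerUp={handleRecordEnd}
onPointerLeave={handleRecordCancel}
onPointerCancel={handleRecordCancel} // the browser interrupted the gesture
size="large"
disabled={isProcessing || isPaused}
/>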

View File

@@ -0,0 +1,97 @@
import React, { useEffect, useRef } from 'react';
import styled from 'styled-components';
import { useTranslation } from 'react-i18next';
interface Props {
isActive: boolean;
type: 'input' | 'output';
}
const VoiceVisualizer: React.FC<Props> = ({ isActive, type }) => {
const { t } = useTranslation();
const canvasRef = useRef<HTMLCanvasElement>(null);
const animationRef = useRef<number | undefined>(undefined);
useEffect(() => {
const canvas = canvasRef.current;
if (!canvas) return;
const ctx = canvas.getContext('2d');
if (!ctx) return;
const width = canvas.width;
const height = canvas.height;
const drawVisualizer = () => {
ctx.clearRect(0, 0, width, height);
if (!isActive) {
// Draw a flat idle waveform
ctx.beginPath();
ctx.moveTo(0, height / 2);
ctx.lineTo(width, height / 2);
ctx.strokeStyle = type === 'input' ? 'var(--color-text-2)' : 'var(--color-primary)';
ctx.lineWidth = 2;
ctx.stroke();
return;
}
// Draw animated bars
const barCount = 30;
const barWidth = width / barCount;
const color = type === 'input' ? 'var(--color-text-1)' : 'var(--color-primary)';
for (let i = 0; i < barCount; i++) {
const barHeight = Math.random() * (height / 2) + 10;
const x = i * barWidth;
const y = height / 2 - barHeight / 2;
ctx.fillStyle = color;
ctx.fillRect(x, y, barWidth - 2, barHeight);
}
animationRef.current = requestAnimationFrame(drawVisualizer);
};
drawVisualizer();
return () => {
if (animationRef.current) {
cancelAnimationFrame(animationRef.current);
}
};
}, [isActive, type]);
return (
<Container $type={type}>
<Label>{type === 'input' ? t('voice_call.you') : t('voice_call.ai')}</Label>
<Canvas ref={canvasRef} width={200} height={50} />
</Container>
);
};
const Container = styled.div<{ $type: 'input' | 'output' }>`
display: flex;
flex-direction: column;
align-items: center;
width: 45%;
border-radius: 8px;
padding: 10px;
background-color: ${props =>
props.$type === 'input'
? 'var(--color-background-3)'
: 'var(--color-primary-bg)'
};
`;
const Label = styled.div`
margin-bottom: 8px;
font-weight: bold;
`;
const Canvas = styled.canvas`
width: 100%;
height: 50px;
`;
export default VoiceVisualizer;
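
One caveat in the drawing code above: a canvas 2D context does not resolve CSS custom properties, so assignments like ctx.strokeStyle = 'var(--color-text-2)' are invalid color strings and are silently ignored (the context keeps its previous style, black by default). To draw with the theme colors, resolve the variables first; a minimal sketch, assuming the variables are defined on the document root:

// Resolve a CSS custom property to a concrete color the canvas can use.
const resolveCssVar = (name: string): string =>
getComputedStyle(document.documentElement).getPropertyValue(name).trim() || '#000';

ctx.strokeStyle = resolveCssVar(type === 'input' ? '--color-text-2' : '--color-primary');
ctx.fillStyle = resolveCssVar(type === 'input' ? '--color-text-1' : '--color-primary');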

View File

@@ -1,5 +1,20 @@
{
"translation": {
"voice_call": {
"title": "Voice Call",
"start": "Start Voice Call",
"end": "End Call",
"mute": "Mute",
"unmute": "Unmute",
"pause": "Pause",
"resume": "Resume",
"you": "You",
"ai": "AI",
"press_to_talk": "Press to Talk",
"release_to_send": "Release to Send",
"initialization_failed": "Failed to initialize voice call",
"error": "Voice call error"
},
"agents": {
"add.button": "Add to Assistant",
"add.knowledge_base": "Knowledge Base",

View File

@@ -1,5 +1,20 @@
{
"translation": {
"voice_call": {
"title": "语音通话",
"start": "开始语音通话",
"end": "结束通话",
"mute": "静音",
"unmute": "取消静音",
"pause": "暂停",
"resume": "继续",
"you": "您",
"ai": "AI",
"press_to_talk": "长按说话",
"release_to_send": "松开发送",
"initialization_failed": "初始化语音通话失败",
"error": "语音通话出错"
},
"agents": {
"add.button": "添加到助手",
"add.knowledge_base": "知识库",

View File

@@ -16,6 +16,7 @@ import {
import ASRButton from '@renderer/components/ASRButton'
import { QuickPanelListItem, QuickPanelView, useQuickPanel } from '@renderer/components/QuickPanel'
import TranslateButton from '@renderer/components/TranslateButton'
import VoiceCallButton from '@renderer/components/VoiceCallButton'
import { isGenerateImageModel, isVisionModel, isWebSearchModel } from '@renderer/config/models'
import db from '@renderer/databases'
import { useAssistant } from '@renderer/hooks/useAssistant'
@@ -1024,6 +1025,7 @@ const Inputbar: FC<Props> = ({ assistant: _assistant, setActiveTopic, topic }) =
})
}}
/>
<VoiceCallButton disabled={loading} />
{loading && (
<Tooltip placement="top" title={t('chat.input.pause')} arrow>
<ToolbarButton type="text" onClick={onPause} style={{ marginRight: -2, marginTop: 1 }}>

View File

@@ -126,25 +126,36 @@ class ASRService {
// If no final result arrived, show a processing-complete message
window.message.success({ content: i18n.t('settings.asr.completed'), key: 'asr-processing' })
} else if (data.message === 'reset_complete') {
// Speech recognition has been force-reset
console.log('[ASRService] Speech recognition was force-reset')
// Capture the callback before clearing it so it can still fire below
const callback = this.resultCallback
this.isRecording = false
this.resultCallback = null
// Show a reset-complete toast
window.message.info({ content: '语音识别已重置', key: 'asr-reset' })
// If a callback is registered, fire it with an empty string to reset the button state
if (callback && typeof callback === 'function') {
// An empty string leaves the input box untouched but still resets the button state
callback('')
setTimeout(() => callback(''), 100)
}
}
} else if (data.type === 'result' && data.data) {
// Handle a recognition result
console.log('[ASRService] Received recognition result:', data.data)
if (this.resultCallback && typeof this.resultCallback === 'function') {
// Only invoke the callback on a final result
if (data.data.isFinal && data.data.text && data.data.text.trim()) {
console.log('[ASRService] Final result received, invoking callback, text:', data.data.text)
this.resultCallback(data.data.text)
window.message.success({ content: i18n.t('settings.asr.success'), key: 'asr-processing' })
} else if (!data.data.isFinal) {
// Interim result: log only, do not invoke the callback
console.log('[ASRService] Interim result received, text:', data.data.text)
// Pass every result to the callback, tagged with its isFinal state
if (data.data.text && data.data.text.trim()) {
if (data.data.isFinal) {
console.log('[ASRService] Final result received, invoking callback, text:', data.data.text)
this.resultCallback(data.data.text, true)
window.message.success({ content: i18n.t('settings.asr.success'), key: 'asr-processing' })
} else {
// Interim result: still invoke the callback, marked as non-final
console.log('[ASRService] Interim result received, invoking callback, text:', data.data.text)
this.resultCallback(data.data.text, false)
}
} else {
console.log('[ASRService] Empty recognition result, callback not invoked')
}
@@ -189,9 +200,9 @@
}
// Holds the result callback
resultCallback: ((text: string) => void) | null = null
resultCallback: ((text: string, isFinal?: boolean) => void) | null = null
startRecording = async (onTranscribed?: (text: string) => void): Promise<void> => {
startRecording = async (onTranscribed?: (text: string, isFinal?: boolean) => void): Promise<void> => {
try {
const { asrEnabled, asrServiceType } = store.getState().settings
@@ -295,7 +306,7 @@
* @param onTranscribed
* @returns Promise<void>
*/
stopRecording = async (onTranscribed: (text: string) => void): Promise<void> => {
stopRecording = async (onTranscribed: (text: string, isFinal?: boolean) => void): Promise<void> => {
const { asrServiceType } = store.getState().settings
// When using the local server
@@ -318,7 +329,8 @@
// Invoke the callback immediately so the button state updates right away
if (onTranscribed) {
// An empty string leaves the input box untouched but still resets the button state
setTimeout(() => onTranscribed(''), 100)
// Pass false to mark this as a state update rather than a final result
setTimeout(() => onTranscribed('', false), 100)
}
} else {
throw new Error('WebSocket connection not ready')
@@ -493,14 +505,21 @@
// When using the local server
if (asrServiceType === 'local') {
if (this.isRecording) {
// Reset state and callback first so no later results get processed
this.isRecording = false
this.resultCallback = null
// Send the stop command
if (this.ws && this.wsConnected) {
this.ws.send(JSON.stringify({ type: 'stop' }))
}
// Reset state
this.isRecording = false
this.resultCallback = null
// Send an extra command asking the browser to force-reset recognition
setTimeout(() => {
if (this.ws && this.wsConnected) {
this.ws.send(JSON.stringify({ type: 'reset' }))
}
}, 100)
}
console.log('Speech recognition canceled')
window.message.info({ content: i18n.t('settings.asr.canceled'), key: 'asr-recording' })
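
The widened callback signature above, (text: string, isFinal?: boolean), lets callers render interim text without committing it. A consumer sketch (a hypothetical caller, not part of this commit; renderTranscript is an assumed UI hook):

let committed = ''
let preview = ''

ASRService.startRecording((text, isFinal) => {
if (!text) return // an empty string is only a button-state reset
if (isFinal) {
committed += (committed ? ' ' : '') + text // fold final segments into the transcript
preview = ''
} else {
preview = text // interim text: display only, never committed
}
renderTranscript(committed, preview) // assumed UI hook
})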

View File

@@ -0,0 +1,656 @@
import store from '@renderer/store';
import { fetchChatCompletion } from '@renderer/services/ApiService';
import { getAssistantMessage, getUserMessage } from '@renderer/services/MessagesService';
import { getDefaultAssistant } from '@renderer/services/AssistantService';
import TTSService from '@renderer/services/TTSService';
import ASRService from '@renderer/services/ASRService';
// Import types
import type { Message } from '@renderer/types';
interface VoiceCallCallbacks {
onTranscript: (text: string) => void;
onResponse: (text: string) => void;
onListeningStateChange: (isListening: boolean) => void;
onSpeakingStateChange: (isSpeaking: boolean) => void;
}
// Declare SpeechRecognition globals for TypeScript
declare global {
interface Window {
SpeechRecognition: any;
webkitSpeechRecognition: any;
}
}
class VoiceCallServiceClass {
private recognition: any = null;
private isCallActive = false;
private isRecording = false; // Recording state
private isMuted = false;
private isPaused = false;
private callbacks: VoiceCallCallbacks | null = null;
private _currentTranscript = ''; // Underscore prefix avoids an unused-variable warning
private _accumulatedTranscript = ''; // Accumulated recognition text
private conversationHistory: { role: string; content: string }[] = [];
private isProcessingResponse = false;
private ttsService = TTSService;
private recordingTimeout: NodeJS.Timeout | null = null; // Recording timeout timer
async initialize() {
// Check microphone permission
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
stream.getTracks().forEach(track => track.stop());
} catch (error) {
console.error('Microphone permission denied:', error);
throw new Error('Microphone permission denied');
}
// Get the current ASR service type
const { asrServiceType } = store.getState().settings;
// When using browser ASR, check browser support
if (asrServiceType === 'browser') {
if (!('webkitSpeechRecognition' in window) && !('SpeechRecognition' in window)) {
throw new Error('Speech recognition not supported in this browser');
}
// Initialize browser speech recognition
const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
this.recognition = new SpeechRecognition();
this.recognition.continuous = true;
this.recognition.interimResults = true;
this.recognition.lang = navigator.language || 'zh-CN';
} else if (asrServiceType === 'local') {
// When using local-server ASR, check the connection
try {
// Try to connect to the local ASR server
const connected = await ASRService.connectToWebSocketServer();
if (!connected) {
throw new Error('Cannot connect to the speech recognition service');
}
} catch (error) {
console.error('Failed to connect to ASR server:', error);
throw new Error('Failed to connect to ASR server');
}
}
return true;
}
async startCall(callbacks: VoiceCallCallbacks) {
this.callbacks = callbacks;
this.isCallActive = true;
this.conversationHistory = [];
// Get the current ASR service type
const { asrServiceType } = store.getState().settings;
// Initialize according to the ASR service type
if (asrServiceType === 'browser') {
if (!this.recognition) {
throw new Error('Browser speech recognition not initialized');
}
// Wire up browser speech recognition event handlers
this.recognition.onresult = (event: any) => {
let interimTranscript = '';
let finalTranscript = '';
for (let i = event.resultIndex; i < event.results.length; ++i) {
if (event.results[i].isFinal) {
finalTranscript += event.results[i][0].transcript;
} else {
interimTranscript += event.results[i][0].transcript;
}
}
if (interimTranscript) {
// Update the current interim result
this._currentTranscript = interimTranscript;
// Show the accumulated text plus the current interim text
this.callbacks?.onTranscript(this._accumulatedTranscript + ' ' + interimTranscript);
}
if (finalTranscript) {
// Fold the final result into the accumulated text
if (this._accumulatedTranscript) {
// Text already accumulated: append with a space
this._accumulatedTranscript += ' ' + finalTranscript;
} else {
// First segment: assign directly
this._accumulatedTranscript = finalTranscript;
}
// Clear the current interim result
this._currentTranscript = '';
// Show the full accumulated result
this.callbacks?.onTranscript(this._accumulatedTranscript);
// While recording only the transcript is updated; handleUserSpeech is not triggered.
// The complete recording is processed once the button is released.
}
};
this.recognition.onstart = () => {
this.isRecording = true;
this.callbacks?.onListeningStateChange(true);
};
this.recognition.onend = () => {
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
};
this.recognition.onerror = (event: any) => {
console.error('Speech recognition error', event.error);
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
};
}
// Play the welcome message
const welcomeMessage = '您好,我是您的AI助手,请长按说话按钮进行对话。';
this.callbacks?.onResponse(welcomeMessage);
// Track TTS state
const ttsStateHandler = (isPlaying: boolean) => {
this.callbacks?.onSpeakingStateChange(isPlaying);
};
// Listen for TTS playback state changes
window.addEventListener('tts-state-change', (event: any) => {
ttsStateHandler(event.detail.isPlaying);
});
// Play the welcome message and set the initial speaking state manually
this.callbacks?.onSpeakingStateChange(true);
this.ttsService.speak(welcomeMessage);
// Make sure the state is correct once the welcome message finishes
setTimeout(() => {
if (this.ttsService && !this.ttsService.isCurrentlyPlaying()) {
this.callbacks?.onSpeakingStateChange(false);
}
}, 5000); // Check TTS state after 5 seconds
return true;
}
/**
* Start recording (press-to-talk)
* @returns Promise<boolean>
*/
async startRecording(): Promise<boolean> {
if (!this.isCallActive || this.isPaused || this.isProcessingResponse || this.isRecording) {
return false;
}
// Reset the accumulated text
this._accumulatedTranscript = '';
// Get the current ASR service type
const { asrServiceType } = store.getState().settings;
try {
if (asrServiceType === 'browser') {
// Browser ASR
if (!this.recognition) {
throw new Error('Browser speech recognition not initialized');
}
this.recognition.start();
this.isRecording = true;
} else if (asrServiceType === 'local') {
// Local-server ASR
await ASRService.startRecording((text, isFinal) => {
if (text) {
if (isFinal) {
// Final result: fold it into the accumulated text
if (this._accumulatedTranscript) {
// Text already accumulated: append with a space
this._accumulatedTranscript += ' ' + text;
} else {
// First segment: assign directly
this._accumulatedTranscript = text;
}
// Clear the current interim result
this._currentTranscript = '';
// Show the full accumulated result
this.callbacks?.onTranscript(this._accumulatedTranscript);
} else {
// Interim result: update the current transcript
this._currentTranscript = text;
// Show the accumulated text plus the current interim text
this.callbacks?.onTranscript(this._accumulatedTranscript + ' ' + text);
}
// While recording only the transcript is updated; handleUserSpeech is not triggered.
// The complete recording is processed once the button is released.
}
});
this.isRecording = true;
this.callbacks?.onListeningStateChange(true);
} else if (asrServiceType === 'openai') {
// OpenAI ASR
await ASRService.startRecording();
this.isRecording = true;
this.callbacks?.onListeningStateChange(true);
}
// Cap the recording length in case the user forgets to release the button
this.recordingTimeout = setTimeout(() => {
if (this.isRecording) {
this.stopRecording();
}
}, 60000); // 60-second maximum recording time
return true;
} catch (error) {
console.error('Failed to start recording:', error);
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
return false;
}
}
/**
* Stop recording and send the result to the AI
* @returns Promise<boolean>
*/
async stopRecording(): Promise<boolean> {
if (!this.isCallActive || !this.isRecording) {
return false;
}
// Clear the recording timeout
if (this.recordingTimeout) {
clearTimeout(this.recordingTimeout);
this.recordingTimeout = null;
}
// Get the current ASR service type
const { asrServiceType } = store.getState().settings;
try {
// Capture the current transcript so it can be sent to the AI after release
const currentTranscript = this._currentTranscript;
// Capture the accumulated transcript
const accumulatedTranscript = this._accumulatedTranscript;
if (asrServiceType === 'browser') {
// Browser ASR
if (!this.recognition) {
throw new Error('Browser speech recognition not initialized');
}
this.recognition.stop();
// The onend event will set isRecording = false
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
// Prefer the accumulated text when available
if (accumulatedTranscript) {
console.log('Sending accumulated recognition result to the AI:', accumulatedTranscript);
this.handleUserSpeech(accumulatedTranscript);
} else if (currentTranscript) {
// No accumulated result: fall back to the current one
console.log('No accumulated result, using the current one:', currentTranscript);
this.handleUserSpeech(currentTranscript);
}
} else if (asrServiceType === 'local') {
// Local-server ASR
// Create a promise that waits for the final result
const finalResultPromise = new Promise<string>((resolve) => {
// Set a timeout so we never wait indefinitely
const timeoutId = setTimeout(() => {
console.log('Timed out waiting for the final result, using the current one');
resolve(this._currentTranscript);
}, 1500); // 1.5-second timeout
// Callback that receives the final result
const resultCallback = (text: string) => {
// An empty string is only a state reset; ignore it
if (text === '') return;
if (text) {
clearTimeout(timeoutId);
console.log('Received final recognition result:', text);
this._currentTranscript = text;
this.callbacks?.onTranscript(text);
resolve(text);
}
};
// Stop (not cancel) the recording so the final result still arrives
ASRService.stopRecording(resultCallback);
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
// Extra safety: send a reset shortly after stopping
setTimeout(() => {
// The reset keeps the browser from sending any further results
ASRService.cancelRecording();
}, 2000); // Force-cancel after 2 seconds as a safety net
});
// Await the final result
const finalText = await finalResultPromise;
// Prefer the accumulated text when available
if (accumulatedTranscript) {
console.log('Sending accumulated recognition result to the AI:', accumulatedTranscript);
this.handleUserSpeech(accumulatedTranscript);
} else if (finalText) {
// No accumulated result: send the final one
console.log('Sending final recognition result to the AI:', finalText);
this.handleUserSpeech(finalText);
} else if (currentTranscript) {
// No final result either: fall back to the current one
console.log('No final result, using the current one:', currentTranscript);
this.handleUserSpeech(currentTranscript);
}
} else if (asrServiceType === 'openai') {
// OpenAI ASR
await ASRService.stopRecording((text) => {
// Record the final transcript
if (text) {
this._currentTranscript = text;
this.callbacks?.onTranscript(text);
}
});
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
// Use the latest transcript
const finalTranscript = this._currentTranscript;
if (finalTranscript) {
this.handleUserSpeech(finalTranscript);
}
}
return true;
} catch (error) {
console.error('Failed to stop recording:', error);
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
return false;
}
}
async handleUserSpeech(text: string) {
if (!this.isCallActive || this.isProcessingResponse || this.isPaused) return;
// Pause recognition so it does not keep listening while the AI replies
const { asrServiceType } = store.getState().settings;
if (asrServiceType === 'browser') {
this.recognition?.stop();
} else if (asrServiceType === 'local' || asrServiceType === 'openai') {
ASRService.cancelRecording();
}
this.isProcessingResponse = true;
try {
// Get the current assistant
const assistant = getDefaultAssistant();
// Build a minimal Topic object
const topic = {
id: 'voice-call',
assistantId: assistant.id,
name: 'Voice Call',
createdAt: new Date().toISOString(),
updatedAt: new Date().toISOString(),
messages: []
};
// Build the user message
const userMessage = getUserMessage({
assistant,
topic,
type: 'text',
content: text
});
// Build the assistant message
const assistantMessage = getAssistantMessage({
assistant,
topic
});
// Update the conversation history
this.conversationHistory.push({ role: 'user', content: text });
// Build the message list:
// convert history entries into proper Message objects
const historyMessages = this.conversationHistory.map(msg => {
if (msg.role === 'user') {
return getUserMessage({
assistant,
topic,
type: 'text',
content: msg.content
});
} else {
const assistantMsg = getAssistantMessage({
assistant,
topic
});
return { ...assistantMsg, content: msg.content, status: 'success' };
}
});
// Prepend a voice-call prompt to the user turn. (The prompt's eight numbered
// guidelines were garbled in extraction; what survives is that replies should
// be short, conversational, and avoid Markdown-style formatting.)
const voiceCallPrompt = `当前是语音通话模式。请注意:回复要简短、口语化,不要使用Markdown等格式。`;
// Build the system instruction message
const systemMessage = getUserMessage({
assistant,
topic,
type: 'text',
content: voiceCallPrompt
});
// Set the user message content to the raw transcript
userMessage.content = text;
// Build the final message list
// (the type assertion papers over typing mismatches)
const messages = [systemMessage, ...historyMessages, userMessage] as Message[];
// Handle the streamed response
let fullResponse = '';
try {
// Call the real LLM API
await fetchChatCompletion({
message: assistantMessage,
messages,
assistant,
onResponse: async (msg) => {
if (msg.content && msg.content !== fullResponse) {
fullResponse = msg.content;
// Update the UI
this.callbacks?.onResponse(fullResponse);
// If TTS is already playing, stop it
if (this.ttsService.isCurrentlyPlaying()) {
this.ttsService.stop();
}
}
}
});
// Speak the full response
if (!this.isMuted && this.isCallActive) {
// Manually set the speaking state
this.callbacks?.onSpeakingStateChange(true);
this.ttsService.speak(fullResponse);
// Make sure the state is correct once speech ends
setTimeout(() => {
if (this.ttsService && !this.ttsService.isCurrentlyPlaying()) {
this.callbacks?.onSpeakingStateChange(false);
}
}, 1000); // Check TTS state after 1 second
}
// Update the conversation history
this.conversationHistory.push({ role: 'assistant', content: fullResponse });
} catch (innerError) {
console.error('Error generating response:', innerError);
// On error, fall back to a simple reply
fullResponse = `抱歉,处理您的请求时出错了。`;
this.callbacks?.onResponse(fullResponse);
if (!this.isMuted && this.isCallActive) {
// Manually set the speaking state
this.callbacks?.onSpeakingStateChange(true);
this.ttsService.speak(fullResponse);
// Make sure the state is correct once speech ends
setTimeout(() => {
if (this.ttsService && !this.ttsService.isCurrentlyPlaying()) {
this.callbacks?.onSpeakingStateChange(false);
}
}, 1000); // Check TTS state after 1 second
}
}
} catch (error) {
console.error('Error processing voice response:', error);
} finally {
this.isProcessingResponse = false;
// Do not auto-resume recognition; wait for the user's next long press.
// In press-to-talk mode there is nothing to resume automatically.
}
}
/**
* Cancel recording without sending anything to the AI
* @returns Promise<boolean>
*/
async cancelRecording(): Promise<boolean> {
if (!this.isCallActive || !this.isRecording) {
return false;
}
// Clear the recording timeout
if (this.recordingTimeout) {
clearTimeout(this.recordingTimeout);
this.recordingTimeout = null;
}
// Get the current ASR service type
const { asrServiceType } = store.getState().settings;
try {
if (asrServiceType === 'browser') {
// Browser ASR
if (!this.recognition) {
throw new Error('Browser speech recognition not initialized');
}
this.recognition.stop();
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
} else if (asrServiceType === 'local') {
// Local-server ASR
ASRService.cancelRecording();
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
} else if (asrServiceType === 'openai') {
// OpenAI ASR
ASRService.cancelRecording();
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
}
// Clear the current transcript
this._currentTranscript = '';
this.callbacks?.onTranscript('');
return true;
} catch (error) {
console.error('Failed to cancel recording:', error);
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
return false;
}
}
setMuted(muted: boolean) {
this.isMuted = muted;
// When muting, stop any current TTS playback
if (muted && this.ttsService.isCurrentlyPlaying()) {
this.ttsService.stop();
}
}
setPaused(paused: boolean) {
this.isPaused = paused;
// Get the current ASR service type
const { asrServiceType } = store.getState().settings;
if (paused) {
// Pause speech recognition
if (asrServiceType === 'browser') {
this.recognition?.stop();
} else if (asrServiceType === 'local' || asrServiceType === 'openai') {
ASRService.cancelRecording();
}
// Pause TTS
if (this.ttsService.isCurrentlyPlaying()) {
this.ttsService.stop();
}
}
// Do not auto-resume recognition; wait for the user's next long press
}
endCall() {
this.isCallActive = false;
// Get the current ASR service type
const { asrServiceType } = store.getState().settings;
// Stop speech recognition
if (asrServiceType === 'browser') {
this.recognition?.stop();
} else if (asrServiceType === 'local' || asrServiceType === 'openai') {
ASRService.cancelRecording();
}
// Stop TTS
if (this.ttsService.isCurrentlyPlaying()) {
this.ttsService.stop();
}
this.callbacks = null;
}
}
export const VoiceCallService = new VoiceCallServiceClass();
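
For orientation, the lifecycle of the singleton exported above, as the components in this commit drive it (a sketch; the callback bodies are illustrative):

// VoiceCallButton.handleClick: check permissions and the ASR backend
await VoiceCallService.initialize();

// VoiceCallModal mount: register UI callbacks; a greeting is spoken
await VoiceCallService.startCall({
onTranscript: (text) => console.log('you said:', text),
onResponse: (text) => console.log('AI replied:', text),
onListeningStateChange: (listening) => console.log({ listening }),
onSpeakingStateChange: (speaking) => console.log({ speaking }),
});

// Press-to-talk: holding the button records, releasing sends to the AI
await VoiceCallService.startRecording();
await VoiceCallService.stopRecording(); // or cancelRecording() if the pointer leaves

// Modal close/unmount
VoiceCallService.endCall();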