添加了语音通话功能相关服务并更新了设置

2026-01-07 05:39:05 +08:00 · 2025-04-11 03:37:16 +08:00 · 2025-04-11 03:37:16 +08:00 · a86b4ba404
commit a86b4ba404
parent f6cc733421
10 changed files with 1173 additions and 17 deletions
--- a/src/renderer/src/assets/asr-server/index.html
+++ b/src/renderer/src/assets/asr-server/index.html
@ -64,6 +64,9 @@
                startRecognition();
            } else if (data.type === 'stop') {
                stopRecognition();
            } else if (data.type === 'reset') {
                // 强制重置语音识别
                forceResetRecognition();
            } else {
                console.warn('[Browser Page] Received unknown command type:', data.type);
            }
@ -362,6 +365,30 @@
                updateStatus("识别未运行。");
            }
        }
        /**
         * Forcefully reset speech recognition in response to a `reset` command.
         *
         * Detaches the result handler, stops the current recognition instance,
         * drops the reference to it, and reports `reset_complete` over the
         * WebSocket when the socket is open.
         */
        function forceResetRecognition() {
            console.log('[Browser Page] Force resetting recognition...');
            updateStatus("强制重置语音识别...");
            if (recognition) {
                // stop() still tries to deliver a result for the audio captured so
                // far, so nulling the variable alone does not discard it. Detach the
                // handler first so nothing from this session is forwarded.
                // NOTE(review): assumes results are delivered via `onresult` — confirm.
                recognition.onresult = null;
                try {
                    recognition.stop();
                } catch (e) {
                    console.error('[Browser Page] Error stopping recognition during reset:', e);
                }
            }
            // Drop the instance so the next start command builds on a clean state.
            recognition = null;
            // Tell the server the reset finished (only possible on an open socket).
            if (ws && ws.readyState === WebSocket.OPEN) {
                ws.send(JSON.stringify({ type: 'status', message: 'reset_complete' }));
            }
            updateStatus("语音识别已重置，等待新指令。");
        }
    </script>
 </body>
--- a/src/renderer/src/assets/asr-server/server.js
+++ b/src/renderer/src/assets/asr-server/server.js
@ -124,6 +124,13 @@ wss.on('connection', (ws) => {
      } else {
        console.log('[Server] Cannot relay STOP: Browser not connected')
      }
    } else if (data.type === 'reset' && ws === electronConnection) {
      if (browserConnection && browserConnection.readyState === WebSocket.OPEN) {
        console.log('[Server] Relaying RESET command to browser')
        browserConnection.send(JSON.stringify({ type: 'reset' }))
      } else {
        console.log('[Server] Cannot relay RESET: Browser not connected')
      }
    }
    // 浏览器发送识别结果
    else if (data.type === 'result' && ws === browserConnection) {
--- a/src/renderer/src/components/VoiceCallButton.tsx
+++ b/src/renderer/src/components/VoiceCallButton.tsx
@ -0,0 +1,55 @@
 import React, { useState } from 'react';
 import { Button, Tooltip } from 'antd';
 import { PhoneOutlined, LoadingOutlined } from '@ant-design/icons';
 import { useTranslation } from 'react-i18next';
 import VoiceCallModal from './VoiceCallModal';
 import { VoiceCallService } from '../services/VoiceCallService';
 /** Props accepted by the voice-call toolbar button. */
 interface Props {
  /** When true the button is rendered disabled and clicks are ignored (defaults to false). */
  disabled?: boolean;
  /** Inline style passed through to the underlying button. */
  style?: React.CSSProperties;
 }
 /**
  * Toolbar button that initializes the voice-call service and, once that
  * succeeds, shows the voice-call modal. While initialization is running the
  * button shows a spinner and is disabled.
  */
 const VoiceCallButton: React.FC<Props> = ({ disabled = false, style }) => {
  const { t } = useTranslation();
  const [isModalVisible, setIsModalVisible] = useState(false);
  const [isLoading, setIsLoading] = useState(false);

  // The button is inert both when the caller disables it and while we are initializing.
  const isBlocked = disabled || isLoading;
  const buttonIcon = isLoading ? <LoadingOutlined /> : <PhoneOutlined />;

  const closeModal = () => setIsModalVisible(false);

  const handleClick = async () => {
    if (isBlocked) {
      return;
    }
    setIsLoading(true);
    try {
      // Initialize the voice service before opening the modal.
      await VoiceCallService.initialize();
      setIsModalVisible(true);
    } catch (error) {
      console.error('Failed to initialize voice call:', error);
      window.message.error(t('voice_call.initialization_failed'));
    } finally {
      setIsLoading(false);
    }
  };

  return (
    <>
      <Tooltip title={t('voice_call.start')}>
        <Button type="text" icon={buttonIcon} onClick={handleClick} disabled={isBlocked} style={style} />
      </Tooltip>
      {isModalVisible ? <VoiceCallModal visible onClose={closeModal} /> : null}
    </>
  );
 };
 export default VoiceCallButton;
--- a/src/renderer/src/components/VoiceCallModal.tsx
+++ b/src/renderer/src/components/VoiceCallModal.tsx
@ -0,0 +1,263 @@
 import React, { useEffect, useState } from 'react';
 import { Modal, Button, Space, Tooltip } from 'antd';
 import {
  AudioMutedOutlined,
  AudioOutlined,
  CloseOutlined,
  PauseCircleOutlined,
  PlayCircleOutlined,
  SoundOutlined
 } from '@ant-design/icons';
 import styled from 'styled-components';
 import { useTranslation } from 'react-i18next';
 import VoiceVisualizer from './VoiceVisualizer';
 import { VoiceCallService } from '../services/VoiceCallService';
 /** Props accepted by the voice-call modal. */
 interface Props {
  /** Whether the modal is open; the call is started when this is true. */
  visible: boolean;
  /** Invoked after the call has been ended and the modal should be dismissed. */
  onClose: () => void;
 }
 /**
  * Modal that hosts a push-to-talk voice call.
  *
  * While `visible` is true it starts a call through `VoiceCallService`, shows
  * the latest transcript and AI response, and offers mute / pause /
  * push-to-talk / hang-up controls. The call is ended when the effect is
  * cleaned up and when the user closes the modal.
  */
 const VoiceCallModal: React.FC<Props> = ({ visible, onClose }) => {
  const { t } = useTranslation();
  const [isMuted, setIsMuted] = useState(false);
  const [isPaused, setIsPaused] = useState(false);
  const [transcript, setTranscript] = useState('');
  const [response, setResponse] = useState('');
  const [isListening, setIsListening] = useState(false);
  const [isSpeaking, setIsSpeaking] = useState(false);
  const [isRecording, setIsRecording] = useState(false);
  const [isProcessing, setIsProcessing] = useState(false);
  // Start the call when the modal becomes visible; end it on cleanup.
  // Note: `t` is a dependency, so the call is also restarted when `t` changes.
  useEffect(() => {
    const startVoiceCall = async () => {
      try {
        await VoiceCallService.startCall({
          onTranscript: (text) => setTranscript(text),
          onResponse: (text) => setResponse(text),
          onListeningStateChange: setIsListening,
          onSpeakingStateChange: setIsSpeaking,
        });
      } catch (error) {
        console.error('Voice call error:', error);
        window.message.error(t('voice_call.error'));
        handleClose();
      }
    };
    if (visible) {
      startVoiceCall();
    }
    return () => {
      VoiceCallService.endCall();
    };
  }, [visible, t]);
  const handleClose = () => {
    VoiceCallService.endCall();
    onClose();
  };
  const toggleMute = () => {
    const newMuteState = !isMuted;
    setIsMuted(newMuteState);
    VoiceCallService.setMuted(newMuteState);
  };
  const togglePause = () => {
    const newPauseState = !isPaused;
    setIsPaused(newPauseState);
    VoiceCallService.setPaused(newPauseState);
  };
  // Push-to-talk: press starts recording.
  const handleRecordStart = async (e: React.MouseEvent | React.TouchEvent) => {
    e.preventDefault(); // suppress the default behaviour of the touch/mouse event
    // Ignore a second press while a recording is already shown as active.
    if (isProcessing || isPaused || isRecording) return;
    setIsRecording(true);
    try {
      // startRecording() resolves to false when the service did not start
      // recording; roll the button back so it does not stay in the
      // "release to send" state with nothing being recorded.
      const started = await VoiceCallService.startRecording();
      if (!started) {
        setIsRecording(false);
      }
    } catch (error) {
      console.error('开始录音出错:', error);
      setIsRecording(false);
    }
  };
  // Push-to-talk: release stops recording and lets the service send the text.
  const handleRecordEnd = async (e: React.MouseEvent | React.TouchEvent) => {
    e.preventDefault(); // suppress the default behaviour of the touch/mouse event
    if (!isRecording) return;
    // Update the UI immediately.
    setIsRecording(false);
    setIsProcessing(true);
    // Make sure recording has fully stopped.
    try {
      await VoiceCallService.stopRecording();
      console.log('录音已停止');
    } catch (error) {
      console.error('停止录音出错:', error);
    }
    // Results arrive through the callbacks passed to startCall; nothing to do here.
    setTimeout(() => {
      setIsProcessing(false);
    }, 500); // short delay so the user cannot immediately press again
  };
  // Pointer/touch left the button: cancel the recording without sending it.
  const handleRecordCancel = async (e: React.MouseEvent | React.TouchEvent) => {
    e.preventDefault();
    if (isRecording) {
      // Update the UI immediately.
      setIsRecording(false);
      setIsProcessing(true);
      // Cancel the recording; nothing is sent to the AI.
      try {
        await VoiceCallService.cancelRecording();
        console.log('录音已取消');
      } catch (error) {
        console.error('取消录音出错:', error);
      }
      setTimeout(() => {
        setIsProcessing(false);
      }, 500);
    }
  };
  return (
    <Modal
      title={t('voice_call.title')}
      open={visible}
      onCancel={handleClose}
      footer={null}
      width={500}
      centered
      maskClosable={false}
    >
      <Container>
        <VisualizerContainer>
          <VoiceVisualizer isActive={isListening || isRecording} type="input" />
          <VoiceVisualizer isActive={isSpeaking} type="output" />
        </VisualizerContainer>
        <TranscriptContainer>
          {transcript && (
            <TranscriptText>
              <UserLabel>{t('voice_call.you')}:</UserLabel> {transcript}
            </TranscriptText>
          )}
          {response && (
            <ResponseText>
              <AILabel>{t('voice_call.ai')}:</AILabel> {response}
            </ResponseText>
          )}
        </TranscriptContainer>
        <ControlsContainer>
          <Space>
            <Button
              type="text"
              icon={isMuted ? <AudioMutedOutlined /> : <AudioOutlined />}
              onClick={toggleMute}
              size="large"
              title={isMuted ? t('voice_call.unmute') : t('voice_call.mute')}
            />
            <Button
              type="text"
              icon={isPaused ? <PlayCircleOutlined /> : <PauseCircleOutlined />}
              onClick={togglePause}
              size="large"
              title={isPaused ? t('voice_call.resume') : t('voice_call.pause')}
            />
            <Tooltip title={t('voice_call.press_to_talk')}>
              <RecordButton
                type={isRecording ? "primary" : "default"}
                icon={<SoundOutlined />}
                onMouseDown={handleRecordStart}
                onMouseUp={handleRecordEnd}
                onMouseLeave={handleRecordCancel}
                onTouchStart={handleRecordStart}
                onTouchEnd={handleRecordEnd}
                onTouchCancel={handleRecordCancel}
                size="large"
                disabled={isProcessing || isPaused}
              >
                {isRecording ? t('voice_call.release_to_send') : t('voice_call.press_to_talk')}
              </RecordButton>
            </Tooltip>
            <Button
              type="primary"
              icon={<CloseOutlined />}
              onClick={handleClose}
              danger
              size="large"
              title={t('voice_call.end')}
            />
          </Space>
        </ControlsContainer>
      </Container>
    </Modal>
  );
 };
 // Modal body: vertical flex column with a fixed 400px height.
 const Container = styled.div`
  display: flex;
  flex-direction: column;
  gap: 20px;
  height: 400px;
 `;
 // Row holding the two visualizers, pushed to opposite sides.
 const VisualizerContainer = styled.div`
  display: flex;
  justify-content: space-between;
  height: 100px;
 `;
 // Scrollable, bordered area that takes the remaining height and shows transcript/response text.
 const TranscriptContainer = styled.div`
  flex: 1;
  overflow-y: auto;
  border: 1px solid var(--color-border);
  border-radius: 8px;
  padding: 16px;
  background-color: var(--color-background-2);
 `;
 // Paragraph for the user's recognized speech.
 const TranscriptText = styled.p`
  margin-bottom: 8px;
  color: var(--color-text-1);
 `;
 // Paragraph for the AI response, drawn in the primary colour.
 const ResponseText = styled.p`
  margin-bottom: 8px;
  color: var(--color-primary);
 `;
 // Bold speaker label preceding the user's text.
 const UserLabel = styled.span`
  font-weight: bold;
  color: var(--color-text-1);
 `;
 // Bold speaker label preceding the AI's text.
 const AILabel = styled.span`
  font-weight: bold;
  color: var(--color-primary);
 `;
 // Centered row for the call control buttons.
 const ControlsContainer = styled.div`
  display: flex;
  justify-content: center;
  padding: 10px 0;
 `;
 // Push-to-talk button: minimum width plus a slight shrink while pressed.
 const RecordButton = styled(Button)`
  min-width: 150px;
  transition: all 0.2s;
  &:active {
    transform: scale(0.95);
  }
 `;
 export default VoiceCallModal;
--- a/src/renderer/src/components/VoiceVisualizer.tsx
+++ b/src/renderer/src/components/VoiceVisualizer.tsx
@ -0,0 +1,97 @@
 import React, { useEffect, useRef } from 'react';
 import styled from 'styled-components';
 import { useTranslation } from 'react-i18next';
 /** Props accepted by the voice visualizer. */
 interface Props {
  /** True draws animated bars; false draws a single flat line. */
  isActive: boolean;
  /** Which side is visualized; selects the label and the colours. */
  type: 'input' | 'output';
 }
 /**
  * Canvas-based activity indicator for one side of a voice call.
  *
  * When inactive it draws a flat horizontal line. When active it redraws 30
  * bars of random height on every animation frame; the bars are decorative
  * and are not derived from audio data.
  */
 const VoiceVisualizer: React.FC<Props> = ({ isActive, type }) => {
  const { t } = useTranslation();
  const canvasRef = useRef<HTMLCanvasElement>(null);
  const animationRef = useRef<number | undefined>(undefined);
  useEffect(() => {
    const canvas = canvasRef.current;
    if (!canvas) return;
    const ctx = canvas.getContext('2d');
    if (!ctx) return;
    const width = canvas.width;
    const height = canvas.height;

    // Canvas colour properties do not understand `var(--x)`: such a value is
    // not a parseable colour and the assignment is ignored. Resolve the custom
    // property through the computed style instead; fall back to black (the
    // canvas default) when the property is not defined.
    // Colours are resolved once per effect run, so a theme switch is picked up
    // on the next `isActive` / `type` change.
    const resolveCssColor = (variableName: string): string => {
      const resolved = getComputedStyle(canvas).getPropertyValue(variableName).trim();
      return resolved || '#000';
    };
    const idleColor = resolveCssColor(type === 'input' ? '--color-text-2' : '--color-primary');
    const activeColor = resolveCssColor(type === 'input' ? '--color-text-1' : '--color-primary');

    const drawVisualizer = () => {
      ctx.clearRect(0, 0, width, height);
      if (!isActive) {
        // Idle: a single flat line across the middle, no animation loop.
        ctx.beginPath();
        ctx.moveTo(0, height / 2);
        ctx.lineTo(width, height / 2);
        ctx.strokeStyle = idleColor;
        ctx.lineWidth = 2;
        ctx.stroke();
        return;
      }
      // Active: vertically centered bars with random heights.
      const barCount = 30;
      const barWidth = width / barCount;
      ctx.fillStyle = activeColor;
      for (let i = 0; i < barCount; i++) {
        const barHeight = Math.random() * (height / 2) + 10;
        const x = i * barWidth;
        const y = height / 2 - barHeight / 2;
        ctx.fillRect(x, y, barWidth - 2, barHeight);
      }
      animationRef.current = requestAnimationFrame(drawVisualizer);
    };
    drawVisualizer();
    return () => {
      // Compare against undefined rather than truthiness so no handle value is skipped.
      if (animationRef.current !== undefined) {
        cancelAnimationFrame(animationRef.current);
        animationRef.current = undefined;
      }
    };
  }, [isActive, type]);
  return (
    <Container $type={type}>
      <Label>{type === 'input' ? t('voice_call.you') : t('voice_call.ai')}</Label>
      <Canvas ref={canvasRef} width={200} height={50} />
    </Container>
  );
 };
 // Card around one visualizer; the background depends on whether it shows input or output.
 const Container = styled.div<{ $type: 'input' | 'output' }>`
  display: flex;
  flex-direction: column;
  align-items: center;
  width: 45%;
  border-radius: 8px;
  padding: 10px;
  background-color: ${props =>
    props.$type === 'input'
      ? 'var(--color-background-3)'
      : 'var(--color-primary-bg)'
  };
 `;
 // Bold caption above the canvas.
 const Label = styled.div`
  margin-bottom: 8px;
  font-weight: bold;
 `;
 // Canvas stretched to the card width at a fixed 50px display height.
 const Canvas = styled.canvas`
  width: 100%;
  height: 50px;
 `;
 export default VoiceVisualizer;
--- a/src/renderer/src/i18n/locales/en-us.json
+++ b/src/renderer/src/i18n/locales/en-us.json
@ -1,5 +1,20 @@
 {
  "translation": {
    "voice_call": {
      "title": "Voice Call",
      "start": "Start Voice Call",
      "end": "End Call",
      "mute": "Mute",
      "unmute": "Unmute",
      "pause": "Pause",
      "resume": "Resume",
      "you": "You",
      "ai": "AI",
      "press_to_talk": "Press to Talk",
      "release_to_send": "Release to Send",
      "initialization_failed": "Failed to initialize voice call",
      "error": "Voice call error"
    },
    "agents": {
      "add.button": "Add to Assistant",
      "add.knowledge_base": "Knowledge Base",
--- a/src/renderer/src/i18n/locales/zh-cn.json
+++ b/src/renderer/src/i18n/locales/zh-cn.json
@ -1,5 +1,20 @@
 {
  "translation": {
    "voice_call": {
      "title": "语音通话",
      "start": "开始语音通话",
      "end": "结束通话",
      "mute": "静音",
      "unmute": "取消静音",
      "pause": "暂停",
      "resume": "继续",
      "you": "您",
      "ai": "AI",
      "press_to_talk": "长按说话",
      "release_to_send": "松开发送",
      "initialization_failed": "初始化语音通话失败",
      "error": "语音通话出错"
    },
    "agents": {
      "add.button": "添加到助手",
      "add.knowledge_base": "知识库",
--- a/src/renderer/src/pages/home/Inputbar/Inputbar.tsx
+++ b/src/renderer/src/pages/home/Inputbar/Inputbar.tsx
@ -16,6 +16,7 @@ import {
 import ASRButton from '@renderer/components/ASRButton'
 import { QuickPanelListItem, QuickPanelView, useQuickPanel } from '@renderer/components/QuickPanel'
 import TranslateButton from '@renderer/components/TranslateButton'
 import VoiceCallButton from '@renderer/components/VoiceCallButton'
 import { isGenerateImageModel, isVisionModel, isWebSearchModel } from '@renderer/config/models'
 import db from '@renderer/databases'
 import { useAssistant } from '@renderer/hooks/useAssistant'
@ -1024,6 +1025,7 @@ const Inputbar: FC<Props> = ({ assistant: _assistant, setActiveTopic, topic }) =
                  })
                }}
              />
              <VoiceCallButton disabled={loading} />
              {loading && (
                <Tooltip placement="top" title={t('chat.input.pause')} arrow>
                  <ToolbarButton type="text" onClick={onPause} style={{ marginRight: -2, marginTop: 1 }}>
--- a/src/renderer/src/services/ASRService.ts
+++ b/src/renderer/src/services/ASRService.ts
@ -126,25 +126,36 @@ class ASRService {
          // 如果没有收到最终结果，显示处理完成消息
          window.message.success({ content: i18n.t('settings.asr.completed'), key: 'asr-processing' })
        } else if (data.message === 'reset_complete') {
          // 语音识别已重置
          console.log('[ASRService] 语音识别已强制重置')
          this.isRecording = false
          this.resultCallback = null
          // 显示重置完成消息
          window.message.info({ content: '语音识别已重置', key: 'asr-reset' })
          // 如果有回调函数，调用一次空字符串，触发按钮状态重置
          if (this.resultCallback && typeof this.resultCallback === 'function') {
            // 使用空字符串调用回调，不会影响输入框，但可以触发按钮状态重置
-            this.resultCallback('')
+            setTimeout(() => this.resultCallback(''), 100)
          }
        }
      } else if (data.type === 'result' && data.data) {
        // 处理识别结果
        console.log('[ASRService] 收到识别结果:', data.data)
        if (this.resultCallback && typeof this.resultCallback === 'function') {
-          // 只在收到最终结果时才调用回调
+          // 将所有结果都传递给回调函数，并包含isFinal状态
-          if (data.data.isFinal && data.data.text && data.data.text.trim()) {
+          if (data.data.text && data.data.text.trim()) {
-            console.log('[ASRService] 收到最终结果，调用回调函数，文本:', data.data.text)
+            if (data.data.isFinal) {
-            this.resultCallback(data.data.text)
+              console.log('[ASRService] 收到最终结果，调用回调函数，文本:', data.data.text)
-            window.message.success({ content: i18n.t('settings.asr.success'), key: 'asr-processing' })
+              this.resultCallback(data.data.text, true)
-          } else if (!data.data.isFinal) {
+              window.message.success({ content: i18n.t('settings.asr.success'), key: 'asr-processing' })
-            // 非最终结果，只输出日志，不调用回调
+            } else {
-            console.log('[ASRService] 收到中间结果，文本:', data.data.text)
+              // 非最终结果，也调用回调，但标记为非最终
              console.log('[ASRService] 收到中间结果，调用回调函数，文本:', data.data.text)
              this.resultCallback(data.data.text, false)
            }
          } else {
            console.log('[ASRService] 识别结果为空，不调用回调')
          }
@ -189,9 +200,9 @@ class ASRService {
  }
  // 存储结果回调函数
-  resultCallback: ((text: string) => void) | null = null
+  resultCallback: ((text: string, isFinal?: boolean) => void) | null = null
-  startRecording = async (onTranscribed?: (text: string) => void): Promise<void> => {
+  startRecording = async (onTranscribed?: (text: string, isFinal?: boolean) => void): Promise<void> => {
    try {
      const { asrEnabled, asrServiceType } = store.getState().settings
@ -295,7 +306,7 @@ class ASRService {
   * @param onTranscribed 转录完成后的回调函数
   * @returns Promise<void>
   */
-  stopRecording = async (onTranscribed: (text: string) => void): Promise<void> => {
+  stopRecording = async (onTranscribed: (text: string, isFinal?: boolean) => void): Promise<void> => {
    const { asrServiceType } = store.getState().settings
    // 如果是使用本地服务器
@ -318,7 +329,8 @@ class ASRService {
          // 立即调用回调函数，使按钮状态立即更新
          if (onTranscribed) {
            // 使用空字符串调用回调，不会影响输入框，但可以触发按钮状态重置
-            setTimeout(() => onTranscribed(''), 100)
+            // 传递false表示这不是最终结果，只是状态更新
            setTimeout(() => onTranscribed('', false), 100)
          }
        } else {
          throw new Error('WebSocket连接未就绪')
@ -493,14 +505,21 @@ class ASRService {
    // 如果是使用本地服务器
    if (asrServiceType === 'local') {
      if (this.isRecording) {
        // 先重置状态和回调，确保不会处理后续结果
        this.isRecording = false
        this.resultCallback = null
        // 发送停止命令
        if (this.ws && this.wsConnected) {
          this.ws.send(JSON.stringify({ type: 'stop' }))
        }
-        // 重置状态
+          // 发送一个额外的命令，要求浏览器强制重置语音识别
-        this.isRecording = false
+          setTimeout(() => {
-        this.resultCallback = null
+            if (this.ws && this.wsConnected) {
              this.ws.send(JSON.stringify({ type: 'reset' }))
            }
          }, 100)
        }
        console.log('语音识别已取消')
        window.message.info({ content: i18n.t('settings.asr.canceled'), key: 'asr-recording' })
--- a/src/renderer/src/services/VoiceCallService.ts
+++ b/src/renderer/src/services/VoiceCallService.ts
@ -0,0 +1,656 @@
 import store from '@renderer/store';
 import { fetchChatCompletion } from '@renderer/services/ApiService';
 import { getAssistantMessage, getUserMessage } from '@renderer/services/MessagesService';
 import { getDefaultAssistant } from '@renderer/services/AssistantService';
 import TTSService from '@renderer/services/TTSService';
 import ASRService from '@renderer/services/ASRService';
 // 导入类型
 import type { Message } from '@renderer/types';
 /** Callbacks through which the voice-call service reports progress to its caller. */
 interface VoiceCallCallbacks {
  /** Receives the recognized speech text to display (interim and final updates). */
  onTranscript: (text: string) => void;
  /** Receives response text to display, e.g. the welcome message. */
  onResponse: (text: string) => void;
  /** Called with true when recognition/recording starts and false when it ends or fails. */
  onListeningStateChange: (isListening: boolean) => void;
  /** Called with the current speech-playback state. */
  onSpeakingStateChange: (isSpeaking: boolean) => void;
 }
 // Declare the SpeechRecognition constructors on Window for TypeScript
 // (both the standard and the webkit-prefixed name).
 declare global {
  interface Window {
    SpeechRecognition: any;
    webkitSpeechRecognition: any;
  }
 }
 class VoiceCallServiceClass {
  // Browser speech-recognition instance; created in initialize() for the 'browser' ASR type.
  private recognition: any = null;
  // True between startCall() and the end of the call.
  private isCallActive = false;
  private isRecording = false; // recording state (newly added)
  private isMuted = false;
  private isPaused = false;
  // Callbacks supplied by the caller of startCall().
  private callbacks: VoiceCallCallbacks | null = null;
  private _currentTranscript = '';  // underscore prefix avoids an unused-variable warning
  private _accumulatedTranscript = '';  // accumulated speech-recognition results
  // Plain role/content pairs describing the conversation so far.
  private conversationHistory: { role: string; content: string }[] = [];
  // Checked by startRecording() and handleUserSpeech() to reject new input while a response is in progress.
  private isProcessingResponse = false;
  private ttsService = TTSService;
  private recordingTimeout: NodeJS.Timeout | null = null; // max-recording-duration timer
  /**
   * Prepare the service for a call.
   *
   * Requests microphone access (and immediately releases the stream), then
   * sets up the recognizer that matches the configured ASR service type:
   * a browser speech-recognition instance for 'browser', or a WebSocket
   * connection check for 'local'. Other types need no preparation here.
   *
   * @returns true when initialization succeeded
   * @throws Error when microphone access fails, browser speech recognition is
   *   unavailable, or the local ASR server cannot be reached
   */
  async initialize() {
    // Probe microphone permission; the stream itself is not needed.
    try {
      const probeStream = await navigator.mediaDevices.getUserMedia({ audio: true });
      for (const track of probeStream.getTracks()) {
        track.stop();
      }
    } catch (micError) {
      console.error('Microphone permission denied:', micError);
      throw new Error('Microphone permission denied');
    }

    const { asrServiceType } = store.getState().settings;
    switch (asrServiceType) {
      case 'browser': {
        const hasRecognition = 'webkitSpeechRecognition' in window || 'SpeechRecognition' in window;
        if (!hasRecognition) {
          throw new Error('Speech recognition not supported in this browser');
        }
        // Build the browser recognizer: continuous, with interim results.
        const RecognitionCtor = window.SpeechRecognition || window.webkitSpeechRecognition;
        const recognizer = new RecognitionCtor();
        recognizer.continuous = true;
        recognizer.interimResults = true;
        recognizer.lang = navigator.language || 'zh-CN';
        this.recognition = recognizer;
        break;
      }
      case 'local': {
        // Verify that the local ASR server is reachable.
        try {
          const isConnected = await ASRService.connectToWebSocketServer();
          if (!isConnected) {
            throw new Error('无法连接到语音识别服务');
          }
        } catch (connectError) {
          console.error('Failed to connect to ASR server:', connectError);
          throw new Error('Failed to connect to ASR server');
        }
        break;
      }
      default:
        break;
    }
    return true;
  }
  /**
   * Listener currently registered on `window` for `tts-state-change`.
   * Kept so a later startCall() replaces it instead of stacking another one.
   */
  private ttsStateListener: ((event: Event) => void) | null = null;

  /**
   * Begin a voice call.
   *
   * Stores the callbacks, clears the conversation history, wires the browser
   * recognizer's event handlers when the 'browser' ASR type is configured,
   * subscribes to speech-playback state changes, and plays the welcome message.
   *
   * @param callbacks receivers for transcript, response and state updates
   * @returns true once the call has been set up
   * @throws Error when the 'browser' ASR type is configured but initialize()
   *   has not created the recognizer
   */
  async startCall(callbacks: VoiceCallCallbacks) {
    this.callbacks = callbacks;
    this.isCallActive = true;
    this.conversationHistory = [];
    const { asrServiceType } = store.getState().settings;
    if (asrServiceType === 'browser') {
      if (!this.recognition) {
        throw new Error('Browser speech recognition not initialized');
      }
      this.recognition.onresult = (event: any) => {
        let interimTranscript = '';
        let finalTranscript = '';
        for (let i = event.resultIndex; i < event.results.length; ++i) {
          if (event.results[i].isFinal) {
            finalTranscript += event.results[i][0].transcript;
          } else {
            interimTranscript += event.results[i][0].transcript;
          }
        }
        if (interimTranscript) {
          this._currentTranscript = interimTranscript;
          // Show accumulated + interim text; skip the separator when nothing
          // has accumulated yet so the text does not start with a space.
          const preview = this._accumulatedTranscript
            ? this._accumulatedTranscript + ' ' + interimTranscript
            : interimTranscript;
          this.callbacks?.onTranscript(preview);
        }
        if (finalTranscript) {
          // Append the final segment to the accumulated text.
          this._accumulatedTranscript = this._accumulatedTranscript
            ? this._accumulatedTranscript + ' ' + finalTranscript
            : finalTranscript;
          this._currentTranscript = '';
          this.callbacks?.onTranscript(this._accumulatedTranscript);
          // Only the transcript is updated while recording; the text is sent
          // to the AI when the user releases the button.
        }
      };
      this.recognition.onstart = () => {
        this.isRecording = true;
        this.callbacks?.onListeningStateChange(true);
      };
      this.recognition.onend = () => {
        this.isRecording = false;
        this.callbacks?.onListeningStateChange(false);
      };
      this.recognition.onerror = (event: any) => {
        console.error('Speech recognition error', event.error);
        this.isRecording = false;
        this.callbacks?.onListeningStateChange(false);
      };
    }
    // Show the welcome message.
    const welcomeMessage = '您好，我是您的AI助手，请长按说话按钮进行对话。';
    this.callbacks?.onResponse(welcomeMessage);
    // Subscribe to playback state. Remove the listener from a previous call
    // first; otherwise every call adds one more listener that is never removed.
    if (this.ttsStateListener) {
      window.removeEventListener('tts-state-change', this.ttsStateListener);
    }
    this.ttsStateListener = (event: Event) => {
      // NOTE(review): assumes the event is a CustomEvent carrying `detail.isPlaying` — confirm against the dispatcher.
      const { isPlaying } = (event as CustomEvent<{ isPlaying: boolean }>).detail;
      this.callbacks?.onSpeakingStateChange(isPlaying);
    };
    window.addEventListener('tts-state-change', this.ttsStateListener);
    // Play the welcome message and report "speaking" right away.
    this.callbacks?.onSpeakingStateChange(true);
    this.ttsService.speak(welcomeMessage);
    // After 5 seconds, make sure the speaking state is cleared if playback is not running.
    setTimeout(() => {
      if (this.ttsService && !this.ttsService.isCurrentlyPlaying()) {
        this.callbacks?.onSpeakingStateChange(false);
      }
    }, 5000);
    return true;
  }
  /**
   * Start a push-to-talk recording.
   *
   * Refuses to start when no call is active, the call is paused, a response
   * is being processed, or a recording is already running. Transcript state
   * from the previous turn is cleared first, and a 60-second timer stops the
   * recording automatically if the user never releases the button.
   *
   * @returns Promise<boolean> true when recording started, false otherwise
   */
  async startRecording(): Promise<boolean> {
    if (!this.isCallActive || this.isPaused || this.isProcessingResponse || this.isRecording) {
      return false;
    }
    // Start from a clean slate. The interim transcript must be cleared too,
    // otherwise stopRecording() can fall back to text left over from the
    // previous turn when this recording produces no result.
    this._accumulatedTranscript = '';
    this._currentTranscript = '';
    // A recording that ended through the recognizer's own end/error events
    // leaves its max-duration timer armed; disarm it so it cannot stop this
    // new recording early.
    if (this.recordingTimeout) {
      clearTimeout(this.recordingTimeout);
      this.recordingTimeout = null;
    }
    const { asrServiceType } = store.getState().settings;
    try {
      if (asrServiceType === 'browser') {
        // Browser ASR
        if (!this.recognition) {
          throw new Error('Browser speech recognition not initialized');
        }
        this.recognition.start();
        this.isRecording = true;
      } else if (asrServiceType === 'local') {
        // Local-server ASR: results arrive through this callback.
        await ASRService.startRecording((text, isFinal) => {
          if (text) {
            if (isFinal) {
              // Final segment: append it to the accumulated text.
              this._accumulatedTranscript = this._accumulatedTranscript
                ? this._accumulatedTranscript + ' ' + text
                : text;
              this._currentTranscript = '';
              this.callbacks?.onTranscript(this._accumulatedTranscript);
            } else {
              // Interim segment: show accumulated + interim text, without a
              // leading space when nothing has accumulated yet.
              this._currentTranscript = text;
              const preview = this._accumulatedTranscript
                ? this._accumulatedTranscript + ' ' + text
                : text;
              this.callbacks?.onTranscript(preview);
            }
            // Only the transcript is updated while recording; the text is
            // sent to the AI when the user releases the button.
          }
        });
        this.isRecording = true;
        this.callbacks?.onListeningStateChange(true);
      } else if (asrServiceType === 'openai') {
        // OpenAI ASR
        await ASRService.startRecording();
        this.isRecording = true;
        this.callbacks?.onListeningStateChange(true);
      }
      // Cap the recording length in case the user forgets to release.
      this.recordingTimeout = setTimeout(() => {
        if (this.isRecording) {
          this.stopRecording();
        }
      }, 60000); // 60 seconds
      return true;
    } catch (error) {
      console.error('Failed to start recording:', error);
      this.isRecording = false;
      this.callbacks?.onListeningStateChange(false);
      return false;
    }
  }
  /**
   * Stop the current recording and send the recognized text to the AI.
   *
   * The accumulated transcript is preferred; otherwise the final result (local
   * ASR) or the latest interim/current transcript is used. Nothing is sent
   * when no text was recognized.
   *
   * @returns Promise<boolean> true when the recording was stopped, false when
   *   no call/recording is active or stopping failed
   */
  async stopRecording(): Promise<boolean> {
    if (!this.isCallActive || !this.isRecording) {
      return false;
    }
    // Disarm the max-duration timer.
    if (this.recordingTimeout) {
      clearTimeout(this.recordingTimeout);
      this.recordingTimeout = null;
    }
    const { asrServiceType } = store.getState().settings;
    try {
      // Snapshot the transcripts as they are at release time.
      const currentTranscript = this._currentTranscript;
      const accumulatedTranscript = this._accumulatedTranscript;
      if (asrServiceType === 'browser') {
        // Browser ASR
        if (!this.recognition) {
          throw new Error('Browser speech recognition not initialized');
        }
        this.recognition.stop();
        // The recognizer's onend handler also clears this flag; do it now so
        // state is consistent immediately.
        this.isRecording = false;
        this.callbacks?.onListeningStateChange(false);
        if (accumulatedTranscript) {
          console.log('发送累积的语音识别结果给AI:', accumulatedTranscript);
          this.handleUserSpeech(accumulatedTranscript);
        } else if (currentTranscript) {
          console.log('没有累积结果，使用当前结果:', currentTranscript);
          this.handleUserSpeech(currentTranscript);
        }
      } else if (asrServiceType === 'local') {
        // Local-server ASR: wait briefly for the final result.
        const finalResultPromise = new Promise<string>((resolve) => {
          // Never wait longer than 1.5 seconds.
          const timeoutId = setTimeout(() => {
            console.log('等待最终结果超时，使用当前结果');
            resolve(this._currentTranscript);
          }, 1500);
          const resultCallback = (text: string) => {
            // An empty string is only a state-reset signal, not a result.
            if (text === '') return;
            clearTimeout(timeoutId);
            console.log('收到最终语音识别结果:', text);
            this._currentTranscript = text;
            this.callbacks?.onTranscript(text);
            resolve(text);
          };
          // Stop (not cancel) so the final result can still arrive. The
          // returned promise rejects when the socket is not ready; handle that
          // here instead of leaving an unhandled rejection, and stop waiting.
          ASRService.stopRecording(resultCallback).catch((error: unknown) => {
            console.error('Failed to stop ASR recording:', error);
            clearTimeout(timeoutId);
            resolve(this._currentTranscript);
          });
          this.isRecording = false;
          this.callbacks?.onListeningStateChange(false);
          // Safety net: force-cancel after 2 seconds so no further results
          // arrive — but only if no new recording has started in the
          // meantime, otherwise this would cancel that new recording.
          setTimeout(() => {
            if (!this.isRecording) {
              ASRService.cancelRecording();
            }
          }, 2000);
        });
        const finalText = await finalResultPromise;
        if (accumulatedTranscript) {
          console.log('发送累积的语音识别结果给AI:', accumulatedTranscript);
          this.handleUserSpeech(accumulatedTranscript);
        } else if (finalText) {
          console.log('发送最终语音识别结果给AI:', finalText);
          this.handleUserSpeech(finalText);
        } else if (currentTranscript) {
          console.log('没有最终结果，使用当前结果:', currentTranscript);
          this.handleUserSpeech(currentTranscript);
        }
      } else if (asrServiceType === 'openai') {
        // OpenAI ASR
        await ASRService.stopRecording((text) => {
          // Record the transcription result.
          if (text) {
            this._currentTranscript = text;
            this.callbacks?.onTranscript(text);
          }
        });
        this.isRecording = false;
        this.callbacks?.onListeningStateChange(false);
        // Use whatever transcript is available at this point.
        const finalTranscript = this._currentTranscript;
        if (finalTranscript) {
          this.handleUserSpeech(finalTranscript);
        }
      }
      return true;
    } catch (error) {
      console.error('Failed to stop recording:', error);
      this.isRecording = false;
      this.callbacks?.onListeningStateChange(false);
      return false;
    }
  }
  /**
   * Sends a recognized user utterance to the assistant and plays the reply.
   *
   * Returns immediately when the call is not active, is paused, or a previous
   * response is still being processed. Otherwise it stops (browser) or cancels
   * (local / openai) the running speech recognition, builds the message list
   * (voice-call instructions, earlier conversation turns, then the new
   * utterance), streams partial replies to `callbacks.onResponse`, and finally
   * hands the complete reply to the TTS service unless the call is muted or
   * has ended. Speech recognition is not resumed afterwards.
   *
   * @param text Recognized user speech to send to the assistant.
   */
  async handleUserSpeech(text: string) {
    if (!this.isCallActive || this.isProcessingResponse || this.isPaused) return;
    // Halt speech recognition so nothing is transcribed while the AI replies
    const { asrServiceType } = store.getState().settings;
    if (asrServiceType === 'browser') {
      this.recognition?.stop();
    } else if (asrServiceType === 'local' || asrServiceType === 'openai') {
      ASRService.cancelRecording();
    }
    this.isProcessingResponse = true;

    // Plays `reply` through TTS when the call is still active and not muted.
    // Shared by the success path and the error-fallback path below.
    const speakIfAudible = (reply: string) => {
      if (this.isMuted || !this.isCallActive) return;
      // Set the speaking state manually before playback starts
      this.callbacks?.onSpeakingStateChange(true);
      this.ttsService.speak(reply);
      // After 1 second, reset the speaking state if TTS is not playing
      setTimeout(() => {
        if (this.ttsService && !this.ttsService.isCurrentlyPlaying()) {
          this.callbacks?.onSpeakingStateChange(false);
        }
      }, 1000);
    };

    try {
      // Current default assistant
      const assistant = getDefaultAssistant();
      // Minimal Topic object used only for this voice call
      const topic = {
        id: 'voice-call',
        assistantId: assistant.id,
        name: 'Voice Call',
        createdAt: new Date().toISOString(),
        updatedAt: new Date().toISOString(),
        messages: []
      };
      // Message carrying the new user utterance
      const userMessage = getUserMessage({
        assistant,
        topic,
        type: 'text',
        content: text
      });
      // Placeholder assistant message that the completion fills in
      const assistantMessage = getAssistantMessage({
        assistant,
        topic
      });
      // Snapshot the earlier turns BEFORE recording the new utterance.
      // `userMessage` is appended to the request separately, so mapping the
      // history after the push would send the current utterance twice.
      const priorTurns = [...this.conversationHistory];
      this.conversationHistory.push({ role: 'user', content: text });
      // Convert the earlier turns into Message objects
      const historyMessages = priorTurns.map(msg => {
        if (msg.role === 'user') {
          return getUserMessage({
            assistant,
            topic,
            type: 'text',
            content: msg.content
          });
        } else {
          const assistantMsg = getAssistantMessage({
            assistant,
            topic
          });
          return { ...assistantMsg, content: msg.content, status: 'success' };
        }
      });
      // Voice-call instructions, sent as the first message of the request
      const voiceCallPrompt = `当前是语音通话模式。请注意：
 1. 简洁直接地回答问题，避免冗长的引导和总结。
 2. 避免使用复杂的格式化内容，如表格、代码块、Markdown等。
 3. 使用自然、口语化的表达方式，就像与人对话一样。
 4. 如果需要列出要点，使用简单的数字或文字标记，而不是复杂的格式。
 5. 回答应该简短有力，便于用户通过语音理解。
 6. 避免使用特殊符号、表情符号、标点符号等，因为这些在语音播放时会影响理解。
 7. 使用完整的句子而非简单的关键词列表。
 8. 尽量使用常见词汇，避免生僻或专业术语，除非用户特别询问。`;
      // The instructions are wrapped in a user-type message
      const systemMessage = getUserMessage({
        assistant,
        topic,
        type: 'text',
        content: voiceCallPrompt
      });
      // Make sure the user message carries the recognized text
      userMessage.content = text;
      // Final request: instructions, earlier turns, then the new utterance.
      // The assertion is needed because the mapped history objects are not
      // inferred as Message.
      const messages = [systemMessage, ...historyMessages, userMessage] as Message[];
      // Accumulates the latest streamed reply content
      let fullResponse = '';
      try {
        // Request the completion and forward partial content as it arrives
        await fetchChatCompletion({
          message: assistantMessage,
          messages,
          assistant,
          onResponse: async (msg) => {
            if (msg.content && msg.content !== fullResponse) {
              fullResponse = msg.content;
              // Update the UI
              this.callbacks?.onResponse(fullResponse);
              // Stop any TTS that is currently playing
              if (this.ttsService.isCurrentlyPlaying()) {
                this.ttsService.stop();
              }
            }
          }
        });
        // Play the complete reply
        speakIfAudible(fullResponse);
        // Record the assistant turn in the conversation history
        this.conversationHistory.push({ role: 'assistant', content: fullResponse });
      } catch (innerError) {
        console.error('Error generating response:', innerError);
        // Fall back to a fixed apology when the completion fails
        fullResponse = `抱歉，处理您的请求时出错了。`;
        this.callbacks?.onResponse(fullResponse);
        speakIfAudible(fullResponse);
      }
    } catch (error) {
      console.error('Error processing voice response:', error);
    } finally {
      this.isProcessingResponse = false;
      // Recognition is intentionally not resumed here: in push-to-talk mode
      // the user starts the next recording with a long press.
    }
  }
  /**
   * Aborts the current recording and discards its transcript; nothing is
   * sent to the AI.
   *
   * @returns `true` when the recording was cancelled; `false` when no call or
   *   recording is active, or when cancelling failed.
   */
  async cancelRecording(): Promise<boolean> {
    const hasActiveRecording = this.isCallActive && this.isRecording;
    if (!hasActiveRecording) {
      return false;
    }

    // Drop the pending recording-timeout timer, if any
    if (this.recordingTimeout) {
      clearTimeout(this.recordingTimeout);
      this.recordingTimeout = null;
    }

    // Flags the recording as stopped and notifies the listening-state callback
    const markNotListening = () => {
      this.isRecording = false;
      this.callbacks?.onListeningStateChange(false);
    };

    // ASR backend currently selected in settings
    const serviceType = store.getState().settings.asrServiceType;

    try {
      switch (serviceType) {
        case 'browser': {
          // Browser speech recognition
          if (!this.recognition) {
            throw new Error('Browser speech recognition not initialized');
          }
          this.recognition.stop();
          markNotListening();
          break;
        }
        case 'local':
        case 'openai':
          // Local-server and OpenAI recognition both go through ASRService
          ASRService.cancelRecording();
          markNotListening();
          break;
      }

      // Wipe the transcript captured so far
      this._currentTranscript = '';
      this.callbacks?.onTranscript('');
      return true;
    } catch (error) {
      console.error('Failed to cancel recording:', error);
      markNotListening();
      return false;
    }
  }
  /**
   * Stores the mute flag and, when muting, stops TTS playback that is
   * currently in progress.
   *
   * @param muted `true` to mute, `false` to unmute.
   */
  setMuted(muted: boolean) {
    this.isMuted = muted;
    if (!muted) {
      return;
    }
    // Muting: silence whatever is being spoken right now
    if (this.ttsService.isCurrentlyPlaying()) {
      this.ttsService.stop();
    }
  }
  /**
   * Stores the paused flag. When pausing, stops or cancels speech recognition
   * for the configured ASR backend and stops any TTS playback. Un-pausing
   * does not restart recognition.
   *
   * @param paused `true` to pause the call, `false` to resume it.
   */
  setPaused(paused: boolean) {
    this.isPaused = paused;
    // ASR backend currently selected in settings
    const serviceType = store.getState().settings.asrServiceType;
    if (!paused) {
      // Recognition is not resumed automatically; the user long-presses the button
      return;
    }

    // Pause speech recognition
    switch (serviceType) {
      case 'browser':
        this.recognition?.stop();
        break;
      case 'local':
      case 'openai':
        ASRService.cancelRecording();
        break;
    }

    // Pause TTS
    if (this.ttsService.isCurrentlyPlaying()) {
      this.ttsService.stop();
    }
  }
  /**
   * Ends the call: marks it inactive, stops or cancels speech recognition for
   * the configured ASR backend, stops any TTS playback, and clears the
   * registered callbacks.
   */
  endCall() {
    this.isCallActive = false;

    // ASR backend currently selected in settings
    const { settings } = store.getState();
    const serviceType = settings.asrServiceType;
    const usesAsrService = serviceType === 'local' || serviceType === 'openai';

    // Shut down speech recognition
    if (serviceType === 'browser') {
      this.recognition?.stop();
    } else if (usesAsrService) {
      ASRService.cancelRecording();
    }

    // Shut down TTS
    const ttsBusy = this.ttsService.isCurrentlyPlaying();
    if (ttsBusy) {
      this.ttsService.stop();
    }

    this.callbacks = null;
  }
 }
 /** Module-level VoiceCallServiceClass instance, created once when this module loads and exported for importers. */
 export const VoiceCallService = new VoiceCallServiceClass();