Added voice call services and updated settings

This commit is contained in:
1600822305 2025-04-11 03:37:16 +08:00
parent f6cc733421
commit a86b4ba404
10 changed files with 1173 additions and 17 deletions

View File

@@ -64,6 +64,9 @@
startRecognition();
} else if (data.type === 'stop') {
stopRecognition();
} else if (data.type === 'reset') {
// Force-reset speech recognition
forceResetRecognition();
} else {
console.warn('[Browser Page] Received unknown command type:', data.type);
}
@@ -362,6 +365,30 @@
updateStatus("识别未运行。");
}
}
function forceResetRecognition() {
console.log('[Browser Page] Force resetting recognition...');
updateStatus("强制重置语音识别...");
// First try to stop the current recognition
if (recognition) {
try {
recognition.stop();
} catch (e) {
console.error('[Browser Page] Error stopping recognition during reset:', e);
}
}
// Force the reference to null so any late results are discarded
recognition = null;
// Tell the server the reset is complete
if (ws.readyState === WebSocket.OPEN) {
ws.send(JSON.stringify({ type: 'status', message: 'reset_complete' }));
}
updateStatus("语音识别已重置,等待新指令。");
}
</script>
</body>
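
Note: setting the page-level variable to null in forceResetRecognition() drops the reference, but it does not detach the handlers already registered on the old recognition instance, so a late onresult event can still fire. A minimal guard (a sketch, not part of this commit; it assumes onresult is assigned as a plain function so that "this" is the recognition instance) makes the discard explicit:

recognition.onresult = function (event) {
// forceResetRecognition() nulled or replaced the page-level variable;
// drop any results still arriving from this stale instance.
if (recognition !== this) return;
// ... existing result handling ...
};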

View File

@@ -124,6 +124,13 @@ wss.on('connection', (ws) => {
} else {
console.log('[Server] Cannot relay STOP: Browser not connected')
}
} else if (data.type === 'reset' && ws === electronConnection) {
if (browserConnection && browserConnection.readyState === WebSocket.OPEN) {
console.log('[Server] Relaying RESET command to browser')
browserConnection.send(JSON.stringify({ type: 'reset' }))
} else {
console.log('[Server] Cannot relay RESET: Browser not connected')
}
}
// Browser sends a recognition result
else if (data.type === 'result' && ws === browserConnection) {
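
Taken together, the relay handlers above define a small WebSocket command protocol between the Electron client, the relay server, and the browser page. Spelled out as a TypeScript union for orientation (a sketch inferred from the handlers in this commit; no such type is declared anywhere in it):

type VoiceCallMessage =
| { type: 'start' } // Electron -> browser: begin recognition
| { type: 'stop' } // Electron -> browser: stop recognition
| { type: 'reset' } // Electron -> browser: force-reset recognition
| { type: 'status'; message: string } // browser -> Electron: e.g. 'reset_complete'
| { type: 'result'; data: { text: string; isFinal: boolean } }; // browser -> Electron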

View File

@@ -0,0 +1,55 @@
import React, { useState } from 'react';
import { Button, Tooltip } from 'antd';
import { PhoneOutlined, LoadingOutlined } from '@ant-design/icons';
import { useTranslation } from 'react-i18next';
import VoiceCallModal from './VoiceCallModal';
import { VoiceCallService } from '../services/VoiceCallService';
interface Props {
disabled?: boolean;
style?: React.CSSProperties;
}
const VoiceCallButton: React.FC<Props> = ({ disabled = false, style }) => {
const { t } = useTranslation();
const [isModalVisible, setIsModalVisible] = useState(false);
const [isLoading, setIsLoading] = useState(false);
const handleClick = async () => {
if (disabled || isLoading) return;
setIsLoading(true);
try {
// Initialize the voice service
await VoiceCallService.initialize();
setIsModalVisible(true);
} catch (error) {
console.error('Failed to initialize voice call:', error);
window.message.error(t('voice_call.initialization_failed'));
} finally {
setIsLoading(false);
}
};
return (
<>
<Tooltip title={t('voice_call.start')}>
<Button
type="text"
icon={isLoading ? <LoadingOutlined /> : <PhoneOutlined />}
onClick={handleClick}
disabled={disabled || isLoading}
style={style}
/>
</Tooltip>
{isModalVisible && (
<VoiceCallModal
visible={isModalVisible}
onClose={() => setIsModalVisible(false)}
/>
)}
</>
);
};
export default VoiceCallButton;

View File

@@ -0,0 +1,263 @@
import React, { useEffect, useState } from 'react';
import { Modal, Button, Space, Tooltip } from 'antd';
import {
AudioMutedOutlined,
AudioOutlined,
CloseOutlined,
PauseCircleOutlined,
PlayCircleOutlined,
SoundOutlined
} from '@ant-design/icons';
import styled from 'styled-components';
import { useTranslation } from 'react-i18next';
import VoiceVisualizer from './VoiceVisualizer';
import { VoiceCallService } from '../services/VoiceCallService';
interface Props {
visible: boolean;
onClose: () => void;
}
const VoiceCallModal: React.FC<Props> = ({ visible, onClose }) => {
const { t } = useTranslation();
const [isMuted, setIsMuted] = useState(false);
const [isPaused, setIsPaused] = useState(false);
const [transcript, setTranscript] = useState('');
const [response, setResponse] = useState('');
const [isListening, setIsListening] = useState(false);
const [isSpeaking, setIsSpeaking] = useState(false);
const [isRecording, setIsRecording] = useState(false);
const [isProcessing, setIsProcessing] = useState(false);
useEffect(() => {
const startVoiceCall = async () => {
try {
await VoiceCallService.startCall({
onTranscript: (text) => setTranscript(text),
onResponse: (text) => setResponse(text),
onListeningStateChange: setIsListening,
onSpeakingStateChange: setIsSpeaking,
});
} catch (error) {
console.error('Voice call error:', error);
window.message.error(t('voice_call.error'));
handleClose();
}
};
if (visible) {
startVoiceCall();
}
return () => {
VoiceCallService.endCall();
};
}, [visible, t]);
const handleClose = () => {
VoiceCallService.endCall();
onClose();
};
const toggleMute = () => {
const newMuteState = !isMuted;
setIsMuted(newMuteState);
VoiceCallService.setMuted(newMuteState);
};
const togglePause = () => {
const newPauseState = !isPaused;
setIsPaused(newPauseState);
VoiceCallService.setPaused(newPauseState);
};
// Press-to-talk handlers
const handleRecordStart = async (e: React.MouseEvent | React.TouchEvent) => {
e.preventDefault(); // Prevent the default touch behavior
if (isProcessing || isPaused) return;
setIsRecording(true);
await VoiceCallService.startRecording();
};
const handleRecordEnd = async (e: React.MouseEvent | React.TouchEvent) => {
e.preventDefault(); // Prevent the default touch behavior
if (!isRecording) return;
// Update the UI state immediately
setIsRecording(false);
setIsProcessing(true);
// Make sure recording has fully stopped
try {
await VoiceCallService.stopRecording();
console.log('Recording stopped');
} catch (error) {
console.error('Error stopping recording:', error);
}
// Results come back through the callbacks, so there is nothing to handle here
setTimeout(() => {
setIsProcessing(false);
}, 500); // Short delay to keep the user from immediately pressing again
};
// Handle the mouse/touch pointer leaving the button mid-press
const handleRecordCancel = async (e: React.MouseEvent | React.TouchEvent) => {
e.preventDefault();
if (isRecording) {
// Update the UI state immediately
setIsRecording(false);
setIsProcessing(true);
// Cancel the recording without sending it to the AI
try {
await VoiceCallService.cancelRecording();
console.log('Recording canceled');
} catch (error) {
console.error('Error canceling recording:', error);
}
setTimeout(() => {
setIsProcessing(false);
}, 500);
}
};
return (
<Modal
title={t('voice_call.title')}
open={visible}
onCancel={handleClose}
footer={null}
width={500}
centered
maskClosable={false}
>
<Container>
<VisualizerContainer>
<VoiceVisualizer isActive={isListening || isRecording} type="input" />
<VoiceVisualizer isActive={isSpeaking} type="output" />
</VisualizerContainer>
<TranscriptContainer>
{transcript && (
<TranscriptText>
<UserLabel>{t('voice_call.you')}:</UserLabel> {transcript}
</TranscriptText>
)}
{response && (
<ResponseText>
<AILabel>{t('voice_call.ai')}:</AILabel> {response}
</ResponseText>
)}
</TranscriptContainer>
<ControlsContainer>
<Space>
<Button
type="text"
icon={isMuted ? <AudioMutedOutlined /> : <AudioOutlined />}
onClick={toggleMute}
size="large"
title={isMuted ? t('voice_call.unmute') : t('voice_call.mute')}
/>
<Button
type="text"
icon={isPaused ? <PlayCircleOutlined /> : <PauseCircleOutlined />}
onClick={togglePause}
size="large"
title={isPaused ? t('voice_call.resume') : t('voice_call.pause')}
/>
<Tooltip title={t('voice_call.press_to_talk')}>
<RecordButton
type={isRecording ? "primary" : "default"}
icon={<SoundOutlined />}
onMouseDown={handleRecordStart}
onMouseUp={handleRecordEnd}
onMouseLeave={handleRecordCancel}
onTouchStart={handleRecordStart}
onTouchEnd={handleRecordEnd}
onTouchCancel={handleRecordCancel}
size="large"
disabled={isProcessing || isPaused}
>
{isRecording ? t('voice_call.release_to_send') : t('voice_call.press_to_talk')}
</RecordButton>
</Tooltip>
<Button
type="primary"
icon={<CloseOutlined />}
onClick={handleClose}
danger
size="large"
title={t('voice_call.end')}
/>
</Space>
</ControlsContainer>
</Container>
</Modal>
);
};
const Container = styled.div`
display: flex;
flex-direction: column;
gap: 20px;
height: 400px;
`;
const VisualizerContainer = styled.div`
display: flex;
justify-content: space-between;
height: 100px;
`;
const TranscriptContainer = styled.div`
flex: 1;
overflow-y: auto;
border: 1px solid var(--color-border);
border-radius: 8px;
padding: 16px;
background-color: var(--color-background-2);
`;
const TranscriptText = styled.p`
margin-bottom: 8px;
color: var(--color-text-1);
`;
const ResponseText = styled.p`
margin-bottom: 8px;
color: var(--color-primary);
`;
const UserLabel = styled.span`
font-weight: bold;
color: var(--color-text-1);
`;
const AILabel = styled.span`
font-weight: bold;
color: var(--color-primary);
`;
const ControlsContainer = styled.div`
display: flex;
justify-content: center;
padding: 10px 0;
`;
const RecordButton = styled(Button)`
min-width: 150px;
transition: all 0.2s;
&:active {
transform: scale(0.95);
}
`;
export default VoiceCallModal;
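
A side note on the press-to-talk wiring above: the record button registers parallel mouse and touch handlers (onMouseDown/onTouchStart, onMouseUp/onTouchEnd, and so on). The Pointer Events API, which React exposes as onPointerDown and friends, collapses both input types into one handler set. A sketch of the same wiring under that approach (illustrative only, not what this commit does):

<RecordButton
type={isRecording ? "primary" : "default"}
icon={<SoundOutlined />}
onPointerDown={handleRecordStart} // fires for mouse, touch, and pen alike
onPointerUp={handleRecordEnd}
onPointerLeave={handleRecordCancel}
onPointerCancel={handleRecordCancel} // the browser interrupted the gesture
size="large"
disabled={isProcessing || isPaused}
/>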

View File

@@ -0,0 +1,97 @@
import React, { useEffect, useRef } from 'react';
import styled from 'styled-components';
import { useTranslation } from 'react-i18next';
interface Props {
isActive: boolean;
type: 'input' | 'output';
}
const VoiceVisualizer: React.FC<Props> = ({ isActive, type }) => {
const { t } = useTranslation();
const canvasRef = useRef<HTMLCanvasElement>(null);
const animationRef = useRef<number | undefined>(undefined);
useEffect(() => {
const canvas = canvasRef.current;
if (!canvas) return;
const ctx = canvas.getContext('2d');
if (!ctx) return;
const width = canvas.width;
const height = canvas.height;
const drawVisualizer = () => {
ctx.clearRect(0, 0, width, height);
if (!isActive) {
// Draw a flat idle waveform
ctx.beginPath();
ctx.moveTo(0, height / 2);
ctx.lineTo(width, height / 2);
ctx.strokeStyle = type === 'input' ? 'var(--color-text-2)' : 'var(--color-primary)';
ctx.lineWidth = 2;
ctx.stroke();
return;
}
// Draw animated bars
const barCount = 30;
const barWidth = width / barCount;
const color = type === 'input' ? 'var(--color-text-1)' : 'var(--color-primary)';
for (let i = 0; i < barCount; i++) {
const barHeight = Math.random() * (height / 2) + 10;
const x = i * barWidth;
const y = height / 2 - barHeight / 2;
ctx.fillStyle = color;
ctx.fillRect(x, y, barWidth - 2, barHeight);
}
animationRef.current = requestAnimationFrame(drawVisualizer);
};
drawVisualizer();
return () => {
if (animationRef.current) {
cancelAnimationFrame(animationRef.current);
}
};
}, [isActive, type]);
return (
<Container $type={type}>
<Label>{type === 'input' ? t('voice_call.you') : t('voice_call.ai')}</Label>
<Canvas ref={canvasRef} width={200} height={50} />
</Container>
);
};
const Container = styled.div<{ $type: 'input' | 'output' }>`
display: flex;
flex-direction: column;
align-items: center;
width: 45%;
border-radius: 8px;
padding: 10px;
background-color: ${props =>
props.$type === 'input'
? 'var(--color-background-3)'
: 'var(--color-primary-bg)'
};
`;
const Label = styled.div`
margin-bottom: 8px;
font-weight: bold;
`;
const Canvas = styled.canvas`
width: 100%;
height: 50px;
`;
export default VoiceVisualizer;
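
One caveat in the drawing code above: a canvas 2D context does not resolve CSS custom properties, so assignments like ctx.strokeStyle = 'var(--color-text-2)' are invalid color strings and are silently ignored (the context keeps its previous style, black by default). To draw with the theme colors, resolve the variables first; a minimal sketch, assuming the variables are defined on the document root:

// Resolve a CSS custom property to a concrete color the canvas can use.
const resolveCssVar = (name: string): string =>
getComputedStyle(document.documentElement).getPropertyValue(name).trim() || '#000';

ctx.strokeStyle = resolveCssVar(type === 'input' ? '--color-text-2' : '--color-primary');
ctx.fillStyle = resolveCssVar(type === 'input' ? '--color-text-1' : '--color-primary');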

View File

@@ -1,5 +1,20 @@
{
"translation": {
"voice_call": {
"title": "Voice Call",
"start": "Start Voice Call",
"end": "End Call",
"mute": "Mute",
"unmute": "Unmute",
"pause": "Pause",
"resume": "Resume",
"you": "You",
"ai": "AI",
"press_to_talk": "Press to Talk",
"release_to_send": "Release to Send",
"initialization_failed": "Failed to initialize voice call",
"error": "Voice call error"
},
"agents": {
"add.button": "Add to Assistant",
"add.knowledge_base": "Knowledge Base",

View File

@@ -1,5 +1,20 @@
{
"translation": {
"voice_call": {
"title": "语音通话",
"start": "开始语音通话",
"end": "结束通话",
"mute": "静音",
"unmute": "取消静音",
"pause": "暂停",
"resume": "继续",
"you": "您",
"ai": "AI",
"press_to_talk": "长按说话",
"release_to_send": "松开发送",
"initialization_failed": "初始化语音通话失败",
"error": "语音通话出错"
},
"agents": {
"add.button": "添加到助手",
"add.knowledge_base": "知识库",

View File

@@ -16,6 +16,7 @@ import {
import ASRButton from '@renderer/components/ASRButton'
import { QuickPanelListItem, QuickPanelView, useQuickPanel } from '@renderer/components/QuickPanel'
import TranslateButton from '@renderer/components/TranslateButton'
import VoiceCallButton from '@renderer/components/VoiceCallButton'
import { isGenerateImageModel, isVisionModel, isWebSearchModel } from '@renderer/config/models'
import db from '@renderer/databases'
import { useAssistant } from '@renderer/hooks/useAssistant'
@@ -1024,6 +1025,7 @@ const Inputbar: FC<Props> = ({ assistant: _assistant, setActiveTopic, topic }) =
})
}}
/>
<VoiceCallButton disabled={loading} />
{loading && (
<Tooltip placement="top" title={t('chat.input.pause')} arrow>
<ToolbarButton type="text" onClick={onPause} style={{ marginRight: -2, marginTop: 1 }}>

View File

@@ -126,25 +126,36 @@ class ASRService {
// If no final result arrived, show a processing-complete message
window.message.success({ content: i18n.t('settings.asr.completed'), key: 'asr-processing' })
} else if (data.message === 'reset_complete') {
// Speech recognition has been force-reset
console.log('[ASRService] Speech recognition was force-reset')
// Capture the callback before clearing it so it can still fire below
const callback = this.resultCallback
this.isRecording = false
this.resultCallback = null
// Show a reset-complete toast
window.message.info({ content: '语音识别已重置', key: 'asr-reset' })
// If a callback is registered, fire it with an empty string to reset the button state
if (callback && typeof callback === 'function') {
// An empty string leaves the input box untouched but still resets the button state
callback('')
setTimeout(() => callback(''), 100)
}
}
} else if (data.type === 'result' && data.data) {
// Handle a recognition result
console.log('[ASRService] Received recognition result:', data.data)
if (this.resultCallback && typeof this.resultCallback === 'function') {
// Only invoke the callback on a final result
if (data.data.isFinal && data.data.text && data.data.text.trim()) {
console.log('[ASRService] Final result received, invoking callback, text:', data.data.text)
this.resultCallback(data.data.text)
window.message.success({ content: i18n.t('settings.asr.success'), key: 'asr-processing' })
} else if (!data.data.isFinal) {
// Interim result: log only, do not invoke the callback
console.log('[ASRService] Interim result received, text:', data.data.text)
// Pass every result to the callback, tagged with its isFinal state
if (data.data.text && data.data.text.trim()) {
if (data.data.isFinal) {
console.log('[ASRService] Final result received, invoking callback, text:', data.data.text)
this.resultCallback(data.data.text, true)
window.message.success({ content: i18n.t('settings.asr.success'), key: 'asr-processing' })
} else {
// Interim result: still invoke the callback, marked as non-final
console.log('[ASRService] Interim result received, invoking callback, text:', data.data.text)
this.resultCallback(data.data.text, false)
}
} else {
console.log('[ASRService] Empty recognition result, callback not invoked')
}
@@ -189,9 +200,9 @@
}
// Holds the result callback
resultCallback: ((text: string) => void) | null = null
resultCallback: ((text: string, isFinal?: boolean) => void) | null = null
startRecording = async (onTranscribed?: (text: string) => void): Promise<void> => {
startRecording = async (onTranscribed?: (text: string, isFinal?: boolean) => void): Promise<void> => {
try {
const { asrEnabled, asrServiceType } = store.getState().settings
@@ -295,7 +306,7 @@
* @param onTranscribed
* @returns Promise<void>
*/
stopRecording = async (onTranscribed: (text: string) => void): Promise<void> => {
stopRecording = async (onTranscribed: (text: string, isFinal?: boolean) => void): Promise<void> => {
const { asrServiceType } = store.getState().settings
// When using the local server
@@ -318,7 +329,8 @@
// Invoke the callback immediately so the button state updates right away
if (onTranscribed) {
// An empty string leaves the input box untouched but still resets the button state
setTimeout(() => onTranscribed(''), 100)
// Pass false to mark this as a state update rather than a final result
setTimeout(() => onTranscribed('', false), 100)
}
} else {
throw new Error('WebSocket connection not ready')
@@ -493,14 +505,21 @@
// When using the local server
if (asrServiceType === 'local') {
if (this.isRecording) {
// Reset state and callback first so no later results get processed
this.isRecording = false
this.resultCallback = null
// Send the stop command
if (this.ws && this.wsConnected) {
this.ws.send(JSON.stringify({ type: 'stop' }))
}
// Reset state
this.isRecording = false
this.resultCallback = null
// Send an extra command asking the browser to force-reset recognition
setTimeout(() => {
if (this.ws && this.wsConnected) {
this.ws.send(JSON.stringify({ type: 'reset' }))
}
}, 100)
}
console.log('Speech recognition canceled')
window.message.info({ content: i18n.t('settings.asr.canceled'), key: 'asr-recording' })
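
The widened callback signature above, (text: string, isFinal?: boolean), lets callers render interim text without committing it. A consumer sketch (a hypothetical caller, not part of this commit; renderTranscript is an assumed UI hook):

let committed = ''
let preview = ''

ASRService.startRecording((text, isFinal) => {
if (!text) return // an empty string is only a button-state reset
if (isFinal) {
committed += (committed ? ' ' : '') + text // fold final segments into the transcript
preview = ''
} else {
preview = text // interim text: display only, never committed
}
renderTranscript(committed, preview) // assumed UI hook
})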

View File

@@ -0,0 +1,656 @@
import store from '@renderer/store';
import { fetchChatCompletion } from '@renderer/services/ApiService';
import { getAssistantMessage, getUserMessage } from '@renderer/services/MessagesService';
import { getDefaultAssistant } from '@renderer/services/AssistantService';
import TTSService from '@renderer/services/TTSService';
import ASRService from '@renderer/services/ASRService';
// Import types
import type { Message } from '@renderer/types';
interface VoiceCallCallbacks {
onTranscript: (text: string) => void;
onResponse: (text: string) => void;
onListeningStateChange: (isListening: boolean) => void;
onSpeakingStateChange: (isSpeaking: boolean) => void;
}
// Declare SpeechRecognition globals for TypeScript
declare global {
interface Window {
SpeechRecognition: any;
webkitSpeechRecognition: any;
}
}
class VoiceCallServiceClass {
private recognition: any = null;
private isCallActive = false;
private isRecording = false; // Recording state
private isMuted = false;
private isPaused = false;
private callbacks: VoiceCallCallbacks | null = null;
private _currentTranscript = ''; // Underscore prefix avoids an unused-variable warning
private _accumulatedTranscript = ''; // Accumulated recognition text
private conversationHistory: { role: string; content: string }[] = [];
private isProcessingResponse = false;
private ttsService = TTSService;
private recordingTimeout: NodeJS.Timeout | null = null; // Recording timeout timer
async initialize() {
// Check microphone permission
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
stream.getTracks().forEach(track => track.stop());
} catch (error) {
console.error('Microphone permission denied:', error);
throw new Error('Microphone permission denied');
}
// Get the current ASR service type
const { asrServiceType } = store.getState().settings;
// When using browser ASR, check browser support
if (asrServiceType === 'browser') {
if (!('webkitSpeechRecognition' in window) && !('SpeechRecognition' in window)) {
throw new Error('Speech recognition not supported in this browser');
}
// Initialize browser speech recognition
const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
this.recognition = new SpeechRecognition();
this.recognition.continuous = true;
this.recognition.interimResults = true;
this.recognition.lang = navigator.language || 'zh-CN';
} else if (asrServiceType === 'local') {
// When using local-server ASR, check the connection
try {
// Try to connect to the local ASR server
const connected = await ASRService.connectToWebSocketServer();
if (!connected) {
throw new Error('Cannot connect to the speech recognition service');
}
} catch (error) {
console.error('Failed to connect to ASR server:', error);
throw new Error('Failed to connect to ASR server');
}
}
return true;
}
async startCall(callbacks: VoiceCallCallbacks) {
this.callbacks = callbacks;
this.isCallActive = true;
this.conversationHistory = [];
// Get the current ASR service type
const { asrServiceType } = store.getState().settings;
// Initialize according to the ASR service type
if (asrServiceType === 'browser') {
if (!this.recognition) {
throw new Error('Browser speech recognition not initialized');
}
// Wire up browser speech recognition event handlers
this.recognition.onresult = (event: any) => {
let interimTranscript = '';
let finalTranscript = '';
for (let i = event.resultIndex; i < event.results.length; ++i) {
if (event.results[i].isFinal) {
finalTranscript += event.results[i][0].transcript;
} else {
interimTranscript += event.results[i][0].transcript;
}
}
if (interimTranscript) {
// Update the current interim result
this._currentTranscript = interimTranscript;
// Show the accumulated text plus the current interim text
this.callbacks?.onTranscript(this._accumulatedTranscript + ' ' + interimTranscript);
}
if (finalTranscript) {
// Fold the final result into the accumulated text
if (this._accumulatedTranscript) {
// Text already accumulated: append with a space
this._accumulatedTranscript += ' ' + finalTranscript;
} else {
// First segment: assign directly
this._accumulatedTranscript = finalTranscript;
}
// Clear the current interim result
this._currentTranscript = '';
// Show the full accumulated result
this.callbacks?.onTranscript(this._accumulatedTranscript);
// While recording only the transcript is updated; handleUserSpeech is not triggered.
// The complete recording is processed once the button is released.
}
};
this.recognition.onstart = () => {
this.isRecording = true;
this.callbacks?.onListeningStateChange(true);
};
this.recognition.onend = () => {
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
};
this.recognition.onerror = (event: any) => {
console.error('Speech recognition error', event.error);
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
};
}
// Play the welcome message
const welcomeMessage = '您好,我是您的AI助手,请长按说话按钮进行对话。';
this.callbacks?.onResponse(welcomeMessage);
// Track TTS state
const ttsStateHandler = (isPlaying: boolean) => {
this.callbacks?.onSpeakingStateChange(isPlaying);
};
// Listen for TTS playback state changes
window.addEventListener('tts-state-change', (event: any) => {
ttsStateHandler(event.detail.isPlaying);
});
// Play the welcome message and set the initial speaking state manually
this.callbacks?.onSpeakingStateChange(true);
this.ttsService.speak(welcomeMessage);
// Make sure the state is correct once the welcome message finishes
setTimeout(() => {
if (this.ttsService && !this.ttsService.isCurrentlyPlaying()) {
this.callbacks?.onSpeakingStateChange(false);
}
}, 5000); // Check TTS state after 5 seconds
return true;
}
/**
* Start recording (press-to-talk)
* @returns Promise<boolean>
*/
async startRecording(): Promise<boolean> {
if (!this.isCallActive || this.isPaused || this.isProcessingResponse || this.isRecording) {
return false;
}
// Reset the accumulated text
this._accumulatedTranscript = '';
// Get the current ASR service type
const { asrServiceType } = store.getState().settings;
try {
if (asrServiceType === 'browser') {
// Browser ASR
if (!this.recognition) {
throw new Error('Browser speech recognition not initialized');
}
this.recognition.start();
this.isRecording = true;
} else if (asrServiceType === 'local') {
// Local-server ASR
await ASRService.startRecording((text, isFinal) => {
if (text) {
if (isFinal) {
// Final result: fold it into the accumulated text
if (this._accumulatedTranscript) {
// Text already accumulated: append with a space
this._accumulatedTranscript += ' ' + text;
} else {
// First segment: assign directly
this._accumulatedTranscript = text;
}
// Clear the current interim result
this._currentTranscript = '';
// Show the full accumulated result
this.callbacks?.onTranscript(this._accumulatedTranscript);
} else {
// Interim result: update the current transcript
this._currentTranscript = text;
// Show the accumulated text plus the current interim text
this.callbacks?.onTranscript(this._accumulatedTranscript + ' ' + text);
}
// While recording only the transcript is updated; handleUserSpeech is not triggered.
// The complete recording is processed once the button is released.
}
});
this.isRecording = true;
this.callbacks?.onListeningStateChange(true);
} else if (asrServiceType === 'openai') {
// OpenAI ASR
await ASRService.startRecording();
this.isRecording = true;
this.callbacks?.onListeningStateChange(true);
}
// Cap the recording length in case the user forgets to release the button
this.recordingTimeout = setTimeout(() => {
if (this.isRecording) {
this.stopRecording();
}
}, 60000); // 60-second maximum recording time
return true;
} catch (error) {
console.error('Failed to start recording:', error);
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
return false;
}
}
/**
* Stop recording and send the result to the AI
* @returns Promise<boolean>
*/
async stopRecording(): Promise<boolean> {
if (!this.isCallActive || !this.isRecording) {
return false;
}
// Clear the recording timeout
if (this.recordingTimeout) {
clearTimeout(this.recordingTimeout);
this.recordingTimeout = null;
}
// Get the current ASR service type
const { asrServiceType } = store.getState().settings;
try {
// Capture the current transcript so it can be sent to the AI after release
const currentTranscript = this._currentTranscript;
// Capture the accumulated transcript
const accumulatedTranscript = this._accumulatedTranscript;
if (asrServiceType === 'browser') {
// Browser ASR
if (!this.recognition) {
throw new Error('Browser speech recognition not initialized');
}
this.recognition.stop();
// The onend event will set isRecording = false
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
// Prefer the accumulated text when available
if (accumulatedTranscript) {
console.log('Sending accumulated recognition result to the AI:', accumulatedTranscript);
this.handleUserSpeech(accumulatedTranscript);
} else if (currentTranscript) {
// No accumulated result: fall back to the current one
console.log('No accumulated result, using the current one:', currentTranscript);
this.handleUserSpeech(currentTranscript);
}
} else if (asrServiceType === 'local') {
// Local-server ASR
// Create a promise that waits for the final result
const finalResultPromise = new Promise<string>((resolve) => {
// Set a timeout so we never wait indefinitely
const timeoutId = setTimeout(() => {
console.log('Timed out waiting for the final result, using the current one');
resolve(this._currentTranscript);
}, 1500); // 1.5-second timeout
// Callback that receives the final result
const resultCallback = (text: string) => {
// An empty string is only a state reset; ignore it
if (text === '') return;
if (text) {
clearTimeout(timeoutId);
console.log('Received final recognition result:', text);
this._currentTranscript = text;
this.callbacks?.onTranscript(text);
resolve(text);
}
};
// Stop (not cancel) the recording so the final result still arrives
ASRService.stopRecording(resultCallback);
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
// Extra safety: send a reset shortly after stopping
setTimeout(() => {
// The reset keeps the browser from sending any further results
ASRService.cancelRecording();
}, 2000); // Force-cancel after 2 seconds as a safety net
});
// Await the final result
const finalText = await finalResultPromise;
// Prefer the accumulated text when available
if (accumulatedTranscript) {
console.log('Sending accumulated recognition result to the AI:', accumulatedTranscript);
this.handleUserSpeech(accumulatedTranscript);
} else if (finalText) {
// No accumulated result: send the final one
console.log('Sending final recognition result to the AI:', finalText);
this.handleUserSpeech(finalText);
} else if (currentTranscript) {
// No final result either: fall back to the current one
console.log('No final result, using the current one:', currentTranscript);
this.handleUserSpeech(currentTranscript);
}
} else if (asrServiceType === 'openai') {
// OpenAI ASR
await ASRService.stopRecording((text) => {
// Record the final transcript
if (text) {
this._currentTranscript = text;
this.callbacks?.onTranscript(text);
}
});
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
// Use the latest transcript
const finalTranscript = this._currentTranscript;
if (finalTranscript) {
this.handleUserSpeech(finalTranscript);
}
}
return true;
} catch (error) {
console.error('Failed to stop recording:', error);
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
return false;
}
}
async handleUserSpeech(text: string) {
if (!this.isCallActive || this.isProcessingResponse || this.isPaused) return;
// Pause recognition so it does not keep listening while the AI replies
const { asrServiceType } = store.getState().settings;
if (asrServiceType === 'browser') {
this.recognition?.stop();
} else if (asrServiceType === 'local' || asrServiceType === 'openai') {
ASRService.cancelRecording();
}
this.isProcessingResponse = true;
try {
// Get the current assistant
const assistant = getDefaultAssistant();
// Build a minimal Topic object
const topic = {
id: 'voice-call',
assistantId: assistant.id,
name: 'Voice Call',
createdAt: new Date().toISOString(),
updatedAt: new Date().toISOString(),
messages: []
};
// Build the user message
const userMessage = getUserMessage({
assistant,
topic,
type: 'text',
content: text
});
// Build the assistant message
const assistantMessage = getAssistantMessage({
assistant,
topic
});
// Update the conversation history
this.conversationHistory.push({ role: 'user', content: text });
// Build the message list:
// convert history entries into proper Message objects
const historyMessages = this.conversationHistory.map(msg => {
if (msg.role === 'user') {
return getUserMessage({
assistant,
topic,
type: 'text',
content: msg.content
});
} else {
const assistantMsg = getAssistantMessage({
assistant,
topic
});
return { ...assistantMsg, content: msg.content, status: 'success' };
}
});
// Prepend a voice-call prompt to the user turn. (The prompt's eight numbered
// guidelines were garbled in extraction; what survives is that replies should
// be short, conversational, and avoid Markdown-style formatting.)
const voiceCallPrompt = `当前是语音通话模式。请注意:回复要简短、口语化,不要使用Markdown等格式。`;
// Build the system instruction message
const systemMessage = getUserMessage({
assistant,
topic,
type: 'text',
content: voiceCallPrompt
});
// Set the user message content to the raw transcript
userMessage.content = text;
// Build the final message list
// (the type assertion papers over typing mismatches)
const messages = [systemMessage, ...historyMessages, userMessage] as Message[];
// Handle the streamed response
let fullResponse = '';
try {
// Call the real LLM API
await fetchChatCompletion({
message: assistantMessage,
messages,
assistant,
onResponse: async (msg) => {
if (msg.content && msg.content !== fullResponse) {
fullResponse = msg.content;
// Update the UI
this.callbacks?.onResponse(fullResponse);
// If TTS is already playing, stop it
if (this.ttsService.isCurrentlyPlaying()) {
this.ttsService.stop();
}
}
}
});
// Speak the full response
if (!this.isMuted && this.isCallActive) {
// Manually set the speaking state
this.callbacks?.onSpeakingStateChange(true);
this.ttsService.speak(fullResponse);
// Make sure the state is correct once speech ends
setTimeout(() => {
if (this.ttsService && !this.ttsService.isCurrentlyPlaying()) {
this.callbacks?.onSpeakingStateChange(false);
}
}, 1000); // Check TTS state after 1 second
}
// Update the conversation history
this.conversationHistory.push({ role: 'assistant', content: fullResponse });
} catch (innerError) {
console.error('Error generating response:', innerError);
// On error, fall back to a simple reply
fullResponse = `抱歉,处理您的请求时出错了。`;
this.callbacks?.onResponse(fullResponse);
if (!this.isMuted && this.isCallActive) {
// Manually set the speaking state
this.callbacks?.onSpeakingStateChange(true);
this.ttsService.speak(fullResponse);
// Make sure the state is correct once speech ends
setTimeout(() => {
if (this.ttsService && !this.ttsService.isCurrentlyPlaying()) {
this.callbacks?.onSpeakingStateChange(false);
}
}, 1000); // Check TTS state after 1 second
}
}
} catch (error) {
console.error('Error processing voice response:', error);
} finally {
this.isProcessingResponse = false;
// Do not auto-resume recognition; wait for the user's next long press.
// In press-to-talk mode there is nothing to resume automatically.
}
}
/**
* Cancel recording without sending anything to the AI
* @returns Promise<boolean>
*/
async cancelRecording(): Promise<boolean> {
if (!this.isCallActive || !this.isRecording) {
return false;
}
// Clear the recording timeout
if (this.recordingTimeout) {
clearTimeout(this.recordingTimeout);
this.recordingTimeout = null;
}
// Get the current ASR service type
const { asrServiceType } = store.getState().settings;
try {
if (asrServiceType === 'browser') {
// Browser ASR
if (!this.recognition) {
throw new Error('Browser speech recognition not initialized');
}
this.recognition.stop();
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
} else if (asrServiceType === 'local') {
// Local-server ASR
ASRService.cancelRecording();
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
} else if (asrServiceType === 'openai') {
// OpenAI ASR
ASRService.cancelRecording();
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
}
// Clear the current transcript
this._currentTranscript = '';
this.callbacks?.onTranscript('');
return true;
} catch (error) {
console.error('Failed to cancel recording:', error);
this.isRecording = false;
this.callbacks?.onListeningStateChange(false);
return false;
}
}
setMuted(muted: boolean) {
this.isMuted = muted;
// When muting, stop any current TTS playback
if (muted && this.ttsService.isCurrentlyPlaying()) {
this.ttsService.stop();
}
}
setPaused(paused: boolean) {
this.isPaused = paused;
// Get the current ASR service type
const { asrServiceType } = store.getState().settings;
if (paused) {
// Pause speech recognition
if (asrServiceType === 'browser') {
this.recognition?.stop();
} else if (asrServiceType === 'local' || asrServiceType === 'openai') {
ASRService.cancelRecording();
}
// Pause TTS
if (this.ttsService.isCurrentlyPlaying()) {
this.ttsService.stop();
}
}
// Do not auto-resume recognition; wait for the user's next long press
}
endCall() {
this.isCallActive = false;
// Get the current ASR service type
const { asrServiceType } = store.getState().settings;
// Stop speech recognition
if (asrServiceType === 'browser') {
this.recognition?.stop();
} else if (asrServiceType === 'local' || asrServiceType === 'openai') {
ASRService.cancelRecording();
}
// Stop TTS
if (this.ttsService.isCurrentlyPlaying()) {
this.ttsService.stop();
}
this.callbacks = null;
}
}
export const VoiceCallService = new VoiceCallServiceClass();
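
For orientation, the lifecycle of the singleton exported above, as the components in this commit drive it (a sketch; the callback bodies are illustrative):

// VoiceCallButton.handleClick: check permissions and the ASR backend
await VoiceCallService.initialize();

// VoiceCallModal mount: register UI callbacks; a greeting is spoken
await VoiceCallService.startCall({
onTranscript: (text) => console.log('you said:', text),
onResponse: (text) => console.log('AI replied:', text),
onListeningStateChange: (listening) => console.log({ listening }),
onSpeakingStateChange: (speaking) => console.log({ speaking }),
});

// Press-to-talk: holding the button records, releasing sends to the AI
await VoiceCallService.startRecording();
await VoiceCallService.stopRecording(); // or cancelRecording() if the pointer leaves

// Modal close/unmount
VoiceCallService.endCall();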