From 42ed1a48198e55bcfac943088871c684e9b1e211 Mon Sep 17 00:00:00 2001 From: 1600822305 <1600822305@qq.com> Date: Sat, 12 Apr 2025 12:04:27 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86=20TTS=20=E7=9B=B8?= =?UTF-8?q?=E5=85=B3=E6=9C=8D=E5=8A=A1=E5=B9=B6=E6=9B=B4=E6=96=B0=E4=BA=86?= =?UTF-8?q?=E8=AE=BE=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/services/MsTTSIpcHandler.ts | 53 +++++---- src/main/services/MsTTSService.ts | 2 +- src/renderer/src/components/ASRButton.tsx | 2 +- .../components/DraggableVoiceCallWindow.tsx | 108 +++++++++++------- src/renderer/src/i18n/locales/en-us.json | 7 +- src/renderer/src/i18n/locales/ja-jp.json | 10 +- src/renderer/src/i18n/locales/ru-ru.json | 10 +- src/renderer/src/i18n/locales/zh-tw.json | 10 +- .../src/pages/home/Inputbar/Inputbar.tsx | 3 +- .../src/pages/home/Messages/Message.tsx | 8 +- src/renderer/src/services/ASRService.ts | 3 +- .../src/services/tts/AudioStreamProcessor.ts | 34 +++--- src/renderer/src/services/tts/TTSService.ts | 14 +-- .../src/services/tts/TTSServiceInterface.ts | 8 +- 14 files changed, 159 insertions(+), 113 deletions(-) diff --git a/src/main/services/MsTTSIpcHandler.ts b/src/main/services/MsTTSIpcHandler.ts index 66292b09bd..95096c506a 100644 --- a/src/main/services/MsTTSIpcHandler.ts +++ b/src/main/services/MsTTSIpcHandler.ts @@ -16,32 +16,35 @@ export function registerMsTTSIpcHandlers(): void { ) // 流式合成语音 - ipcMain.handle(IpcChannel.MsTTS_SynthesizeStream, async (event, requestId: string, text: string, voice: string, outputFormat: string) => { - const window = BrowserWindow.fromWebContents(event.sender) - if (!window) return + ipcMain.handle( + IpcChannel.MsTTS_SynthesizeStream, + async (event, requestId: string, text: string, voice: string, outputFormat: string) => { + const window = BrowserWindow.fromWebContents(event.sender) + if (!window) return - try { - await MsTTSService.synthesizeStream( - text, - voice, - outputFormat, - (chunk: Uint8Array) => { - // 发送音频数据块 - if (!window.isDestroyed()) { - window.webContents.send(IpcChannel.MsTTS_StreamData, requestId, chunk) + try { + await MsTTSService.synthesizeStream( + text, + voice, + outputFormat, + (chunk: Uint8Array) => { + // 发送音频数据块 + if (!window.isDestroyed()) { + window.webContents.send(IpcChannel.MsTTS_StreamData, requestId, chunk) + } + }, + () => { + // 发送流结束信号 + if (!window.isDestroyed()) { + window.webContents.send(IpcChannel.MsTTS_StreamEnd, requestId) + } } - }, - () => { - // 发送流结束信号 - if (!window.isDestroyed()) { - window.webContents.send(IpcChannel.MsTTS_StreamEnd, requestId) - } - } - ) - return { success: true } - } catch (error) { - console.error('流式TTS合成失败:', error) - return { success: false, error: error instanceof Error ? error.message : String(error) } + ) + return { success: true } + } catch (error) { + console.error('流式TTS合成失败:', error) + return { success: false, error: error instanceof Error ? error.message : String(error) } + } } - }) + ) } diff --git a/src/main/services/MsTTSService.ts b/src/main/services/MsTTSService.ts index 2a056b80a7..b2112c8685 100644 --- a/src/main/services/MsTTSService.ts +++ b/src/main/services/MsTTSService.ts @@ -1,10 +1,10 @@ import fs from 'node:fs' import path from 'node:path' +import { MsEdgeTTS, OUTPUT_FORMAT } from 'edge-tts-node' // 新版支持流式的TTS库 import { app } from 'electron' import log from 'electron-log' import { EdgeTTS } from 'node-edge-tts' // 旧版TTS库 -import { MsEdgeTTS, OUTPUT_FORMAT } from 'edge-tts-node' // 新版支持流式的TTS库 // --- START OF HARDCODED VOICE LIST --- // WARNING: This list is static and may become outdated. diff --git a/src/renderer/src/components/ASRButton.tsx b/src/renderer/src/components/ASRButton.tsx index 3987e0ae26..ccfb05c770 100644 --- a/src/renderer/src/components/ASRButton.tsx +++ b/src/renderer/src/components/ASRButton.tsx @@ -33,7 +33,7 @@ const ASRButton: FC = ({ onTranscribed, disabled = false, style }) => { try { // 添加事件监听器,监听服务器发送的stopped消息 const originalCallback = ASRService.resultCallback - const stopCallback = (text: string, isFinal?: boolean) => { + const stopCallback = (text: string) => { // 如果是空字符串,只重置状态,不调用原始回调 if (text === '') { setIsProcessing(false) diff --git a/src/renderer/src/components/DraggableVoiceCallWindow.tsx b/src/renderer/src/components/DraggableVoiceCallWindow.tsx index beae254619..877f8698de 100644 --- a/src/renderer/src/components/DraggableVoiceCallWindow.tsx +++ b/src/renderer/src/components/DraggableVoiceCallWindow.tsx @@ -289,7 +289,9 @@ const DraggableVoiceCallWindow: React.FC = ({ } await VoiceCallService.startCall({ onTranscript: setTranscript, - onResponse: () => { /* 响应在聊天界面处理 */ }, + onResponse: () => { + /* 响应在聊天界面处理 */ + }, onListeningStateChange: setIsListening, onSpeakingStateChange: setIsSpeaking }) @@ -402,7 +404,7 @@ const DraggableVoiceCallWindow: React.FC = ({ await VoiceCallService.cancelRecording() setTranscript('') } catch (error) { - console.error('取消录音出错:', error); + console.error('取消录音出错:', error) } finally { setTimeout(() => setIsProcessing(false), 500) } @@ -412,23 +414,31 @@ const DraggableVoiceCallWindow: React.FC = ({ ) // --- 语音通话控制函数结束 --- - // --- 快捷键相关函数 --- const getKeyDisplayName = (keyCode: string) => { const keyMap: Record = { - Space: '空格键', Enter: '回车键', ShiftLeft: '左Shift键', ShiftRight: '右Shift键', - ControlLeft: '左Ctrl键', ControlRight: '右Ctrl键', AltLeft: '左Alt键', AltRight: '右Alt键' + Space: '空格键', + Enter: '回车键', + ShiftLeft: '左Shift键', + ShiftRight: '右Shift键', + ControlLeft: '左Ctrl键', + ControlRight: '右Ctrl键', + AltLeft: '左Alt键', + AltRight: '右Alt键' } return keyMap[keyCode] || keyCode } - const handleShortcutKeyChange = useCallback((e: KeyboardEvent) => { - e.preventDefault() - if (isRecordingShortcut) { - setTempShortcutKey(e.code) - setIsRecordingShortcut(false) - } - }, [isRecordingShortcut]) + const handleShortcutKeyChange = useCallback( + (e: KeyboardEvent) => { + e.preventDefault() + if (isRecordingShortcut) { + setTempShortcutKey(e.code) + setIsRecordingShortcut(false) + } + }, + [isRecordingShortcut] + ) const saveShortcutKey = useCallback(() => { setShortcutKey(tempShortcutKey) @@ -437,31 +447,42 @@ const DraggableVoiceCallWindow: React.FC = ({ }, [tempShortcutKey]) // 现在可以安全地使用 handleRecordStart/End - const handleKeyDown = useCallback((e: KeyboardEvent) => { - if (isRecordingShortcut) { - handleShortcutKeyChange(e) - return - } - if (e.code === shortcutKey && !isProcessing && !isPaused && visible && !isShortcutPressed) { - e.preventDefault() - setIsShortcutPressed(true) - const mockEvent = new MouseEvent('mousedown') as unknown as React.MouseEvent // 类型断言 - handleRecordStart(mockEvent) // 现在 handleRecordStart 已经定义 - } - }, [ - shortcutKey, isProcessing, isPaused, visible, isShortcutPressed, - handleRecordStart, // 依赖项 - isRecordingShortcut, handleShortcutKeyChange - ]) + const handleKeyDown = useCallback( + (e: KeyboardEvent) => { + if (isRecordingShortcut) { + handleShortcutKeyChange(e) + return + } + if (e.code === shortcutKey && !isProcessing && !isPaused && visible && !isShortcutPressed) { + e.preventDefault() + setIsShortcutPressed(true) + const mockEvent = new MouseEvent('mousedown') as unknown as React.MouseEvent // 类型断言 + handleRecordStart(mockEvent) // 现在 handleRecordStart 已经定义 + } + }, + [ + shortcutKey, + isProcessing, + isPaused, + visible, + isShortcutPressed, + handleRecordStart, // 依赖项 + isRecordingShortcut, + handleShortcutKeyChange + ] + ) - const handleKeyUp = useCallback((e: KeyboardEvent) => { - if (e.code === shortcutKey && isShortcutPressed && visible) { - e.preventDefault() - setIsShortcutPressed(false) - const mockEvent = new MouseEvent('mouseup') as unknown as React.MouseEvent // 类型断言 - handleRecordEnd(mockEvent) // 现在 handleRecordEnd 已经定义 - } - }, [shortcutKey, isShortcutPressed, visible, handleRecordEnd]) // 依赖项 + const handleKeyUp = useCallback( + (e: KeyboardEvent) => { + if (e.code === shortcutKey && isShortcutPressed && visible) { + e.preventDefault() + setIsShortcutPressed(false) + const mockEvent = new MouseEvent('mouseup') as unknown as React.MouseEvent // 类型断言 + handleRecordEnd(mockEvent) // 现在 handleRecordEnd 已经定义 + } + }, + [shortcutKey, isShortcutPressed, visible, handleRecordEnd] + ) // 依赖项 useEffect(() => { const savedShortcut = localStorage.getItem('voiceCallShortcutKey') @@ -483,7 +504,6 @@ const DraggableVoiceCallWindow: React.FC = ({ }, [visible, handleKeyDown, handleKeyUp]) // --- 快捷键相关函数结束 --- - // 如果不可见,直接返回 null if (!visible) return null @@ -511,10 +531,14 @@ const DraggableVoiceCallWindow: React.FC = ({ {isSettingsVisible && ( - {/* 使用 styled-component */} + + {' '} + {/* 使用 styled-component */} {t('voice_call.shortcut_key_setting')} {/* 使用 styled-component */} - setIsRecordingShortcut(true)}> {/* 使用 styled-component */} + setIsRecordingShortcut(true)}> + {' '} + {/* 使用 styled-component */} {isRecordingShortcut ? t('voice_call.press_any_key') : getKeyDisplayName(tempShortcutKey)} - {/* 使用 styled-component */} + + {' '} + {/* 使用 styled-component */} {t('voice_call.shortcut_key_tip')} @@ -579,4 +605,4 @@ const DraggableVoiceCallWindow: React.FC = ({ ) } -export default DraggableVoiceCallWindow \ No newline at end of file +export default DraggableVoiceCallWindow diff --git a/src/renderer/src/i18n/locales/en-us.json b/src/renderer/src/i18n/locales/en-us.json index bcf2d4194a..3e976dbe20 100644 --- a/src/renderer/src/i18n/locales/en-us.json +++ b/src/renderer/src/i18n/locales/en-us.json @@ -15,7 +15,12 @@ "initialization_failed": "Failed to initialize voice call", "error": "Voice call error", "initializing": "Initializing voice call...", - "ready": "Voice call ready" + "ready": "Voice call ready", + "shortcut_key_setting": "[to be translated]:语音识别快捷键设置", + "press_any_key": "[to be translated]:请按任意键...", + "save": "[to be translated]:保存", + "cancel": "[to be translated]:取消", + "shortcut_key_tip": "[to be translated]:按下此快捷键开始录音,松开快捷键结束录音并发送" }, "agents": { "add.button": "Add to Assistant", diff --git a/src/renderer/src/i18n/locales/ja-jp.json b/src/renderer/src/i18n/locales/ja-jp.json index cd8552b5b9..d5bcec3ee6 100644 --- a/src/renderer/src/i18n/locales/ja-jp.json +++ b/src/renderer/src/i18n/locales/ja-jp.json @@ -1421,8 +1421,7 @@ "mstts.voice": "[to be translated]:免费在线 TTS音色", "mstts.output_format": "[to be translated]:输出格式", "mstts.info": "[to be translated]:免费在线TTS服务不需要API密钥,完全免费使用。", - "error.no_mstts_voice": "[to be translated]:未设置免费在线 TTS音色", - "filter.emojis": "[to be translated]:过滤表情符号" + "error.no_mstts_voice": "[to be translated]:未设置免费在线 TTS音色" }, "asr": { "title": "音声認識", @@ -1544,7 +1543,12 @@ "initialization_failed": "[to be translated]:初始化语音通话失败", "error": "[to be translated]:语音通话出错", "initializing": "[to be translated]:正在初始化语音通话...", - "ready": "[to be translated]:语音通话已就绪" + "ready": "[to be translated]:语音通话已就绪", + "shortcut_key_setting": "[to be translated]:语音识别快捷键设置", + "press_any_key": "[to be translated]:请按任意键...", + "save": "[to be translated]:保存", + "cancel": "[to be translated]:取消", + "shortcut_key_tip": "[to be translated]:按下此快捷键开始录音,松开快捷键结束录音并发送" } } } \ No newline at end of file diff --git a/src/renderer/src/i18n/locales/ru-ru.json b/src/renderer/src/i18n/locales/ru-ru.json index efdee14c81..0833eb7f5c 100644 --- a/src/renderer/src/i18n/locales/ru-ru.json +++ b/src/renderer/src/i18n/locales/ru-ru.json @@ -1421,8 +1421,7 @@ "mstts.voice": "[to be translated]:免费在线 TTS音色", "mstts.output_format": "[to be translated]:输出格式", "mstts.info": "[to be translated]:免费在线TTS服务不需要API密钥,完全免费使用。", - "error.no_mstts_voice": "[to be translated]:未设置免费在线 TTS音色", - "filter.emojis": "[to be translated]:过滤表情符号" + "error.no_mstts_voice": "[to be translated]:未设置免费在线 TTS音色" }, "voice": { "title": "[to be translated]:语音功能", @@ -1544,7 +1543,12 @@ "initialization_failed": "[to be translated]:初始化语音通话失败", "error": "[to be translated]:语音通话出错", "initializing": "[to be translated]:正在初始化语音通话...", - "ready": "[to be translated]:语音通话已就绪" + "ready": "[to be translated]:语音通话已就绪", + "shortcut_key_setting": "[to be translated]:语音识别快捷键设置", + "press_any_key": "[to be translated]:请按任意键...", + "save": "[to be translated]:保存", + "cancel": "[to be translated]:取消", + "shortcut_key_tip": "[to be translated]:按下此快捷键开始录音,松开快捷键结束录音并发送" } } } \ No newline at end of file diff --git a/src/renderer/src/i18n/locales/zh-tw.json b/src/renderer/src/i18n/locales/zh-tw.json index 1039a1415e..def1809219 100644 --- a/src/renderer/src/i18n/locales/zh-tw.json +++ b/src/renderer/src/i18n/locales/zh-tw.json @@ -1421,8 +1421,7 @@ "mstts.voice": "[to be translated]:免费在线 TTS音色", "mstts.output_format": "[to be translated]:输出格式", "mstts.info": "[to be translated]:免费在线TTS服务不需要API密钥,完全免费使用。", - "error.no_mstts_voice": "[to be translated]:未设置免费在线 TTS音色", - "filter.emojis": "[to be translated]:过滤表情符号" + "error.no_mstts_voice": "[to be translated]:未设置免费在线 TTS音色" }, "voice": { "title": "[to be translated]:语音功能", @@ -1544,7 +1543,12 @@ "initialization_failed": "[to be translated]:初始化语音通话失败", "error": "[to be translated]:语音通话出错", "initializing": "[to be translated]:正在初始化语音通话...", - "ready": "[to be translated]:语音通话已就绪" + "ready": "[to be translated]:语音通话已就绪", + "shortcut_key_setting": "[to be translated]:语音识别快捷键设置", + "press_any_key": "[to be translated]:请按任意键...", + "save": "[to be translated]:保存", + "cancel": "[to be translated]:取消", + "shortcut_key_tip": "[to be translated]:按下此快捷键开始录音,松开快捷键结束录音并发送" } } } \ No newline at end of file diff --git a/src/renderer/src/pages/home/Inputbar/Inputbar.tsx b/src/renderer/src/pages/home/Inputbar/Inputbar.tsx index 97845a9a7a..7897ef2dda 100644 --- a/src/renderer/src/pages/home/Inputbar/Inputbar.tsx +++ b/src/renderer/src/pages/home/Inputbar/Inputbar.tsx @@ -77,7 +77,8 @@ let _files: FileType[] = [] const Inputbar: FC = ({ assistant: _assistant, setActiveTopic, topic }) => { const [text, setText] = useState(_text) - const [asrCurrentText, setAsrCurrentText] = useState('') + // 用于存储语音识别的中间结果,不直接显示在输入框中 + const [, setAsrCurrentText] = useState('') const [inputFocus, setInputFocus] = useState(false) const { assistant, addTopic, model, setModel, updateAssistant } = useAssistant(_assistant.id) const { diff --git a/src/renderer/src/pages/home/Messages/Message.tsx b/src/renderer/src/pages/home/Messages/Message.tsx index 36ddc2c4da..d565dab851 100644 --- a/src/renderer/src/pages/home/Messages/Message.tsx +++ b/src/renderer/src/pages/home/Messages/Message.tsx @@ -155,13 +155,7 @@ const MessageItem: FC = ({ useEffect(() => { // 如果是最后一条助手消息,且消息状态为成功,且不是正在生成中,且TTS已启用 // 注意:只有在语音通话窗口打开时才自动播放TTS - if ( - isLastMessage && - isAssistantMessage && - message.status === 'success' && - !generating && - ttsEnabled - ) { + if (isLastMessage && isAssistantMessage && message.status === 'success' && !generating && ttsEnabled) { // 如果语音通话窗口没有打开,则不自动播放TTS if (!isVoiceCallActive) { console.log('不自动播放TTS,因为语音通话窗口没有打开:', isVoiceCallActive) diff --git a/src/renderer/src/services/ASRService.ts b/src/renderer/src/services/ASRService.ts index 19b699bafd..5431040e55 100644 --- a/src/renderer/src/services/ASRService.ts +++ b/src/renderer/src/services/ASRService.ts @@ -170,7 +170,8 @@ class ASRService { // 直接调用回调函数 this.resultCallback(data.data.text, true) window.message.success({ content: i18n.t('settings.asr.success'), key: 'asr-processing' }) - } else if (this.isRecording) { // 只在录音中才处理中间结果 + } else if (this.isRecording) { + // 只在录音中才处理中间结果 // 非最终结果,也调用回调,但标记为非最终 console.log('[ASRService] 收到中间结果,调用回调函数,文本:', data.data.text) this.resultCallback(data.data.text, false) diff --git a/src/renderer/src/services/tts/AudioStreamProcessor.ts b/src/renderer/src/services/tts/AudioStreamProcessor.ts index be368431a7..c0b858f222 100644 --- a/src/renderer/src/services/tts/AudioStreamProcessor.ts +++ b/src/renderer/src/services/tts/AudioStreamProcessor.ts @@ -6,10 +6,10 @@ export class AudioStreamProcessor { private audioContext: AudioContext | null = null private audioQueue: Uint8Array[] = [] private isProcessing: boolean = false - + // 回调函数 public onAudioBuffer: ((buffer: AudioBuffer) => void) | null = null - + /** * 初始化音频处理器 */ @@ -19,7 +19,7 @@ export class AudioStreamProcessor { this.audioQueue = [] this.isProcessing = false } - + /** * 处理音频数据块 * @param chunk 音频数据块 @@ -28,16 +28,16 @@ export class AudioStreamProcessor { if (!this.audioContext) { throw new Error('AudioStreamProcessor not initialized') } - + // 将数据块添加到队列 this.audioQueue.push(chunk) - + // 如果没有正在处理,开始处理 if (!this.isProcessing) { this.processQueue() } } - + /** * 处理队列中的音频数据 */ @@ -46,20 +46,18 @@ export class AudioStreamProcessor { this.isProcessing = false return } - + this.isProcessing = true - + // 获取队列中的第一个数据块 const chunk = this.audioQueue.shift()! - + try { // 解码音频数据 // 将SharedArrayBuffer转换为ArrayBuffer - const arrayBuffer = chunk.buffer instanceof SharedArrayBuffer - ? new Uint8Array(chunk.buffer).buffer - : chunk.buffer - const audioBuffer = await this.audioContext.decodeAudioData(arrayBuffer) - + const arrayBuffer = chunk.buffer instanceof SharedArrayBuffer ? new Uint8Array(chunk.buffer).buffer : chunk.buffer + const audioBuffer = await this.audioContext.decodeAudioData(arrayBuffer as ArrayBuffer) + // 调用回调函数 if (this.onAudioBuffer) { this.onAudioBuffer(audioBuffer) @@ -67,20 +65,20 @@ export class AudioStreamProcessor { } catch (error) { console.error('解码音频数据失败:', error) } - + // 继续处理队列中的下一个数据块 this.processQueue() } - + /** * 完成处理 */ public async finish(): Promise { // 等待队列处理完成 while (this.audioQueue.length > 0) { - await new Promise(resolve => setTimeout(resolve, 100)) + await new Promise((resolve) => setTimeout(resolve, 100)) } - + // 关闭音频上下文 if (this.audioContext) { await this.audioContext.close() diff --git a/src/renderer/src/services/tts/TTSService.ts b/src/renderer/src/services/tts/TTSService.ts index 508ead169b..7c5f8ed34e 100644 --- a/src/renderer/src/services/tts/TTSService.ts +++ b/src/renderer/src/services/tts/TTSService.ts @@ -63,13 +63,13 @@ export class TTSService { // 只有在使用EdgeTTS且标记为正在播放时才检查 if (this.isPlaying && this.playingServiceType === 'edge') { // 检查是否还在播放 - const isSpeaking = window.speechSynthesis.speaking; + const isSpeaking = window.speechSynthesis.speaking if (!isSpeaking) { console.log('检测到speechSynthesis不再播放,更新状态') - this.updatePlayingState(false); + this.updatePlayingState(false) } } - }, 500); // 每500毫秒检查一次 + }, 500) // 每500毫秒检查一次 } } @@ -109,7 +109,7 @@ export class TTSService { private updatePlayingState(isPlaying: boolean): void { // 只有状态变化时才更新和触发事件 if (this.isPlaying !== isPlaying) { - this.isPlaying = isPlaying; + this.isPlaying = isPlaying console.log(`TTS播放状态更新: ${isPlaying ? '开始播放' : '停止播放'}`) // 触发自定义事件,通知其他组件TTS状态变化 @@ -118,11 +118,11 @@ export class TTSService { // 如果停止播放,清除服务类型 if (!isPlaying) { - this.playingServiceType = null; + this.playingServiceType = null // 确保Web Speech API也停止 if ('speechSynthesis' in window) { - window.speechSynthesis.cancel(); + window.speechSynthesis.cancel() } } } @@ -148,7 +148,7 @@ export class TTSService { if (this.isPlaying) { this.stop() // 添加短暂延迟,确保上一个播放完全停止 - await new Promise(resolve => setTimeout(resolve, 100)) + await new Promise((resolve) => setTimeout(resolve, 100)) } // 确保文本不为空 diff --git a/src/renderer/src/services/tts/TTSServiceInterface.ts b/src/renderer/src/services/tts/TTSServiceInterface.ts index 48b7c31bb3..7592acdc82 100644 --- a/src/renderer/src/services/tts/TTSServiceInterface.ts +++ b/src/renderer/src/services/tts/TTSServiceInterface.ts @@ -19,5 +19,11 @@ export interface TTSServiceInterface { * @param onError 错误回调 * @returns 返回请求ID */ - synthesizeStream?(text: string, onStart: () => void, onData: (audioChunk: AudioBuffer) => void, onEnd: () => void, onError: (error: Error) => void): Promise + synthesizeStream?( + text: string, + onStart: () => void, + onData: (audioChunk: AudioBuffer) => void, + onEnd: () => void, + onError: (error: Error) => void + ): Promise }