diff --git a/src/main/index.ts b/src/main/index.ts
index 127518b648..ded41250cc 100644
--- a/src/main/index.ts
+++ b/src/main/index.ts
@@ -6,8 +6,8 @@ import installExtension, { REACT_DEVELOPER_TOOLS, REDUX_DEVTOOLS } from 'electro
 import { registerIpc } from './ipc'
 import { configManager } from './services/ConfigManager'
-import { CHERRY_STUDIO_PROTOCOL, handleProtocolUrl, registerProtocolClient } from './services/ProtocolClient'
 import { registerMsTTSIpcHandlers } from './services/MsTTSIpcHandler'
+import { CHERRY_STUDIO_PROTOCOL, handleProtocolUrl, registerProtocolClient } from './services/ProtocolClient'
 import { registerShortcuts } from './services/ShortcutService'
 import { TrayService } from './services/TrayService'
 import { windowService } from './services/WindowService'
diff --git a/src/main/ipc.ts b/src/main/ipc.ts
index 0f33056f60..f5ada1f94e 100644
--- a/src/main/ipc.ts
+++ b/src/main/ipc.ts
@@ -19,11 +19,11 @@ import FileStorage from './services/FileStorage'
 import { GeminiService } from './services/GeminiService'
 import KnowledgeService from './services/KnowledgeService'
 import mcpService from './services/MCPService'
+import * as MsTTSService from './services/MsTTSService'
 import * as NutstoreService from './services/NutstoreService'
 import ObsidianVaultService from './services/ObsidianVaultService'
 import { ProxyConfig, proxyManager } from './services/ProxyManager'
 import { searchService } from './services/SearchService'
-import * as MsTTSService from './services/MsTTSService'
 import { registerShortcuts, unregisterAllShortcuts } from './services/ShortcutService'
 import { TrayService } from './services/TrayService'
 import { windowService } from './services/WindowService'
@@ -307,9 +307,7 @@ export function registerIpc(mainWindow: BrowserWindow, app: Electron.App) {
   // 注册MsTTS IPC处理程序
   ipcMain.handle(IpcChannel.MsTTS_GetVoices, MsTTSService.getVoices)
-  ipcMain.handle(
-    IpcChannel.MsTTS_Synthesize,
-    (_, text: string, voice: string, outputFormat: string) =>
-      MsTTSService.synthesize(text, voice, outputFormat)
+  ipcMain.handle(IpcChannel.MsTTS_Synthesize, (_, text: string, voice: string, outputFormat: string) =>
    MsTTSService.synthesize(text, voice, outputFormat)
   )
 }
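Review note: registerIpc now registers MsTTS_GetVoices/MsTTS_Synthesize, and registerMsTTSIpcHandlers (further down in this diff) registers the same two channels. If index.ts ends up calling both, Electron's ipcMain.handle throws "Attempted to register a second handler". A minimal, hypothetical guard — handleExclusive is not part of this PR — that would make a duplicate registration a safe overwrite:

import { ipcMain } from 'electron'

// Hypothetical helper (not in this PR): drop any existing handler before
// registering, so duplicate registration paths cannot crash startup.
export function handleExclusive(
  channel: string,
  listener: (event: Electron.IpcMainInvokeEvent, ...args: any[]) => any
): void {
  ipcMain.removeHandler(channel) // no-op if the channel has no handler yet
  ipcMain.handle(channel, listener)
}

The cleaner fix is of course to keep only one of the two registration sites.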
diff --git a/src/main/services/ASRServerService.ts b/src/main/services/ASRServerService.ts
index b9ecd0c505..1aec4c8498 100644
--- a/src/main/services/ASRServerService.ts
+++ b/src/main/services/ASRServerService.ts
@@ -27,7 +27,7 @@ class ASRServerService {
    * 启动ASR服务器
    * @returns Promise<{success: boolean, pid?: number, error?: string}>
    */
-  private async startServer(): Promise<{success: boolean, pid?: number, error?: string}> {
+  private async startServer(): Promise<{ success: boolean; pid?: number; error?: string }> {
     try {
       if (this.asrServerProcess) {
         return { success: true, pid: this.asrServerProcess.pid }
       }
@@ -90,7 +90,7 @@
       })

       // 等待一段时间确保服务器启动
-      await new Promise(resolve => setTimeout(resolve, 1000))
+      await new Promise((resolve) => setTimeout(resolve, 1000))

       return { success: true, pid: this.asrServerProcess.pid }
     } catch (error) {
@@ -105,7 +105,10 @@
    * @param pid 进程ID
    * @returns Promise<{success: boolean, error?: string}>
    */
-  private async stopServer(_event: Electron.IpcMainInvokeEvent, pid?: number): Promise<{success: boolean, error?: string}> {
+  private async stopServer(
+    _event: Electron.IpcMainInvokeEvent,
+    pid?: number
+  ): Promise<{ success: boolean; error?: string }> {
     try {
       if (!this.asrServerProcess) {
         return { success: true }
       }
@@ -120,7 +123,7 @@ class ASRServerService {
       this.asrServerProcess.kill()

       // 等待一段时间确保进程已经退出
-      await new Promise(resolve => setTimeout(resolve, 500))
+      await new Promise((resolve) => setTimeout(resolve, 500))

       this.asrServerProcess = null
       return { success: true }
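The fixed 500 ms sleep above (and the 1000 ms one in startServer) only approximates shutdown/startup. A small alternative sketch — this helper is not part of the diff, and the names are assumed from the class above — that resolves on the child's actual exit event, keeping the timeout only as a fallback:

import type { ChildProcess } from 'node:child_process'

// Sketch only: resolve as soon as the child reports exit, but never wait
// longer than timeoutMs, so a hung process cannot stall shutdown.
function waitForExit(child: ChildProcess, timeoutMs = 500): Promise<void> {
  return new Promise((resolve) => {
    const timer = setTimeout(resolve, timeoutMs)
    child.once('exit', () => {
      clearTimeout(timer)
      resolve()
    })
  })
}

stopServer could then follow this.asrServerProcess.kill() with await waitForExit(this.asrServerProcess) instead of the unconditional sleep.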
diff --git a/src/main/services/MsEdgeTTSService.ts b/src/main/services/MsEdgeTTSService.ts
index 1dfc62d9c1..21346872ec 100644
--- a/src/main/services/MsEdgeTTSService.ts
+++ b/src/main/services/MsEdgeTTSService.ts
@@ -1,23 +1,24 @@
-import { EdgeTTS } from 'node-edge-tts';
-import fs from 'node:fs';
-import path from 'node:path';
-import { app } from 'electron';
-import log from 'electron-log';
+import fs from 'node:fs'
+import path from 'node:path'
+
+import { app } from 'electron'
+import log from 'electron-log'
+import { EdgeTTS } from 'node-edge-tts'

 /**
  * Microsoft Edge TTS服务
  * 使用Microsoft Edge的在线TTS服务,不需要API密钥
  */
 class MsEdgeTTSService {
-  private static instance: MsEdgeTTSService;
-  private tempDir: string;
+  private static instance: MsEdgeTTSService
+  private tempDir: string

   private constructor() {
-    this.tempDir = path.join(app.getPath('temp'), 'cherry-tts');
+    this.tempDir = path.join(app.getPath('temp'), 'cherry-tts')
     // 确保临时目录存在
     if (!fs.existsSync(this.tempDir)) {
-      fs.mkdirSync(this.tempDir, { recursive: true });
+      fs.mkdirSync(this.tempDir, { recursive: true })
     }
   }

@@ -26,9 +27,9 @@ class MsEdgeTTSService {
    */
   public static getInstance(): MsEdgeTTSService {
     if (!MsEdgeTTSService.instance) {
-      MsEdgeTTSService.instance = new MsEdgeTTSService();
+      MsEdgeTTSService.instance = new MsEdgeTTSService()
     }
-    return MsEdgeTTSService.instance;
+    return MsEdgeTTSService.instance
   }

   /**
@@ -46,11 +47,11 @@ class MsEdgeTTSService {
       { name: 'zh-CN-XiaomoNeural', locale: 'zh-CN', gender: 'Female' },
       { name: 'zh-CN-XiaoxuanNeural', locale: 'zh-CN', gender: 'Female' },
       { name: 'zh-CN-XiaoruiNeural', locale: 'zh-CN', gender: 'Female' },
-      { name: 'zh-CN-YunfengNeural', locale: 'zh-CN', gender: 'Male' },
-    ];
+      { name: 'zh-CN-YunfengNeural', locale: 'zh-CN', gender: 'Male' }
+    ]
     } catch (error) {
-      log.error('获取Microsoft Edge TTS语音列表失败:', error);
-      throw error;
+      log.error('获取Microsoft Edge TTS语音列表失败:', error)
+      throw error
     }
   }

   /**
@@ -63,15 +64,15 @@ class MsEdgeTTSService {
    */
   public async synthesize(text: string, voice: string, outputFormat: string): Promise<string> {
     try {
-      log.info(`Microsoft Edge TTS合成语音: 文本="${text.substring(0, 30)}...", 语音=${voice}, 格式=${outputFormat}`);
+      log.info(`Microsoft Edge TTS合成语音: 文本="${text.substring(0, 30)}...", 语音=${voice}, 格式=${outputFormat}`)

       // 验证输入参数
       if (!text || text.trim() === '') {
-        throw new Error('要合成的文本不能为空');
+        throw new Error('要合成的文本不能为空')
       }

       if (!voice || voice.trim() === '') {
-        throw new Error('语音名称不能为空');
+        throw new Error('语音名称不能为空')
       }

       // 创建一个新的EdgeTTS实例,并设置参数
@@ -79,58 +80,58 @@ class MsEdgeTTSService {
       voice: voice,
       outputFormat: outputFormat,
       timeout: 30000, // 30秒超时
-      rate: '+0%',   // 正常语速
+      rate: '+0%', // 正常语速
       pitch: '+0Hz', // 正常音调
-      volume: '+0%'  // 正常音量
-    });
+      volume: '+0%' // 正常音量
+    })

       // 生成临时文件路径
-      const timestamp = Date.now();
-      const fileExtension = outputFormat.includes('mp3') ? 'mp3' : outputFormat.split('-').pop() || 'audio';
-      const outputPath = path.join(this.tempDir, `tts_${timestamp}.${fileExtension}`);
+      const timestamp = Date.now()
+      const fileExtension = outputFormat.includes('mp3') ? 'mp3' : outputFormat.split('-').pop() || 'audio'
+      const outputPath = path.join(this.tempDir, `tts_${timestamp}.${fileExtension}`)

-      log.info(`开始生成语音文件: ${outputPath}`);
+      log.info(`开始生成语音文件: ${outputPath}`)

       // 使用ttsPromise方法生成文件
-      await tts.ttsPromise(text, outputPath);
+      await tts.ttsPromise(text, outputPath)

       // 验证生成的文件是否存在且大小大于0
       if (!fs.existsSync(outputPath)) {
-        throw new Error(`生成的语音文件不存在: ${outputPath}`);
+        throw new Error(`生成的语音文件不存在: ${outputPath}`)
       }

-      const stats = fs.statSync(outputPath);
+      const stats = fs.statSync(outputPath)
       if (stats.size === 0) {
-        throw new Error(`生成的语音文件大小为0: ${outputPath}`);
+        throw new Error(`生成的语音文件大小为0: ${outputPath}`)
       }

-      log.info(`Microsoft Edge TTS合成成功: ${outputPath}, 文件大小: ${stats.size} 字节`);
-      return outputPath;
+      log.info(`Microsoft Edge TTS合成成功: ${outputPath}, 文件大小: ${stats.size} 字节`)
+      return outputPath
     } catch (error: any) {
       // 记录详细的错误信息
-      log.error(`Microsoft Edge TTS语音合成失败 (语音=${voice}):`, error);
+      log.error(`Microsoft Edge TTS语音合成失败 (语音=${voice}):`, error)

       // 尝试提供更有用的错误信息
       if (error.message && typeof error.message === 'string') {
         if (error.message.includes('Timed out')) {
-          throw new Error(`语音合成超时,请检查网络连接或尝试其他语音`);
+          throw new Error(`语音合成超时,请检查网络连接或尝试其他语音`)
         } else if (error.message.includes('ENOTFOUND')) {
-          throw new Error(`无法连接到Microsoft语音服务,请检查网络连接`);
+          throw new Error(`无法连接到Microsoft语音服务,请检查网络连接`)
         } else if (error.message.includes('ECONNREFUSED')) {
-          throw new Error(`连接被拒绝,请检查网络设置或代理配置`);
+          throw new Error(`连接被拒绝,请检查网络设置或代理配置`)
         }
       }

-      throw error;
+      throw error
     }
   }
 }

 // 导出单例方法
 export const getVoices = async () => {
-  return await MsEdgeTTSService.getInstance().getVoices();
-};
+  return await MsEdgeTTSService.getInstance().getVoices()
+}

 export const synthesize = async (text: string, voice: string, outputFormat: string) => {
-  return await MsEdgeTTSService.getInstance().synthesize(text, voice, outputFormat);
-};
+  return await MsEdgeTTSService.getInstance().synthesize(text, voice, outputFormat)
+}
diff --git a/src/main/services/MsTTSIpcHandler.ts b/src/main/services/MsTTSIpcHandler.ts
index 24a31f8e23..083d4b3554 100644
--- a/src/main/services/MsTTSIpcHandler.ts
+++ b/src/main/services/MsTTSIpcHandler.ts
@@ -1,18 +1,17 @@
-import { IpcChannel } from '@shared/IpcChannel';
-import { ipcMain } from 'electron';
-import * as MsTTSService from './MsTTSService';
+import { IpcChannel } from '@shared/IpcChannel'
+import { ipcMain } from 'electron'
+
+import * as MsTTSService from './MsTTSService'

 /**
  * 注册MsTTS相关的IPC处理程序
  */
 export function registerMsTTSIpcHandlers(): void {
   // 获取可用的语音列表
-  ipcMain.handle(IpcChannel.MsTTS_GetVoices, MsTTSService.getVoices);
-
+  ipcMain.handle(IpcChannel.MsTTS_GetVoices, MsTTSService.getVoices)
+
   // 合成语音
-  ipcMain.handle(
-    IpcChannel.MsTTS_Synthesize,
-    (_, text: string, voice: string, outputFormat: string) =>
-      MsTTSService.synthesize(text, voice, outputFormat)
-  );
+  ipcMain.handle(IpcChannel.MsTTS_Synthesize, (_, text: string, voice: string, outputFormat: string) =>
+    MsTTSService.synthesize(text, voice, outputFormat)
+  )
 }
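For orientation: the renderer reaches these handlers through a preload bridge — TTSSettings.tsx below calls window.api.msTTS.getVoices(). The preload wiring itself is not part of this diff, so the following is only a sketch of what that exposure presumably looks like; the 'api' shape is an assumption inferred from the call sites:

import { contextBridge, ipcRenderer } from 'electron'

import { IpcChannel } from '@shared/IpcChannel'

// Preload sketch (hypothetical file): expose the two MsTTS channels to the renderer.
contextBridge.exposeInMainWorld('api', {
  msTTS: {
    getVoices: () => ipcRenderer.invoke(IpcChannel.MsTTS_GetVoices),
    synthesize: (text: string, voice: string, outputFormat: string) =>
      ipcRenderer.invoke(IpcChannel.MsTTS_Synthesize, text, voice, outputFormat)
  }
})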
diff --git a/src/main/services/MsTTSService.ts b/src/main/services/MsTTSService.ts
index 1d7fcf42f3..30ced63fc8 100644
--- a/src/main/services/MsTTSService.ts
+++ b/src/main/services/MsTTSService.ts
@@ -1,115 +1,440 @@
-import { EdgeTTS } from 'node-edge-tts'; // listVoices is no longer needed here
-import fs from 'node:fs';
-import path from 'node:path';
-import { app } from 'electron';
-import log from 'electron-log';
+import fs from 'node:fs'
+import path from 'node:path'
+
+import { app } from 'electron'
+import log from 'electron-log'
+import { EdgeTTS } from 'node-edge-tts' // listVoices is no longer needed here

 // --- START OF HARDCODED VOICE LIST ---
 // WARNING: This list is static and may become outdated.
 // It's generally recommended to use listVoices() for the most up-to-date list.
 const hardcodedVoices = [
-  { Name: 'Microsoft Server Speech Text to Speech Voice (af-ZA, AdriNeural)', ShortName: 'af-ZA-AdriNeural', Gender: 'Female', Locale: 'af-ZA' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (am-ET, MekdesNeural)', ShortName: 'am-ET-MekdesNeural', Gender: 'Female', Locale: 'am-ET' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (ar-AE, FatimaNeural)', ShortName: 'ar-AE-FatimaNeural', Gender: 'Female', Locale: 'ar-AE' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (ar-AE, HamdanNeural)', ShortName: 'ar-AE-HamdanNeural', Gender: 'Male', Locale: 'ar-AE' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (ar-BH, AliNeural)', ShortName: 'ar-BH-AliNeural', Gender: 'Male', Locale: 'ar-BH' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (ar-BH, LailaNeural)', ShortName: 'ar-BH-LailaNeural', Gender: 'Female', Locale: 'ar-BH' },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (af-ZA, AdriNeural)',
+    ShortName: 'af-ZA-AdriNeural',
+    Gender: 'Female',
+    Locale: 'af-ZA'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (am-ET, MekdesNeural)',
+    ShortName: 'am-ET-MekdesNeural',
+    Gender: 'Female',
+    Locale: 'am-ET'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (ar-AE, FatimaNeural)',
+    ShortName: 'ar-AE-FatimaNeural',
+    Gender: 'Female',
+    Locale: 'ar-AE'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (ar-AE, HamdanNeural)',
+    ShortName: 'ar-AE-HamdanNeural',
+    Gender: 'Male',
+    Locale: 'ar-AE'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (ar-BH, AliNeural)',
+    ShortName: 'ar-BH-AliNeural',
+    Gender: 'Male',
+    Locale: 'ar-BH'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (ar-BH, LailaNeural)',
+    ShortName: 'ar-BH-LailaNeural',
+    Gender: 'Female',
+    Locale: 'ar-BH'
+  },
   // ... (Many other Arabic locales/voices) ...
-  { Name: 'Microsoft Server Speech Text to Speech Voice (ar-SA, ZariyahNeural)', ShortName: 'ar-SA-ZariyahNeural', Gender: 'Female', Locale: 'ar-SA' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (az-AZ, BabekNeural)', ShortName: 'az-AZ-BabekNeural', Gender: 'Male', Locale: 'az-AZ' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (az-AZ, BanuNeural)', ShortName: 'az-AZ-BanuNeural', Gender: 'Female', Locale: 'az-AZ' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (bg-BG, BorislavNeural)', ShortName: 'bg-BG-BorislavNeural', Gender: 'Male', Locale: 'bg-BG' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (bg-BG, KalinaNeural)', ShortName: 'bg-BG-KalinaNeural', Gender: 'Female', Locale: 'bg-BG' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (bn-BD, NabanitaNeural)', ShortName: 'bn-BD-NabanitaNeural', Gender: 'Female', Locale: 'bn-BD' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (bn-BD, PradeepNeural)', ShortName: 'bn-BD-PradeepNeural', Gender: 'Male', Locale: 'bn-BD' },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (ar-SA, ZariyahNeural)',
+    ShortName: 'ar-SA-ZariyahNeural',
+    Gender: 'Female',
+    Locale: 'ar-SA'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (az-AZ, BabekNeural)',
+    ShortName: 'az-AZ-BabekNeural',
+    Gender: 'Male',
+    Locale: 'az-AZ'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (az-AZ, BanuNeural)',
+    ShortName: 'az-AZ-BanuNeural',
+    Gender: 'Female',
+    Locale: 'az-AZ'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (bg-BG, BorislavNeural)',
+    ShortName: 'bg-BG-BorislavNeural',
+    Gender: 'Male',
+    Locale: 'bg-BG'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (bg-BG, KalinaNeural)',
+    ShortName: 'bg-BG-KalinaNeural',
+    Gender: 'Female',
+    Locale: 'bg-BG'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (bn-BD, NabanitaNeural)',
+    ShortName: 'bn-BD-NabanitaNeural',
+    Gender: 'Female',
+    Locale: 'bn-BD'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (bn-BD, PradeepNeural)',
+    ShortName: 'bn-BD-PradeepNeural',
+    Gender: 'Male',
+    Locale: 'bn-BD'
+  },
   // ... (Catalan, Czech, Welsh, Danish, German, Greek, English variants) ...
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-AU, NatashaNeural)', ShortName: 'en-AU-NatashaNeural', Gender: 'Female', Locale: 'en-AU' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-AU, WilliamNeural)', ShortName: 'en-AU-WilliamNeural', Gender: 'Male', Locale: 'en-AU' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-CA, ClaraNeural)', ShortName: 'en-CA-ClaraNeural', Gender: 'Female', Locale: 'en-CA' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-CA, LiamNeural)', ShortName: 'en-CA-LiamNeural', Gender: 'Male', Locale: 'en-CA' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, LibbyNeural)', ShortName: 'en-GB-LibbyNeural', Gender: 'Female', Locale: 'en-GB' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, MaisieNeural)', ShortName: 'en-GB-MaisieNeural', Gender: 'Female', Locale: 'en-GB' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, RyanNeural)', ShortName: 'en-GB-RyanNeural', Gender: 'Male', Locale: 'en-GB' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, SoniaNeural)', ShortName: 'en-GB-SoniaNeural', Gender: 'Female', Locale: 'en-GB' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, ThomasNeural)', ShortName: 'en-GB-ThomasNeural', Gender: 'Male', Locale: 'en-GB' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-HK, SamNeural)', ShortName: 'en-HK-SamNeural', Gender: 'Male', Locale: 'en-HK' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-HK, YanNeural)', ShortName: 'en-HK-YanNeural', Gender: 'Female', Locale: 'en-HK' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-IE, ConnorNeural)', ShortName: 'en-IE-ConnorNeural', Gender: 'Male', Locale: 'en-IE' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-IE, EmilyNeural)', ShortName: 'en-IE-EmilyNeural', Gender: 'Female', Locale: 'en-IE' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-IN, NeerjaNeural)', ShortName: 'en-IN-NeerjaNeural', Gender: 'Female', Locale: 'en-IN' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-IN, PrabhatNeural)', ShortName: 'en-IN-PrabhatNeural', Gender: 'Male', Locale: 'en-IN' },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-AU, NatashaNeural)',
+    ShortName: 'en-AU-NatashaNeural',
+    Gender: 'Female',
+    Locale: 'en-AU'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-AU, WilliamNeural)',
+    ShortName: 'en-AU-WilliamNeural',
+    Gender: 'Male',
+    Locale: 'en-AU'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-CA, ClaraNeural)',
+    ShortName: 'en-CA-ClaraNeural',
+    Gender: 'Female',
+    Locale: 'en-CA'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-CA, LiamNeural)',
+    ShortName: 'en-CA-LiamNeural',
+    Gender: 'Male',
+    Locale: 'en-CA'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, LibbyNeural)',
+    ShortName: 'en-GB-LibbyNeural',
+    Gender: 'Female',
+    Locale: 'en-GB'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, MaisieNeural)',
+    ShortName: 'en-GB-MaisieNeural',
+    Gender: 'Female',
+    Locale: 'en-GB'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, RyanNeural)',
+    ShortName: 'en-GB-RyanNeural',
+    Gender: 'Male',
+    Locale: 'en-GB'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, SoniaNeural)',
+    ShortName: 'en-GB-SoniaNeural',
+    Gender: 'Female',
+    Locale: 'en-GB'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-GB, ThomasNeural)',
+    ShortName: 'en-GB-ThomasNeural',
+    Gender: 'Male',
+    Locale: 'en-GB'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-HK, SamNeural)',
+    ShortName: 'en-HK-SamNeural',
+    Gender: 'Male',
+    Locale: 'en-HK'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-HK, YanNeural)',
+    ShortName: 'en-HK-YanNeural',
+    Gender: 'Female',
+    Locale: 'en-HK'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-IE, ConnorNeural)',
+    ShortName: 'en-IE-ConnorNeural',
+    Gender: 'Male',
+    Locale: 'en-IE'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-IE, EmilyNeural)',
+    ShortName: 'en-IE-EmilyNeural',
+    Gender: 'Female',
+    Locale: 'en-IE'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-IN, NeerjaNeural)',
+    ShortName: 'en-IN-NeerjaNeural',
+    Gender: 'Female',
+    Locale: 'en-IN'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-IN, PrabhatNeural)',
+    ShortName: 'en-IN-PrabhatNeural',
+    Gender: 'Male',
+    Locale: 'en-IN'
+  },
   // ... (Many more English variants: KE, NG, NZ, PH, SG, TZ, US, ZA) ...
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)', ShortName: 'en-US-AriaNeural', Gender: 'Female', Locale: 'en-US' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, AnaNeural)', ShortName: 'en-US-AnaNeural', Gender: 'Female', Locale: 'en-US' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, ChristopherNeural)', ShortName: 'en-US-ChristopherNeural', Gender: 'Male', Locale: 'en-US' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, EricNeural)', ShortName: 'en-US-EricNeural', Gender: 'Male', Locale: 'en-US' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, GuyNeural)', ShortName: 'en-US-GuyNeural', Gender: 'Male', Locale: 'en-US' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, JennyNeural)', ShortName: 'en-US-JennyNeural', Gender: 'Female', Locale: 'en-US' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, MichelleNeural)', ShortName: 'en-US-MichelleNeural', Gender: 'Female', Locale: 'en-US' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, RogerNeural)', ShortName: 'en-US-RogerNeural', Gender: 'Male', Locale: 'en-US' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (en-US, SteffanNeural)', ShortName: 'en-US-SteffanNeural', Gender: 'Male', Locale: 'en-US' },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)',
+    ShortName: 'en-US-AriaNeural',
+    Gender: 'Female',
+    Locale: 'en-US'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-US, AnaNeural)',
+    ShortName: 'en-US-AnaNeural',
+    Gender: 'Female',
+    Locale: 'en-US'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-US, ChristopherNeural)',
+    ShortName: 'en-US-ChristopherNeural',
+    Gender: 'Male',
+    Locale: 'en-US'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-US, EricNeural)',
+    ShortName: 'en-US-EricNeural',
+    Gender: 'Male',
+    Locale: 'en-US'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-US, GuyNeural)',
+    ShortName: 'en-US-GuyNeural',
+    Gender: 'Male',
+    Locale: 'en-US'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-US, JennyNeural)',
+    ShortName: 'en-US-JennyNeural',
+    Gender: 'Female',
+    Locale: 'en-US'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-US, MichelleNeural)',
+    ShortName: 'en-US-MichelleNeural',
+    Gender: 'Female',
+    Locale: 'en-US'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-US, RogerNeural)',
+    ShortName: 'en-US-RogerNeural',
+    Gender: 'Male',
+    Locale: 'en-US'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (en-US, SteffanNeural)',
+    ShortName: 'en-US-SteffanNeural',
+    Gender: 'Male',
+    Locale: 'en-US'
+  },
   // ... (Spanish variants) ...
-  { Name: 'Microsoft Server Speech Text to Speech Voice (es-MX, DaliaNeural)', ShortName: 'es-MX-DaliaNeural', Gender: 'Female', Locale: 'es-MX' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (es-MX, JorgeNeural)', ShortName: 'es-MX-JorgeNeural', Gender: 'Male', Locale: 'es-MX' },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (es-MX, DaliaNeural)',
+    ShortName: 'es-MX-DaliaNeural',
+    Gender: 'Female',
+    Locale: 'es-MX'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (es-MX, JorgeNeural)',
+    ShortName: 'es-MX-JorgeNeural',
+    Gender: 'Male',
+    Locale: 'es-MX'
+  },
   // ... (Estonian, Basque, Persian, Finnish, Filipino, French, Irish, Galician, Gujarati, Hebrew, Hindi, Croatian, Hungarian, Indonesian, Icelandic, Italian, Japanese) ...
-  { Name: 'Microsoft Server Speech Text to Speech Voice (ja-JP, KeitaNeural)', ShortName: 'ja-JP-KeitaNeural', Gender: 'Male', Locale: 'ja-JP' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (ja-JP, NanamiNeural)', ShortName: 'ja-JP-NanamiNeural', Gender: 'Female', Locale: 'ja-JP' },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (ja-JP, KeitaNeural)',
+    ShortName: 'ja-JP-KeitaNeural',
+    Gender: 'Male',
+    Locale: 'ja-JP'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (ja-JP, NanamiNeural)',
+    ShortName: 'ja-JP-NanamiNeural',
+    Gender: 'Female',
+    Locale: 'ja-JP'
+  },
   // ... (Javanese, Georgian, Kazakh, Khmer, Kannada, Korean) ...
-  { Name: 'Microsoft Server Speech Text to Speech Voice (ko-KR, InJoonNeural)', ShortName: 'ko-KR-InJoonNeural', Gender: 'Male', Locale: 'ko-KR' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (ko-KR, SunHiNeural)', ShortName: 'ko-KR-SunHiNeural', Gender: 'Female', Locale: 'ko-KR' },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (ko-KR, InJoonNeural)',
+    ShortName: 'ko-KR-InJoonNeural',
+    Gender: 'Male',
+    Locale: 'ko-KR'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (ko-KR, SunHiNeural)',
+    ShortName: 'ko-KR-SunHiNeural',
+    Gender: 'Female',
+    Locale: 'ko-KR'
+  },
   // ... (Lao, Lithuanian, Latvian, Macedonian, Malayalam, Mongolian, Marathi, Malay, Maltese, Burmese, Norwegian, Dutch, Polish, Pashto, Portuguese) ...
-  { Name: 'Microsoft Server Speech Text to Speech Voice (pt-BR, AntonioNeural)', ShortName: 'pt-BR-AntonioNeural', Gender: 'Male', Locale: 'pt-BR' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (pt-BR, FranciscaNeural)', ShortName: 'pt-BR-FranciscaNeural', Gender: 'Female', Locale: 'pt-BR' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (pt-PT, DuarteNeural)', ShortName: 'pt-PT-DuarteNeural', Gender: 'Male', Locale: 'pt-PT' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (pt-PT, RaquelNeural)', ShortName: 'pt-PT-RaquelNeural', Gender: 'Female', Locale: 'pt-PT' },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (pt-BR, AntonioNeural)',
+    ShortName: 'pt-BR-AntonioNeural',
+    Gender: 'Male',
+    Locale: 'pt-BR'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (pt-BR, FranciscaNeural)',
+    ShortName: 'pt-BR-FranciscaNeural',
+    Gender: 'Female',
+    Locale: 'pt-BR'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (pt-PT, DuarteNeural)',
+    ShortName: 'pt-PT-DuarteNeural',
+    Gender: 'Male',
+    Locale: 'pt-PT'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (pt-PT, RaquelNeural)',
+    ShortName: 'pt-PT-RaquelNeural',
+    Gender: 'Female',
+    Locale: 'pt-PT'
+  },
   // ... (Romanian, Russian, Sinhala, Slovak, Slovenian, Somali, Albanian, Serbian, Sundanese, Swedish, Swahili, Tamil, Telugu, Thai) ...
-  { Name: 'Microsoft Server Speech Text to Speech Voice (th-TH, NiwatNeural)', ShortName: 'th-TH-NiwatNeural', Gender: 'Male', Locale: 'th-TH' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (th-TH, PremwadeeNeural)', ShortName: 'th-TH-PremwadeeNeural', Gender: 'Female', Locale: 'th-TH' },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (th-TH, NiwatNeural)',
+    ShortName: 'th-TH-NiwatNeural',
+    Gender: 'Male',
+    Locale: 'th-TH'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (th-TH, PremwadeeNeural)',
+    ShortName: 'th-TH-PremwadeeNeural',
+    Gender: 'Female',
+    Locale: 'th-TH'
+  },
   // ... (Turkish, Ukrainian, Urdu, Uzbek, Vietnamese) ...
-  { Name: 'Microsoft Server Speech Text to Speech Voice (vi-VN, HoaiMyNeural)', ShortName: 'vi-VN-HoaiMyNeural', Gender: 'Female', Locale: 'vi-VN' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (vi-VN, NamMinhNeural)', ShortName: 'vi-VN-NamMinhNeural', Gender: 'Male', Locale: 'vi-VN' },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (vi-VN, HoaiMyNeural)',
+    ShortName: 'vi-VN-HoaiMyNeural',
+    Gender: 'Female',
+    Locale: 'vi-VN'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (vi-VN, NamMinhNeural)',
+    ShortName: 'vi-VN-NamMinhNeural',
+    Gender: 'Male',
+    Locale: 'vi-VN'
+  },
   // ... (Chinese variants) ...
-  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, XiaoxiaoNeural)', ShortName: 'zh-CN-XiaoxiaoNeural', Gender: 'Female', Locale: 'zh-CN' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunxiNeural)', ShortName: 'zh-CN-YunxiNeural', Gender: 'Male', Locale: 'zh-CN' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunjianNeural)', ShortName: 'zh-CN-YunjianNeural', Gender: 'Male', Locale: 'zh-CN' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunxiaNeural)', ShortName: 'zh-CN-YunxiaNeural', Gender: 'Male', Locale: 'zh-CN' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunyangNeural)', ShortName: 'zh-CN-YunyangNeural', Gender: 'Male', Locale: 'zh-CN' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN-liaoning, XiaobeiNeural)', ShortName: 'zh-CN-liaoning-XiaobeiNeural', Gender: 'Female', Locale: 'zh-CN-liaoning' },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, XiaoxiaoNeural)',
+    ShortName: 'zh-CN-XiaoxiaoNeural',
+    Gender: 'Female',
+    Locale: 'zh-CN'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunxiNeural)',
+    ShortName: 'zh-CN-YunxiNeural',
+    Gender: 'Male',
+    Locale: 'zh-CN'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunjianNeural)',
+    ShortName: 'zh-CN-YunjianNeural',
+    Gender: 'Male',
+    Locale: 'zh-CN'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunxiaNeural)',
+    ShortName: 'zh-CN-YunxiaNeural',
+    Gender: 'Male',
+    Locale: 'zh-CN'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN, YunyangNeural)',
+    ShortName: 'zh-CN-YunyangNeural',
+    Gender: 'Male',
+    Locale: 'zh-CN'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN-liaoning, XiaobeiNeural)',
+    ShortName: 'zh-CN-liaoning-XiaobeiNeural',
+    Gender: 'Female',
+    Locale: 'zh-CN-liaoning'
+  },
   // { Name: 'Microsoft Server Speech Text to Speech Voice (zh-CN-shaanxi, XiaoniNeural)', ShortName: 'zh-CN-shaanxi-XiaoniNeural', Gender: 'Female', Locale: 'zh-CN-shaanxi' }, // Example regional voice
-  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-HK, HiuGaaiNeural)', ShortName: 'zh-HK-HiuGaaiNeural', Gender: 'Female', Locale: 'zh-HK' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-HK, HiuMaanNeural)', ShortName: 'zh-HK-HiuMaanNeural', Gender: 'Female', Locale: 'zh-HK' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-HK, WanLungNeural)', ShortName: 'zh-HK-WanLungNeural', Gender: 'Male', Locale: 'zh-HK' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-TW, HsiaoChenNeural)', ShortName: 'zh-TW-HsiaoChenNeural', Gender: 'Female', Locale: 'zh-TW' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-TW, HsiaoYuNeural)', ShortName: 'zh-TW-HsiaoYuNeural', Gender: 'Female', Locale: 'zh-TW' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (zh-TW, YunJheNeural)', ShortName: 'zh-TW-YunJheNeural', Gender: 'Male', Locale: 'zh-TW' },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (zh-HK, HiuGaaiNeural)',
+    ShortName: 'zh-HK-HiuGaaiNeural',
+    Gender: 'Female',
+    Locale: 'zh-HK'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (zh-HK, HiuMaanNeural)',
+    ShortName: 'zh-HK-HiuMaanNeural',
+    Gender: 'Female',
+    Locale: 'zh-HK'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (zh-HK, WanLungNeural)',
+    ShortName: 'zh-HK-WanLungNeural',
+    Gender: 'Male',
+    Locale: 'zh-HK'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (zh-TW, HsiaoChenNeural)',
+    ShortName: 'zh-TW-HsiaoChenNeural',
+    Gender: 'Female',
+    Locale: 'zh-TW'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (zh-TW, HsiaoYuNeural)',
+    ShortName: 'zh-TW-HsiaoYuNeural',
+    Gender: 'Female',
+    Locale: 'zh-TW'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (zh-TW, YunJheNeural)',
+    ShortName: 'zh-TW-YunJheNeural',
+    Gender: 'Male',
+    Locale: 'zh-TW'
+  },
   // ... (Zulu) ...
-  { Name: 'Microsoft Server Speech Text to Speech Voice (zu-ZA, ThandoNeural)', ShortName: 'zu-ZA-ThandoNeural', Gender: 'Female', Locale: 'zu-ZA' },
-  { Name: 'Microsoft Server Speech Text to Speech Voice (zu-ZA, ThembaNeural)', ShortName: 'zu-ZA-ThembaNeural', Gender: 'Male', Locale: 'zu-ZA' },
-];
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (zu-ZA, ThandoNeural)',
+    ShortName: 'zu-ZA-ThandoNeural',
+    Gender: 'Female',
+    Locale: 'zu-ZA'
+  },
+  {
+    Name: 'Microsoft Server Speech Text to Speech Voice (zu-ZA, ThembaNeural)',
+    ShortName: 'zu-ZA-ThembaNeural',
+    Gender: 'Male',
+    Locale: 'zu-ZA'
+  }
+]
 // --- END OF HARDCODED VOICE LIST ---
-
 /**
  * 免费在线TTS服务
  * 使用免费的在线TTS服务,不需要API密钥
  */
 class MsTTSService {
-  private static instance: MsTTSService;
-  private tempDir: string;
+  private static instance: MsTTSService
+  private tempDir: string

   private constructor() {
-    this.tempDir = path.join(app.getPath('temp'), 'cherry-tts');
+    this.tempDir = path.join(app.getPath('temp'), 'cherry-tts')
     if (!fs.existsSync(this.tempDir)) {
-      fs.mkdirSync(this.tempDir, { recursive: true });
+      fs.mkdirSync(this.tempDir, { recursive: true })
     }
-    log.info('初始化免费在线TTS服务 (使用硬编码语音列表)');
+    log.info('初始化免费在线TTS服务 (使用硬编码语音列表)')
   }

   public static getInstance(): MsTTSService {
     if (!MsTTSService.instance) {
-      MsTTSService.instance = new MsTTSService();
+      MsTTSService.instance = new MsTTSService()
     }
-    return MsTTSService.instance;
+    return MsTTSService.instance
   }

   /**
@@ -118,15 +443,15 @@ class MsTTSService {
    */
   public async getVoices(): Promise<any[]> {
     try {
-      log.info(`返回硬编码的 ${hardcodedVoices.length} 个语音列表`);
+      log.info(`返回硬编码的 ${hardcodedVoices.length} 个语音列表`)
       // 直接返回硬编码的列表
       // 注意:保持 async 是为了接口兼容性,虽然这里没有实际的异步操作
-      return hardcodedVoices;
+      return hardcodedVoices
     } catch (error) {
-      // 这个 try/catch 在这里意义不大了,因为返回静态数据不会出错
-      // 但保留结构以防未来改动
-      log.error('获取硬编码语音列表时出错 (理论上不应发生):', error);
-      return []; // 返回空列表以防万一
+      // 这个 try/catch 在这里意义不大了,因为返回静态数据不会出错
+      // 但保留结构以防未来改动
+      log.error('获取硬编码语音列表时出错 (理论上不应发生):', error)
+      return [] // 返回空列表以防万一
     }
   }

   /**
@@ -140,15 +465,15 @@ class MsTTSService {
    */
   public async synthesize(text: string, voice: string, outputFormat: string): Promise<string> {
     try {
       // 记录详细的请求信息
-      log.info(`微软在线TTS合成语音: 文本="${text.substring(0, 30)}...", 语音=${voice}, 格式=${outputFormat}`);
+      log.info(`微软在线TTS合成语音: 文本="${text.substring(0, 30)}...", 语音=${voice}, 格式=${outputFormat}`)

       // 验证输入参数
       if (!text || text.trim() === '') {
-        throw new Error('要合成的文本不能为空');
+        throw new Error('要合成的文本不能为空')
       }

       if (!voice || voice.trim() === '') {
-        throw new Error('语音名称不能为空');
+        throw new Error('语音名称不能为空')
       }

       // 创建一个新的EdgeTTS实例,并设置参数
@@ -157,80 +482,80 @@ class MsTTSService {
       voice: voice,
       outputFormat: outputFormat,
       timeout: 30000, // 30秒超时
-      rate: '+0%',   // 正常语速
+      rate: '+0%', // 正常语速
       pitch: '+0Hz', // 正常音调
-      volume: '+0%'  // 正常音量
-    });
+      volume: '+0%' // 正常音量
+    })

       // 生成临时文件路径
-      const timestamp = Date.now();
-      const fileExtension = outputFormat.includes('mp3') ? 'mp3' : outputFormat.split('-').pop() || 'audio';
-      const outputPath = path.join(this.tempDir, `tts_${timestamp}.${fileExtension}`);
+      const timestamp = Date.now()
+      const fileExtension = outputFormat.includes('mp3') ? 'mp3' : outputFormat.split('-').pop() || 'audio'
+      const outputPath = path.join(this.tempDir, `tts_${timestamp}.${fileExtension}`)

-      log.info(`开始生成语音文件: ${outputPath}`);
+      log.info(`开始生成语音文件: ${outputPath}`)

       // 使用ttsPromise方法生成文件
-      await tts.ttsPromise(text, outputPath);
+      await tts.ttsPromise(text, outputPath)

       // 验证生成的文件是否存在且大小大于0
       if (!fs.existsSync(outputPath)) {
-        throw new Error(`生成的语音文件不存在: ${outputPath}`);
+        throw new Error(`生成的语音文件不存在: ${outputPath}`)
       }

-      const stats = fs.statSync(outputPath);
+      const stats = fs.statSync(outputPath)
       if (stats.size === 0) {
-        throw new Error(`生成的语音文件大小为0: ${outputPath}`);
+        throw new Error(`生成的语音文件大小为0: ${outputPath}`)
       }

-      log.info(`微软在线TTS合成成功: ${outputPath}, 文件大小: ${stats.size} 字节`);
-      return outputPath;
+      log.info(`微软在线TTS合成成功: ${outputPath}, 文件大小: ${stats.size} 字节`)
+      return outputPath
     } catch (error: any) {
       // 记录详细的错误信息
-      log.error(`微软在线TTS语音合成失败 (语音=${voice}):`, error);
+      log.error(`微软在线TTS语音合成失败 (语音=${voice}):`, error)

       // 尝试提供更有用的错误信息
       if (error.message && typeof error.message === 'string') {
         if (error.message.includes('Timed out')) {
-          throw new Error(`语音合成超时,请检查网络连接或尝试其他语音`);
+          throw new Error(`语音合成超时,请检查网络连接或尝试其他语音`)
         } else if (error.message.includes('ENOTFOUND')) {
-          throw new Error(`无法连接到微软语音服务,请检查网络连接`);
+          throw new Error(`无法连接到微软语音服务,请检查网络连接`)
         } else if (error.message.includes('ECONNREFUSED')) {
-          throw new Error(`连接被拒绝,请检查网络设置或代理配置`);
+          throw new Error(`连接被拒绝,请检查网络设置或代理配置`)
         }
       }

-      throw error;
+      throw error
     }
   }

-  /**
+  /**
    * (可选) 清理临时文件目录
    */
   public async cleanupTempDir(): Promise<void> {
     // (Cleanup method remains the same)
-    try {
-      const files = await fs.promises.readdir(this.tempDir);
+    try {
+      const files = await fs.promises.readdir(this.tempDir)
       for (const file of files) {
         if (file.startsWith('tts_')) {
-          await fs.promises.unlink(path.join(this.tempDir, file));
+          await fs.promises.unlink(path.join(this.tempDir, file))
         }
       }
-      log.info('TTS 临时文件已清理');
+      log.info('TTS 临时文件已清理')
     } catch (error) {
-      log.error('清理 TTS 临时文件失败:', error);
+      log.error('清理 TTS 临时文件失败:', error)
     }
   }
 }

 // 导出单例方法 (保持不变)
 export const getVoices = async () => {
-  return await MsTTSService.getInstance().getVoices();
-};
+  return await MsTTSService.getInstance().getVoices()
+}

 export const synthesize = async (text: string, voice: string, outputFormat: string) => {
-  return await MsTTSService.getInstance().synthesize(text, voice, outputFormat);
-};
+  return await MsTTSService.getInstance().synthesize(text, voice, outputFormat)
+}

 export const cleanupTtsTempFiles = async () => {
-  await MsTTSService.getInstance().cleanupTempDir();
-};
\ No newline at end of file
+  await MsTTSService.getInstance().cleanupTempDir()
+}
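Taken together, MsTTSService.ts exports a three-function API: getVoices(), synthesize(text, voice, outputFormat) and cleanupTtsTempFiles(). A short main-process usage sketch — the voice name and output format here are illustrative values, not defaults defined by this module:

import * as MsTTSService from './services/MsTTSService'

async function demoMsTTS(): Promise<void> {
  const voices = await MsTTSService.getVoices() // returns the hardcoded list above
  console.log(`available voices: ${voices.length}`)

  // Resolves to the path of a temp audio file under <temp>/cherry-tts
  const audioPath = await MsTTSService.synthesize(
    '你好,世界',
    'zh-CN-XiaoxiaoNeural',
    'audio-24khz-48kbitrate-mono-mp3'
  )
  console.log(`synthesized: ${audioPath}`)

  await MsTTSService.cleanupTtsTempFiles() // removes accumulated tts_* files
}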
diff --git a/src/renderer/src/components/VoiceCallButton.tsx b/src/renderer/src/components/VoiceCallButton.tsx
index 00d784b4ed..9f95c569ae 100644
--- a/src/renderer/src/components/VoiceCallButton.tsx
+++ b/src/renderer/src/components/VoiceCallButton.tsx
@@ -1,35 +1,36 @@
-import React, { useState } from 'react';
-import { Button, Tooltip } from 'antd';
-import { PhoneOutlined, LoadingOutlined } from '@ant-design/icons';
-import { useTranslation } from 'react-i18next';
-import VoiceCallModal from './VoiceCallModal';
-import { VoiceCallService } from '../services/VoiceCallService';
+import { LoadingOutlined, PhoneOutlined } from '@ant-design/icons'
+import { Button, Tooltip } from 'antd'
+import React, { useState } from 'react'
+import { useTranslation } from 'react-i18next'
+
+import { VoiceCallService } from '../services/VoiceCallService'
+import VoiceCallModal from './VoiceCallModal'

 interface Props {
-  disabled?: boolean;
-  style?: React.CSSProperties;
+  disabled?: boolean
+  style?: React.CSSProperties
 }

 const VoiceCallButton: React.FC<Props> = ({ disabled = false, style }) => {
-  const { t } = useTranslation();
-  const [isModalVisible, setIsModalVisible] = useState(false);
-  const [isLoading, setIsLoading] = useState(false);
+  const { t } = useTranslation()
+  const [isModalVisible, setIsModalVisible] = useState(false)
+  const [isLoading, setIsLoading] = useState(false)

   const handleClick = async () => {
-    if (disabled || isLoading) return;
-
-    setIsLoading(true);
+    if (disabled || isLoading) return
+
+    setIsLoading(true)
     try {
       // 初始化语音服务
-      await VoiceCallService.initialize();
-      setIsModalVisible(true);
+      await VoiceCallService.initialize()
+      setIsModalVisible(true)
     } catch (error) {
-      console.error('Failed to initialize voice call:', error);
-      window.message.error(t('voice_call.initialization_failed'));
+      console.error('Failed to initialize voice call:', error)
+      window.message.error(t('voice_call.initialization_failed'))
     } finally {
-      setIsLoading(false);
+      setIsLoading(false)
     }
-  };
+  }

   return (
     <>
@@ -42,14 +43,9 @@ const VoiceCallButton: React.FC<Props> = ({ disabled = false, style }) => {
         style={style}
       />
-      {isModalVisible && (
-        <VoiceCallModal
-          visible={isModalVisible}
-          onClose={() => setIsModalVisible(false)}
-        />
-      )}
+      {isModalVisible && <VoiceCallModal visible={isModalVisible} onClose={() => setIsModalVisible(false)} />}
     </>
-  );
-};
+  )
+}

-export default VoiceCallButton;
+export default VoiceCallButton
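One behavioral fix in the next file is worth flagging: VoiceCallModal's handleClose is now memoized with useCallback and listed in the effect's dependency array, so the error path inside the effect no longer closes over a stale onClose. The general shape of the pattern, as a self-contained sketch (component and prop names here are illustrative, not from the codebase):

import React, { useCallback, useEffect } from 'react'

// Sketch: memoizing the handler keeps the effect dependency stable, so the
// effect re-runs only when the wrapped prop actually changes.
const Example: React.FC<{ onClose: () => void }> = ({ onClose }) => {
  const handleClose = useCallback(() => onClose(), [onClose])

  useEffect(() => {
    // ... start work that may need to bail out via handleClose() ...
    return () => {
      // cleanup sees the current handler, never a stale capture
    }
  }, [handleClose])

  return null
}

export default Example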
diff --git a/src/renderer/src/components/VoiceCallModal.tsx b/src/renderer/src/components/VoiceCallModal.tsx
index dbceac6062..55368cc0b9 100644
--- a/src/renderer/src/components/VoiceCallModal.tsx
+++ b/src/renderer/src/components/VoiceCallModal.tsx
@@ -1,5 +1,3 @@
-import React, { useEffect, useState } from 'react';
-import { Modal, Button, Space, Tooltip } from 'antd';
 import {
   AudioMutedOutlined,
   AudioOutlined,
@@ -7,125 +5,128 @@ import {
   PauseCircleOutlined,
   PlayCircleOutlined,
   SoundOutlined
-} from '@ant-design/icons';
-import styled from 'styled-components';
-import { useTranslation } from 'react-i18next';
-import VoiceVisualizer from './VoiceVisualizer';
-import { VoiceCallService } from '../services/VoiceCallService';
+} from '@ant-design/icons'
+import { Button, Modal, Space, Tooltip } from 'antd'
+import React, { useCallback, useEffect, useState } from 'react'
+import { useTranslation } from 'react-i18next'
+import styled from 'styled-components'
+
+import { VoiceCallService } from '../services/VoiceCallService'
+import VoiceVisualizer from './VoiceVisualizer'

 interface Props {
-  visible: boolean;
-  onClose: () => void;
+  visible: boolean
+  onClose: () => void
 }

 const VoiceCallModal: React.FC<Props> = ({ visible, onClose }) => {
-  const { t } = useTranslation();
-  const [isMuted, setIsMuted] = useState(false);
-  const [isPaused, setIsPaused] = useState(false);
-  const [transcript, setTranscript] = useState('');
-  const [response, setResponse] = useState('');
-  const [isListening, setIsListening] = useState(false);
-  const [isSpeaking, setIsSpeaking] = useState(false);
-  const [isRecording, setIsRecording] = useState(false);
-  const [isProcessing, setIsProcessing] = useState(false);
+  const { t } = useTranslation()
+  const [isMuted, setIsMuted] = useState(false)
+  const [isPaused, setIsPaused] = useState(false)
+  const [transcript, setTranscript] = useState('')
+  const [response, setResponse] = useState('')
+  const [isListening, setIsListening] = useState(false)
+  const [isSpeaking, setIsSpeaking] = useState(false)
+  const [isRecording, setIsRecording] = useState(false)
+  const [isProcessing, setIsProcessing] = useState(false)
+
+  const handleClose = useCallback(() => {
+    VoiceCallService.endCall()
+    onClose()
+  }, [onClose])

   useEffect(() => {
     const startVoiceCall = async () => {
       try {
         await VoiceCallService.startCall({
-          onTranscript: (text) => setTranscript(text),
-          onResponse: (text) => setResponse(text),
+          onTranscript: (text: string) => setTranscript(text),
+          onResponse: (text: string) => setResponse(text),
           onListeningStateChange: setIsListening,
-          onSpeakingStateChange: setIsSpeaking,
-        });
+          onSpeakingStateChange: setIsSpeaking
+        })
       } catch (error) {
-        console.error('Voice call error:', error);
-        window.message.error(t('voice_call.error'));
-        handleClose();
+        console.error('Voice call error:', error)
+        window.message.error(t('voice_call.error'))
+        handleClose()
       }
-    };
+    }

     if (visible) {
-      startVoiceCall();
+      startVoiceCall()
     }

     return () => {
-      VoiceCallService.endCall();
-    };
-  }, [visible, t]);
-
-  const handleClose = () => {
-    VoiceCallService.endCall();
-    onClose();
-  };
+      VoiceCallService.endCall()
+    }
+  }, [visible, t, handleClose])

   const toggleMute = () => {
-    const newMuteState = !isMuted;
-    setIsMuted(newMuteState);
-    VoiceCallService.setMuted(newMuteState);
-  };
+    const newMuteState = !isMuted
+    setIsMuted(newMuteState)
+    VoiceCallService.setMuted(newMuteState)
+  }

   const togglePause = () => {
-    const newPauseState = !isPaused;
-    setIsPaused(newPauseState);
-    VoiceCallService.setPaused(newPauseState);
-  };
+    const newPauseState = !isPaused
+    setIsPaused(newPauseState)
+    VoiceCallService.setPaused(newPauseState)
+  }

   // 长按说话相关处理
   const handleRecordStart = async (e: React.MouseEvent | React.TouchEvent) => {
-    e.preventDefault(); // 防止触摸事件的默认行为
+    e.preventDefault() // 防止触摸事件的默认行为

-    if (isProcessing || isPaused) return;
+    if (isProcessing || isPaused) return

-    setIsRecording(true);
-    await VoiceCallService.startRecording();
-  };
+    setIsRecording(true)
+    await VoiceCallService.startRecording()
+  }

   const handleRecordEnd = async (e: React.MouseEvent | React.TouchEvent) => {
-    e.preventDefault(); // 防止触摸事件的默认行为
+    e.preventDefault() // 防止触摸事件的默认行为

-    if (!isRecording) return;
+    if (!isRecording) return

     // 立即更新UI状态
-    setIsRecording(false);
-    setIsProcessing(true);
+    setIsRecording(false)
+    setIsProcessing(true)

     // 确保录音完全停止
     try {
-      await VoiceCallService.stopRecording();
-      console.log('录音已停止');
+      await VoiceCallService.stopRecording()
+      console.log('录音已停止')
     } catch (error) {
-      console.error('停止录音出错:', error);
+      console.error('停止录音出错:', error)
     }

     // 处理结果会通过回调函数返回,不需要在这里处理
     setTimeout(() => {
-      setIsProcessing(false);
-    }, 500); // 添加短暂延迟,防止用户立即再次点击
-  };
+      setIsProcessing(false)
+    }, 500) // 添加短暂延迟,防止用户立即再次点击
+  }

   // 处理鼠标/触摸离开按钮的情况
   const handleRecordCancel = async (e: React.MouseEvent | React.TouchEvent) => {
-    e.preventDefault();
+    e.preventDefault()

     if (isRecording) {
       // 立即更新UI状态
-      setIsRecording(false);
-      setIsProcessing(true);
+      setIsRecording(false)
+      setIsProcessing(true)

       // 取消录音,不发送给AI
       try {
-        await VoiceCallService.cancelRecording();
-        console.log('录音已取消');
+        await VoiceCallService.cancelRecording()
+        console.log('录音已取消')
       } catch (error) {
-        console.error('取消录音出错:', error);
+        console.error('取消录音出错:', error)
       }

       setTimeout(() => {
-        setIsProcessing(false);
-      }, 500);
+        setIsProcessing(false)
+      }, 500)
     }
-  };
+  }
   return (
     <Modal
       footer={null}
       width={500}
       centered
-      maskClosable={false}
-    >
+      maskClosable={false}>
@@ -174,7 +174,7 @@ const VoiceCallModal: React.FC<Props> = ({ visible, onClose }) => {
           />
         }
           onMouseDown={handleRecordStart}
           onMouseUp={handleRecordEnd}
@@ -183,8 +183,7 @@
           onTouchEnd={handleRecordEnd}
           onTouchCancel={handleRecordCancel}
           size="large"
-          disabled={isProcessing || isPaused}
-        >
+          disabled={isProcessing || isPaused}>
           {isRecording ? t('voice_call.release_to_send') : t('voice_call.press_to_talk')}
@@ -200,21 +199,21 @@
-  );
-};
+  )
+}

 const Container = styled.div`
   display: flex;
   flex-direction: column;
   gap: 20px;
   height: 400px;
-`;
+`

 const VisualizerContainer = styled.div`
   display: flex;
   justify-content: space-between;
   height: 100px;
-`;
+`

 const TranscriptContainer = styled.div`
   flex: 1;
@@ -223,33 +222,33 @@ const TranscriptContainer = styled.div`
   border-radius: 8px;
   padding: 16px;
   background-color: var(--color-background-2);
-`;
+`

 const TranscriptText = styled.p`
   margin-bottom: 8px;
   color: var(--color-text-1);
-`;
+`

 const ResponseText = styled.p`
   margin-bottom: 8px;
   color: var(--color-primary);
-`;
+`

 const UserLabel = styled.span`
   font-weight: bold;
   color: var(--color-text-1);
-`;
+`

 const AILabel = styled.span`
   font-weight: bold;
   color: var(--color-primary);
-`;
+`

 const ControlsContainer = styled.div`
   display: flex;
   justify-content: center;
   padding: 10px 0;
-`;
+`

 const RecordButton = styled(Button)`
   min-width: 150px;
@@ -258,6 +257,6 @@ const RecordButton = styled(Button)`
   &:active {
     transform: scale(0.95);
   }
-`;
+`

-export default VoiceCallModal;
+export default VoiceCallModal
diff --git a/src/renderer/src/components/VoiceVisualizer.tsx b/src/renderer/src/components/VoiceVisualizer.tsx
index 31d5662c92..8b6a018e9c 100644
--- a/src/renderer/src/components/VoiceVisualizer.tsx
+++ b/src/renderer/src/components/VoiceVisualizer.tsx
@@ -1,74 +1,74 @@
-import React, { useEffect, useRef } from 'react';
-import styled from 'styled-components';
-import { useTranslation } from 'react-i18next';
+import React, { useEffect, useRef } from 'react'
+import { useTranslation } from 'react-i18next'
+import styled from 'styled-components'

 interface Props {
-  isActive: boolean;
-  type: 'input' | 'output';
+  isActive: boolean
+  type: 'input' | 'output'
 }

 const VoiceVisualizer: React.FC<Props> = ({ isActive, type }) => {
-  const { t } = useTranslation();
-  const canvasRef = useRef<HTMLCanvasElement>(null);
-  const animationRef = useRef<number | undefined>(undefined);
+  const { t } = useTranslation()
+  const canvasRef = useRef<HTMLCanvasElement>(null)
+  const animationRef = useRef<number | undefined>(undefined)

   useEffect(() => {
-    const canvas = canvasRef.current;
-    if (!canvas) return;
+    const canvas = canvasRef.current
+    if (!canvas) return

-    const ctx = canvas.getContext('2d');
-    if (!ctx) return;
+    const ctx = canvas.getContext('2d')
+    if (!ctx) return

-    const width = canvas.width;
-    const height = canvas.height;
+    const width = canvas.width
+    const height = canvas.height

     const drawVisualizer = () => {
-      ctx.clearRect(0, 0, width, height);
+      ctx.clearRect(0, 0, width, height)

       if (!isActive) {
         // 绘制静态波形
-        ctx.beginPath();
-        ctx.moveTo(0, height / 2);
-        ctx.lineTo(width, height / 2);
-        ctx.strokeStyle = type === 'input' ? 'var(--color-text-2)' : 'var(--color-primary)';
-        ctx.lineWidth = 2;
-        ctx.stroke();
-        return;
+        ctx.beginPath()
+        ctx.moveTo(0, height / 2)
+        ctx.lineTo(width, height / 2)
+        ctx.strokeStyle = type === 'input' ? 'var(--color-text-2)' : 'var(--color-primary)'
+        ctx.lineWidth = 2
+        ctx.stroke()
+        return
       }

       // 绘制动态波形
-      const barCount = 30;
-      const barWidth = width / barCount;
-      const color = type === 'input' ? 'var(--color-text-1)' : 'var(--color-primary)';
+      const barCount = 30
+      const barWidth = width / barCount
+      const color = type === 'input' ? 'var(--color-text-1)' : 'var(--color-primary)'

       for (let i = 0; i < barCount; i++) {
-        const barHeight = Math.random() * (height / 2) + 10;
-        const x = i * barWidth;
-        const y = height / 2 - barHeight / 2;
+        const barHeight = Math.random() * (height / 2) + 10
+        const x = i * barWidth
+        const y = height / 2 - barHeight / 2

-        ctx.fillStyle = color;
-        ctx.fillRect(x, y, barWidth - 2, barHeight);
+        ctx.fillStyle = color
+        ctx.fillRect(x, y, barWidth - 2, barHeight)
       }

-      animationRef.current = requestAnimationFrame(drawVisualizer);
-    };
+      animationRef.current = requestAnimationFrame(drawVisualizer)
+    }

-    drawVisualizer();
+    drawVisualizer()

     return () => {
       if (animationRef.current) {
-        cancelAnimationFrame(animationRef.current);
+        cancelAnimationFrame(animationRef.current)
       }
-    };
-  }, [isActive, type]);
+    }
+  }, [isActive, type])

   return (
-  );
-};
+  )
+}

 const Container = styled.div<{ $type: 'input' | 'output' }>`
   display: flex;
@@ -77,21 +77,17 @@ const Container = styled.div<{ $type: 'input' | 'output' }>`
   width: 45%;
   border-radius: 8px;
   padding: 10px;
-  background-color: ${props =>
-    props.$type === 'input'
-      ? 'var(--color-background-3)'
-      : 'var(--color-primary-bg)'
-  };
-`;
+  background-color: ${(props) => (props.$type === 'input' ? 'var(--color-background-3)' : 'var(--color-primary-bg)')};
+`

 const Label = styled.div`
   margin-bottom: 8px;
   font-weight: bold;
-`;
+`

 const Canvas = styled.canvas`
   width: 100%;
   height: 50px;
-`;
+`

-export default VoiceVisualizer;
+export default VoiceVisualizer
diff --git a/src/renderer/src/i18n/locales/ja-jp.json b/src/renderer/src/i18n/locales/ja-jp.json
index 16f380f5ab..82dd26e79a 100644
--- a/src/renderer/src/i18n/locales/ja-jp.json
+++ b/src/renderer/src/i18n/locales/ja-jp.json
@@ -1507,6 +1507,21 @@
     "quit": "終了",
     "show_window": "ウィンドウを表示",
     "visualization": "可視化"
+    },
+    "voice_call": {
+      "title": "[to be translated]:语音通话",
+      "start": "[to be translated]:开始语音通话",
+      "end": "[to be translated]:结束通话",
+      "mute": "[to be translated]:静音",
+      "unmute": "[to be translated]:取消静音",
+      "pause": "[to be translated]:暂停",
+      "resume": "[to be translated]:继续",
+      "you": "[to be translated]:您",
+      "ai": "[to be translated]:AI",
+      "press_to_talk": "[to be translated]:长按说话",
+      "release_to_send": "[to be translated]:松开发送",
+      "initialization_failed": "[to be translated]:初始化语音通话失败",
+      "error": "[to be translated]:语音通话出错"
     }
   }
 }
\ No newline at end of file
diff --git a/src/renderer/src/i18n/locales/ru-ru.json b/src/renderer/src/i18n/locales/ru-ru.json
index 113b8a1864..b2ebfa5df2 100644
--- a/src/renderer/src/i18n/locales/ru-ru.json
+++ b/src/renderer/src/i18n/locales/ru-ru.json
@@ -1507,6 +1507,21 @@
     "quit": "Выйти",
     "show_window": "Показать окно",
     "visualization": "Визуализация"
+    },
+    "voice_call": {
+      "title": "[to be translated]:语音通话",
+      "start": "[to be translated]:开始语音通话",
+      "end": "[to be translated]:结束通话",
+      "mute": "[to be translated]:静音",
+      "unmute": "[to be translated]:取消静音",
+      "pause": "[to be translated]:暂停",
+      "resume": "[to be translated]:继续",
+      "you": "[to be translated]:您",
+      "ai": "[to be translated]:AI",
+      "press_to_talk": "[to be translated]:长按说话",
+      "release_to_send": "[to be translated]:松开发送",
+      "initialization_failed": "[to be translated]:初始化语音通话失败",
+      "error": "[to be translated]:语音通话出错"
     }
   }
 }
\ No newline at end of file
diff --git a/src/renderer/src/i18n/locales/zh-tw.json b/src/renderer/src/i18n/locales/zh-tw.json
index 78808393a7..b2efedb48e 100644
--- a/src/renderer/src/i18n/locales/zh-tw.json
+++ b/src/renderer/src/i18n/locales/zh-tw.json
@@ -1507,6 +1507,21 @@
     "quit": "結束",
     "show_window": "顯示視窗",
     "visualization": "視覺化"
+    },
+    "voice_call": {
+      "title": "[to be translated]:语音通话",
+      "start": "[to be translated]:开始语音通话",
+      "end": "[to be translated]:结束通话",
+      "mute": "[to be translated]:静音",
+      "unmute": "[to be translated]:取消静音",
+      "pause": "[to be translated]:暂停",
+      "resume": "[to be translated]:继续",
+      "you": "[to be translated]:您",
+      "ai": "[to be translated]:AI",
+      "press_to_talk": "[to be translated]:长按说话",
+      "release_to_send": "[to be translated]:松开发送",
+      "initialization_failed": "[to be translated]:初始化语音通话失败",
+      "error": "[to be translated]:语音通话出错"
     }
   }
 }
\ No newline at end of file
diff --git a/src/renderer/src/pages/home/Messages/MessageMenubar.tsx b/src/renderer/src/pages/home/Messages/MessageMenubar.tsx
index 006a1d495a..f778a4adb1 100644
--- a/src/renderer/src/pages/home/Messages/MessageMenubar.tsx
+++ b/src/renderer/src/pages/home/Messages/MessageMenubar.tsx
@@ -407,10 +407,12 @@ const MessageMenubar: FC = (props) => {
         )}
       {isAssistantMessage && ttsEnabled && (
-          {
-            console.log('点击MessageMenubar中的TTS按钮,开始播放消息')
-            TTSService.speakFromMessage(message)
-          }}>
+            {
+              console.log('点击MessageMenubar中的TTS按钮,开始播放消息')
+              TTSService.speakFromMessage(message)
+            }}>
diff --git a/src/renderer/src/pages/settings/TTSSettings/TTSSettings.tsx b/src/renderer/src/pages/settings/TTSSettings/TTSSettings.tsx
index 4fea657369..dbc7f9c77b 100644
--- a/src/renderer/src/pages/settings/TTSSettings/TTSSettings.tsx
+++ b/src/renderer/src/pages/settings/TTSSettings/TTSSettings.tsx
@@ -14,19 +14,19 @@ import {
   setTtsEnabled,
   setTtsFilterOptions,
   setTtsModel,
+  setTtsMsOutputFormat,
+  setTtsMsVoice,
   setTtsServiceType,
-  setTtsVoice,
   setTtsSiliconflowApiKey,
   setTtsSiliconflowApiUrl,
-  setTtsSiliconflowVoice,
   setTtsSiliconflowModel,
   setTtsSiliconflowResponseFormat,
   setTtsSiliconflowSpeed,
-  setTtsMsVoice,
-  setTtsMsOutputFormat
+  setTtsSiliconflowVoice,
+  setTtsVoice
 } from '@renderer/store/settings'
 import { Button, Form, Input, InputNumber, message, Select, Space, Switch, Tabs, Tag } from 'antd'
-import { FC, useEffect, useState, useCallback } from 'react'
+import { FC, useCallback, useEffect, useState } from 'react'
 import { useTranslation } from 'react-i18next'
 import { useSelector } from 'react-redux'
 import styled from 'styled-components'
@@ -176,32 +176,30 @@ const TTSSettings: FC = () => {
   // 免费在线TTS可用的语音列表
   const [msTtsVoices, setMsTtsVoices] = useState<{ label: string; value: string }[]>([])
-
-  // 获取免费在线TTS可用的语音列表
   const getMsTtsVoices = useCallback(async () => {
     try {
       // 调用API获取免费在线TTS语音列表
-      const response = await window.api.msTTS.getVoices();
-      console.log('获取到的免费在线TTS语音列表:', response);
+      const response = await window.api.msTTS.getVoices()
+      console.log('获取到的免费在线TTS语音列表:', response)

       // 转换为选项格式
       const voices = response.map((voice: any) => ({
         label: `${voice.ShortName} (${voice.Gender === 'Female' ? '女声' : '男声'})`,
         value: voice.ShortName
-      }));
+      }))

       // 按语言和性别排序
       voices.sort((a: any, b: any) => {
-        const localeA = a.value.split('-')[0] + a.value.split('-')[1];
-        const localeB = b.value.split('-')[0] + b.value.split('-')[1];
-        if (localeA !== localeB) return localeA.localeCompare(localeB);
-        return a.label.localeCompare(b.label);
-      });
+        const localeA = a.value.split('-')[0] + a.value.split('-')[1]
+        const localeB = b.value.split('-')[0] + b.value.split('-')[1]
+        if (localeA !== localeB) return localeA.localeCompare(localeB)
+        return a.label.localeCompare(b.label)
+      })

-      setMsTtsVoices(voices);
+      setMsTtsVoices(voices)
     } catch (error) {
-      console.error('获取免费在线TTS语音列表失败:', error);
+      console.error('获取免费在线TTS语音列表失败:', error)
       // 如果获取失败,设置一些默认的中文语音
       setMsTtsVoices([
         { label: 'zh-CN-XiaoxiaoNeural (女声)', value: 'zh-CN-XiaoxiaoNeural' },
@@ -211,10 +209,10 @@ const TTSSettings: FC = () => {
         { label: 'zh-CN-XiaomoNeural (女声)', value: 'zh-CN-XiaomoNeural' },
         { label: 'zh-CN-XiaoxuanNeural (女声)', value: 'zh-CN-XiaoxuanNeural' },
         { label: 'zh-CN-XiaoruiNeural (女声)', value: 'zh-CN-XiaoruiNeural' },
-        { label: 'zh-CN-YunfengNeural (男声)', value: 'zh-CN-YunfengNeural' },
-      ]);
+        { label: 'zh-CN-YunfengNeural (男声)', value: 'zh-CN-YunfengNeural' }
+      ])
     }
-  }, []);
+  }, [])

   // 获取浏览器可用的语音列表
   const getVoices = useCallback(() => {
@@ -323,8 +321,8 @@ const TTSSettings: FC = () => {
   // 获取免费在线TTS语音列表
   useEffect(() => {
     // 获取免费在线TTS语音列表
-    getMsTtsVoices();
-  }, [getMsTtsVoices]);
+    getMsTtsVoices()
+  }, [getMsTtsVoices])

   useEffect(() => {
     // 初始化语音合成引擎
@@ -634,9 +632,7 @@ const TTSSettings: FC = () => {
               <Select
-                value={ttsMsVoice}
-                onChange={(value) => dispatch(setTtsMsVoice(value))}
-                disabled={!ttsEnabled}
-                style={{ width: '100%' }}
-                options={msTtsVoices.length > 0 ? msTtsVoices : [
-                  { label: 'zh-CN-XiaoxiaoNeural (女声)', value: 'zh-CN-XiaoxiaoNeural' },
-                  { label: 'zh-CN-YunxiNeural (男声)', value: 'zh-CN-YunxiNeural' },
-                  { label: 'zh-CN-YunyangNeural (男声)', value: 'zh-CN-YunyangNeural' },
-                  { label: 'zh-CN-XiaohanNeural (女声)', value: 'zh-CN-XiaohanNeural' },
-                  { label: 'zh-CN-XiaomoNeural (女声)', value: 'zh-CN-XiaomoNeural' },
-                  { label: 'zh-CN-XiaoxuanNeural (女声)', value: 'zh-CN-XiaoxuanNeural' },
-                  { label: 'zh-CN-XiaoruiNeural (女声)', value: 'zh-CN-XiaoruiNeural' },
-                  { label: 'zh-CN-YunfengNeural (男声)', value: 'zh-CN-YunfengNeural' },
-                ]}
-                showSearch
-                optionFilterProp="label"
-                placeholder={t('settings.tts.voice.placeholder', { defaultValue: '请选择音色' })}
-                notFoundContent={t('settings.tts.voice.not_found', { defaultValue: '未找到音色' })}
+                value={ttsMsVoice}
+                onChange={(value) => dispatch(setTtsMsVoice(value))}
+                disabled={!ttsEnabled}
+                style={{ width: '100%' }}
+                options={
+                  msTtsVoices.length > 0
msTtsVoices + : [ + { label: 'zh-CN-XiaoxiaoNeural (女声)', value: 'zh-CN-XiaoxiaoNeural' }, + { label: 'zh-CN-YunxiNeural (男声)', value: 'zh-CN-YunxiNeural' }, + { label: 'zh-CN-YunyangNeural (男声)', value: 'zh-CN-YunyangNeural' }, + { label: 'zh-CN-XiaohanNeural (女声)', value: 'zh-CN-XiaohanNeural' }, + { label: 'zh-CN-XiaomoNeural (女声)', value: 'zh-CN-XiaomoNeural' }, + { label: 'zh-CN-XiaoxuanNeural (女声)', value: 'zh-CN-XiaoxuanNeural' }, + { label: 'zh-CN-XiaoruiNeural (女声)', value: 'zh-CN-XiaoruiNeural' }, + { label: 'zh-CN-YunfengNeural (男声)', value: 'zh-CN-YunfengNeural' } + ] + } + showSearch + optionFilterProp="label" + placeholder={t('settings.tts.voice.placeholder', { defaultValue: '请选择音色' })} + notFoundContent={t('settings.tts.voice.not_found', { defaultValue: '未找到音色' })} /> diff --git a/src/renderer/src/services/ASRService.ts b/src/renderer/src/services/ASRService.ts index 71b0542469..51695d1445 100644 --- a/src/renderer/src/services/ASRService.ts +++ b/src/renderer/src/services/ASRService.ts @@ -138,9 +138,9 @@ class ASRService { // 如果有回调函数,调用一次空字符串,触发按钮状态重置 if (this.resultCallback && typeof this.resultCallback === 'function') { // 使用空字符串调用回调,不会影响输入框,但可以触发按钮状态重置 - const callback = this.resultCallback as (text: string, isFinal?: boolean) => void; // 明确指定类型 + const callback = this.resultCallback as (text: string, isFinal?: boolean) => void // 明确指定类型 setTimeout(() => { - callback('', false); + callback('', false) }, 100) } } @@ -334,7 +334,7 @@ class ASRService { // 使用空字符串调用回调,不会影响输入框,但可以触发按钮状态重置 // 传递false表示这不是最终结果,只是状态更新 setTimeout(() => { - onTranscribed('', false); + onTranscribed('', false) }, 100) } } else { diff --git a/src/renderer/src/services/TTSService.ts b/src/renderer/src/services/TTSService.ts index 863c295ec4..b99844254a 100644 --- a/src/renderer/src/services/TTSService.ts +++ b/src/renderer/src/services/TTSService.ts @@ -3,9 +3,10 @@ * 这个文件仅作兼容性保留,将在后续版本中移除 */ -import { TTSService as NewTTSService } from './tts/index' import { Message } from '@renderer/types' +import { TTSService as NewTTSService } from './tts/index' + /** * TTS服务,用于将文本转换为语音 * @deprecated 请使用 src/renderer/src/services/tts/TTSService.ts diff --git a/src/renderer/src/services/VoiceCallService.ts b/src/renderer/src/services/VoiceCallService.ts index b4ffa99696..f89e9708a3 100644 --- a/src/renderer/src/services/VoiceCallService.ts +++ b/src/renderer/src/services/VoiceCallService.ts @@ -1,180 +1,180 @@ -import store from '@renderer/store'; -import { fetchChatCompletion } from '@renderer/services/ApiService'; -import { getAssistantMessage, getUserMessage } from '@renderer/services/MessagesService'; -import { getDefaultAssistant } from '@renderer/services/AssistantService'; -import TTSService from '@renderer/services/TTSService'; -import ASRService from '@renderer/services/ASRService'; +import { fetchChatCompletion } from '@renderer/services/ApiService' +import ASRService from '@renderer/services/ASRService' +import { getDefaultAssistant } from '@renderer/services/AssistantService' +import { getAssistantMessage, getUserMessage } from '@renderer/services/MessagesService' +import TTSService from '@renderer/services/TTSService' +import store from '@renderer/store' // 导入类型 -import type { Message } from '@renderer/types'; +import type { Message } from '@renderer/types' interface VoiceCallCallbacks { - onTranscript: (text: string) => void; - onResponse: (text: string) => void; - onListeningStateChange: (isListening: boolean) => void; - onSpeakingStateChange: (isSpeaking: boolean) => void; + onTranscript: (text: 
string) => void + onResponse: (text: string) => void + onListeningStateChange: (isListening: boolean) => void + onSpeakingStateChange: (isSpeaking: boolean) => void } // 为TypeScript添加SpeechRecognition类型 declare global { interface Window { - SpeechRecognition: any; - webkitSpeechRecognition: any; + SpeechRecognition: any + webkitSpeechRecognition: any } } class VoiceCallServiceClass { - private recognition: any = null; - private isCallActive = false; - private isRecording = false; // 新增录音状态 - private isMuted = false; - private isPaused = false; - private callbacks: VoiceCallCallbacks | null = null; - private _currentTranscript = ''; // 使用下划线前缀避免未使用警告 - private _accumulatedTranscript = ''; // 累积的语音识别结果 - private conversationHistory: { role: string; content: string }[] = []; - private isProcessingResponse = false; - private ttsService = TTSService; - private recordingTimeout: NodeJS.Timeout | null = null; // 录音超时定时器 + private recognition: any = null + private isCallActive = false + private isRecording = false // 新增录音状态 + private isMuted = false + private isPaused = false + private callbacks: VoiceCallCallbacks | null = null + private _currentTranscript = '' // 使用下划线前缀避免未使用警告 + private _accumulatedTranscript = '' // 累积的语音识别结果 + private conversationHistory: { role: string; content: string }[] = [] + private isProcessingResponse = false + private ttsService = TTSService + private recordingTimeout: NodeJS.Timeout | null = null // 录音超时定时器 async initialize() { // 检查麦克风权限 try { - const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); - stream.getTracks().forEach(track => track.stop()); + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }) + stream.getTracks().forEach((track) => track.stop()) } catch (error) { - console.error('Microphone permission denied:', error); - throw new Error('Microphone permission denied'); + console.error('Microphone permission denied:', error) + throw new Error('Microphone permission denied') } // 获取当前ASR服务类型 - const { asrServiceType } = store.getState().settings; + const { asrServiceType } = store.getState().settings // 如果使用浏览器ASR,检查浏览器支持 if (asrServiceType === 'browser') { if (!('webkitSpeechRecognition' in window) && !('SpeechRecognition' in window)) { - throw new Error('Speech recognition not supported in this browser'); + throw new Error('Speech recognition not supported in this browser') } // 初始化浏览器语音识别 - const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition; - this.recognition = new SpeechRecognition(); - this.recognition.continuous = true; - this.recognition.interimResults = true; - this.recognition.lang = navigator.language || 'zh-CN'; + const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition + this.recognition = new SpeechRecognition() + this.recognition.continuous = true + this.recognition.interimResults = true + this.recognition.lang = navigator.language || 'zh-CN' } else if (asrServiceType === 'local') { // 如果使用本地服务器ASR,检查连接 try { // 尝试连接本地ASR服务器 - const connected = await ASRService.connectToWebSocketServer(); + const connected = await ASRService.connectToWebSocketServer() if (!connected) { - throw new Error('无法连接到语音识别服务'); + throw new Error('无法连接到语音识别服务') } } catch (error) { - console.error('Failed to connect to ASR server:', error); - throw new Error('Failed to connect to ASR server'); + console.error('Failed to connect to ASR server:', error) + throw new Error('Failed to connect to ASR server') } } - return true; + return true } async startCall(callbacks: 
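
For orientation, a hypothetical push-to-talk consumer of the `VoiceCallCallbacks` interface and the lifecycle methods this class exposes (`initialize`, `startCall`, `startRecording`, and `stopRecording` all appear in this diff; the element id and logging are invented):

```ts
import { VoiceCallService } from '@renderer/services/VoiceCallService'

async function setupVoiceCall(): Promise<void> {
  // Throws if microphone permission or the configured ASR backend is unavailable.
  await VoiceCallService.initialize()

  await VoiceCallService.startCall({
    onTranscript: (text) => console.log('user:', text),
    onResponse: (text) => console.log('assistant:', text),
    onListeningStateChange: (listening) => console.log('listening:', listening),
    onSpeakingStateChange: (speaking) => console.log('speaking:', speaking)
  })

  // Long-press semantics: record while the button is held, send on release.
  const talkButton = document.getElementById('talk')!
  talkButton.addEventListener('mousedown', () => void VoiceCallService.startRecording())
  talkButton.addEventListener('mouseup', () => void VoiceCallService.stopRecording())
}
```
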
VoiceCallCallbacks) { - this.callbacks = callbacks; - this.isCallActive = true; - this.conversationHistory = []; + this.callbacks = callbacks + this.isCallActive = true + this.conversationHistory = [] // 获取当前ASR服务类型 - const { asrServiceType } = store.getState().settings; + const { asrServiceType } = store.getState().settings // 根据不同的ASR服务类型进行初始化 if (asrServiceType === 'browser') { if (!this.recognition) { - throw new Error('Browser speech recognition not initialized'); + throw new Error('Browser speech recognition not initialized') } // 设置浏览器语音识别事件处理 this.recognition.onresult = (event: any) => { - let interimTranscript = ''; - let finalTranscript = ''; + let interimTranscript = '' + let finalTranscript = '' for (let i = event.resultIndex; i < event.results.length; ++i) { if (event.results[i].isFinal) { - finalTranscript += event.results[i][0].transcript; + finalTranscript += event.results[i][0].transcript } else { - interimTranscript += event.results[i][0].transcript; + interimTranscript += event.results[i][0].transcript } } if (interimTranscript) { // 更新当前的临时识别结果 - this._currentTranscript = interimTranscript; + this._currentTranscript = interimTranscript // 显示累积结果 + 当前临时结果 - this.callbacks?.onTranscript(this._accumulatedTranscript + ' ' + interimTranscript); + this.callbacks?.onTranscript(this._accumulatedTranscript + ' ' + interimTranscript) } if (finalTranscript) { // 将最终结果累积到总结果中 if (this._accumulatedTranscript) { // 如果已经有累积的文本,添加空格再追加 - this._accumulatedTranscript += ' ' + finalTranscript; + this._accumulatedTranscript += ' ' + finalTranscript } else { // 如果是第一段文本,直接设置 - this._accumulatedTranscript = finalTranscript; + this._accumulatedTranscript = finalTranscript } // 更新当前的识别结果 - this._currentTranscript = ''; + this._currentTranscript = '' // 显示累积的完整结果 - this.callbacks?.onTranscript(this._accumulatedTranscript); + this.callbacks?.onTranscript(this._accumulatedTranscript) // 在录音过程中只更新transcript,不触发handleUserSpeech // 松开按钮后才会处理完整的录音内容 } - }; + } this.recognition.onstart = () => { - this.isRecording = true; - this.callbacks?.onListeningStateChange(true); - }; + this.isRecording = true + this.callbacks?.onListeningStateChange(true) + } this.recognition.onend = () => { - this.isRecording = false; - this.callbacks?.onListeningStateChange(false); - }; + this.isRecording = false + this.callbacks?.onListeningStateChange(false) + } this.recognition.onerror = (event: any) => { - console.error('Speech recognition error', event.error); - this.isRecording = false; - this.callbacks?.onListeningStateChange(false); - }; + console.error('Speech recognition error', event.error) + this.isRecording = false + this.callbacks?.onListeningStateChange(false) + } } // 播放欢迎语音 - const welcomeMessage = '您好,我是您的AI助手,请长按说话按钮进行对话。'; - this.callbacks?.onResponse(welcomeMessage); + const welcomeMessage = '您好,我是您的AI助手,请长按说话按钮进行对话。' + this.callbacks?.onResponse(welcomeMessage) // 监听TTS状态 const ttsStateHandler = (isPlaying: boolean) => { - this.callbacks?.onSpeakingStateChange(isPlaying); - }; + this.callbacks?.onSpeakingStateChange(isPlaying) + } // 监听TTS播放状态 window.addEventListener('tts-state-change', (event: any) => { - ttsStateHandler(event.detail.isPlaying); - }); + ttsStateHandler(event.detail.isPlaying) + }) // 播放欢迎语音,并手动设置初始状态 - this.callbacks?.onSpeakingStateChange(true); - this.ttsService.speak(welcomeMessage); + this.callbacks?.onSpeakingStateChange(true) + this.ttsService.speak(welcomeMessage) // 确保欢迎语音结束后状态正确 setTimeout(() => { if (this.ttsService && !this.ttsService.isCurrentlyPlaying()) { - 
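
`startCall` above listens for a window-level `tts-state-change` event to drive the speaking indicator, but the dispatching side is outside this diff. A sketch of the emitter that listener implies (an assumption, not code from this PR):

```ts
// Emit from the TTS layer whenever playback starts or stops; the detail
// shape matches what startCall's listener reads (event.detail.isPlaying).
const emitTtsState = (isPlaying: boolean): void => {
  window.dispatchEvent(new CustomEvent('tts-state-change', { detail: { isPlaying } }))
}
```
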
this.callbacks?.onSpeakingStateChange(false); + this.callbacks?.onSpeakingStateChange(false) } - }, 5000); // 5秒后检查TTS状态 + }, 5000) // 5秒后检查TTS状态 - return true; + return true } /** @@ -183,25 +183,24 @@ class VoiceCallServiceClass { */ async startRecording(): Promise { if (!this.isCallActive || this.isPaused || this.isProcessingResponse || this.isRecording) { - return false; + return false } // 重置累积的文本 - this._accumulatedTranscript = ''; + this._accumulatedTranscript = '' // 获取当前ASR服务类型 - const { asrServiceType } = store.getState().settings; + const { asrServiceType } = store.getState().settings try { if (asrServiceType === 'browser') { // 浏览器ASR if (!this.recognition) { - throw new Error('Browser speech recognition not initialized'); + throw new Error('Browser speech recognition not initialized') } - this.recognition.start(); - this.isRecording = true; - + this.recognition.start() + this.isRecording = true } else if (asrServiceType === 'local') { // 本地服务器ASR await ASRService.startRecording((text, isFinal) => { @@ -210,51 +209,50 @@ class VoiceCallServiceClass { // 如果是最终结果,累积到总结果中 if (this._accumulatedTranscript) { // 如果已经有累积的文本,添加空格再追加 - this._accumulatedTranscript += ' ' + text; + this._accumulatedTranscript += ' ' + text } else { // 如果是第一段文本,直接设置 - this._accumulatedTranscript = text; + this._accumulatedTranscript = text } // 更新当前的识别结果 - this._currentTranscript = ''; + this._currentTranscript = '' // 显示累积的完整结果 - this.callbacks?.onTranscript(this._accumulatedTranscript); + this.callbacks?.onTranscript(this._accumulatedTranscript) } else { // 如果是临时结果,更新当前的识别结果 - this._currentTranscript = text; + this._currentTranscript = text // 显示累积结果 + 当前临时结果 - this.callbacks?.onTranscript(this._accumulatedTranscript + ' ' + text); + this.callbacks?.onTranscript(this._accumulatedTranscript + ' ' + text) } // 在录音过程中只更新transcript,不触发handleUserSpeech // 松开按钮后才会处理完整的录音内容 } - }); - - this.isRecording = true; - this.callbacks?.onListeningStateChange(true); + }) + this.isRecording = true + this.callbacks?.onListeningStateChange(true) } else if (asrServiceType === 'openai') { // OpenAI ASR - await ASRService.startRecording(); - this.isRecording = true; - this.callbacks?.onListeningStateChange(true); + await ASRService.startRecording() + this.isRecording = true + this.callbacks?.onListeningStateChange(true) } // 设置最长录音时间,防止用户忘记松开 this.recordingTimeout = setTimeout(() => { if (this.isRecording) { - this.stopRecording(); + this.stopRecording() } - }, 60000); // 60秒最长录音时间 + }, 60000) // 60秒最长录音时间 - return true; + return true } catch (error) { - console.error('Failed to start recording:', error); - this.isRecording = false; - this.callbacks?.onListeningStateChange(false); - return false; + console.error('Failed to start recording:', error) + this.isRecording = false + this.callbacks?.onListeningStateChange(false) + return false } } @@ -264,143 +262,141 @@ class VoiceCallServiceClass { */ async stopRecording(): Promise { if (!this.isCallActive || !this.isRecording) { - return false; + return false } // 清除录音超时定时器 if (this.recordingTimeout) { - clearTimeout(this.recordingTimeout); - this.recordingTimeout = null; + clearTimeout(this.recordingTimeout) + this.recordingTimeout = null } // 获取当前ASR服务类型 - const { asrServiceType } = store.getState().settings; + const { asrServiceType } = store.getState().settings try { // 存储当前的语音识别结果,用于松开按钮后发送给AI - const currentTranscript = this._currentTranscript; + const currentTranscript = this._currentTranscript // 存储累积的语音识别结果 - const accumulatedTranscript = this._accumulatedTranscript; + 
const accumulatedTranscript = this._accumulatedTranscript if (asrServiceType === 'browser') { // 浏览器ASR if (!this.recognition) { - throw new Error('Browser speech recognition not initialized'); + throw new Error('Browser speech recognition not initialized') } - this.recognition.stop(); + this.recognition.stop() // onend事件将设置isRecording = false - this.isRecording = false; - this.callbacks?.onListeningStateChange(false); + this.isRecording = false + this.callbacks?.onListeningStateChange(false) // 优先使用累积的文本,如果有的话 if (accumulatedTranscript) { - console.log('发送累积的语音识别结果给AI:', accumulatedTranscript); - this.handleUserSpeech(accumulatedTranscript); + console.log('发送累积的语音识别结果给AI:', accumulatedTranscript) + this.handleUserSpeech(accumulatedTranscript) } else if (currentTranscript) { // 如果没有累积结果,使用当前结果 - console.log('没有累积结果,使用当前结果:', currentTranscript); - this.handleUserSpeech(currentTranscript); + console.log('没有累积结果,使用当前结果:', currentTranscript) + this.handleUserSpeech(currentTranscript) } - } else if (asrServiceType === 'local') { // 本地服务器ASR // 创建一个承诺,等待最终结果 const finalResultPromise = new Promise((resolve) => { // 设置一个超时器,确保不会无限等待 const timeoutId = setTimeout(() => { - console.log('等待最终结果超时,使用当前结果'); - resolve(this._currentTranscript); - }, 1500); // 1.5秒超时 + console.log('等待最终结果超时,使用当前结果') + resolve(this._currentTranscript) + }, 1500) // 1.5秒超时 // 设置回调函数来接收最终结果 const resultCallback = (text: string) => { // 如果是空字符串,表示只是重置状态,不处理 - if (text === '') return; + if (text === '') return if (text) { - clearTimeout(timeoutId); - console.log('收到最终语音识别结果:', text); - this._currentTranscript = text; - this.callbacks?.onTranscript(text); - resolve(text); + clearTimeout(timeoutId) + console.log('收到最终语音识别结果:', text) + this._currentTranscript = text + this.callbacks?.onTranscript(text) + resolve(text) } - }; + } // 停止录音,但不取消,以获取最终结果 - ASRService.stopRecording(resultCallback); - this.isRecording = false; - this.callbacks?.onListeningStateChange(false); + ASRService.stopRecording(resultCallback) + this.isRecording = false + this.callbacks?.onListeningStateChange(false) // 添加额外的安全措施,在停止后立即发送重置命令 setTimeout(() => { // 发送重置命令,确保浏览器不会继续发送结果 - ASRService.cancelRecording(); - }, 2000); // 2秒后强制取消,作为安全措施 - }); + ASRService.cancelRecording() + }, 2000) // 2秒后强制取消,作为安全措施 + }) // 等待最终结果 - const finalText = await finalResultPromise; + const finalText = await finalResultPromise // 优先使用累积的文本,如果有的话 if (accumulatedTranscript) { - console.log('发送累积的语音识别结果给AI:', accumulatedTranscript); - this.handleUserSpeech(accumulatedTranscript); + console.log('发送累积的语音识别结果给AI:', accumulatedTranscript) + this.handleUserSpeech(accumulatedTranscript) } else if (finalText) { // 如果没有累积结果,使用最终结果 - console.log('发送最终语音识别结果给AI:', finalText); - this.handleUserSpeech(finalText); + console.log('发送最终语音识别结果给AI:', finalText) + this.handleUserSpeech(finalText) } else if (currentTranscript) { // 如果没有最终结果,使用当前结果 - console.log('没有最终结果,使用当前结果:', currentTranscript); - this.handleUserSpeech(currentTranscript); + console.log('没有最终结果,使用当前结果:', currentTranscript) + this.handleUserSpeech(currentTranscript) } - } else if (asrServiceType === 'openai') { // OpenAI ASR await ASRService.stopRecording((text) => { // 更新最终的语音识别结果 if (text) { - this._currentTranscript = text; - this.callbacks?.onTranscript(text); + this._currentTranscript = text + this.callbacks?.onTranscript(text) } - }); + }) - this.isRecording = false; - this.callbacks?.onListeningStateChange(false); + this.isRecording = false + this.callbacks?.onListeningStateChange(false) // 使用最新的语音识别结果 - const 
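
The `finalResultPromise` above is one instance of a reusable pattern: wait for a single final recognition result, but fall back to the current partial transcript if nothing arrives in time (1.5 s in the code above). A generalised sketch with illustrative names:

```ts
function waitForFinalResult(
  subscribe: (cb: (text: string) => void) => void,
  fallback: () => string,
  timeoutMs = 1500
): Promise<string> {
  return new Promise((resolve) => {
    const timer = setTimeout(() => resolve(fallback()), timeoutMs)
    subscribe((text) => {
      if (text === '') return // empty string is a state-reset ping, not a result
      clearTimeout(timer)
      resolve(text)
    })
  })
}
```
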
finalTranscript = this._currentTranscript; + const finalTranscript = this._currentTranscript if (finalTranscript) { - this.handleUserSpeech(finalTranscript); + this.handleUserSpeech(finalTranscript) } } - return true; + return true } catch (error) { - console.error('Failed to stop recording:', error); - this.isRecording = false; - this.callbacks?.onListeningStateChange(false); - return false; + console.error('Failed to stop recording:', error) + this.isRecording = false + this.callbacks?.onListeningStateChange(false) + return false } } async handleUserSpeech(text: string) { - if (!this.isCallActive || this.isProcessingResponse || this.isPaused) return; + if (!this.isCallActive || this.isProcessingResponse || this.isPaused) return // 暂停语音识别,避免在AI回复时继续识别 - const { asrServiceType } = store.getState().settings; + const { asrServiceType } = store.getState().settings if (asrServiceType === 'browser') { - this.recognition?.stop(); + this.recognition?.stop() } else if (asrServiceType === 'local' || asrServiceType === 'openai') { - ASRService.cancelRecording(); + ASRService.cancelRecording() } - this.isProcessingResponse = true; + this.isProcessingResponse = true try { // 获取当前助手 - const assistant = getDefaultAssistant(); + const assistant = getDefaultAssistant() // 创建一个简单的Topic对象 const topic = { @@ -410,7 +406,7 @@ class VoiceCallServiceClass { createdAt: new Date().toISOString(), updatedAt: new Date().toISOString(), messages: [] - }; + } // 创建用户消息 const userMessage = getUserMessage({ @@ -418,35 +414,35 @@ class VoiceCallServiceClass { topic, type: 'text', content: text - }); + }) // 创建助手消息 const assistantMessage = getAssistantMessage({ assistant, topic - }); + }) // 更新对话历史 - this.conversationHistory.push({ role: 'user', content: text }); + this.conversationHistory.push({ role: 'user', content: text }) // 构建消息列表 // 将历史消息转换为正确的Message对象 - const historyMessages = this.conversationHistory.map(msg => { + const historyMessages = this.conversationHistory.map((msg) => { if (msg.role === 'user') { return getUserMessage({ assistant, topic, type: 'text', content: msg.content - }); + }) } else { const assistantMsg = getAssistantMessage({ assistant, topic - }); - return { ...assistantMsg, content: msg.content, status: 'success' }; + }) + return { ...assistantMsg, content: msg.content, status: 'success' } } - }); + }) // 修改用户消息,添加语音通话提示 const voiceCallPrompt = `当前是语音通话模式。请注意: @@ -457,7 +453,7 @@ class VoiceCallServiceClass { 5. 回答应该简短有力,便于用户通过语音理解。 6. 避免使用特殊符号、表情符号、标点符号等,因为这些在语音播放时会影响理解。 7. 使用完整的句子而非简单的关键词列表。 -8. 尽量使用常见词汇,避免生僻或专业术语,除非用户特别询问。`; +8. 
尽量使用常见词汇,避免生僻或专业术语,除非用户特别询问。` // 创建系统指令消息 const systemMessage = getUserMessage({ @@ -465,17 +461,17 @@ class VoiceCallServiceClass { topic, type: 'text', content: voiceCallPrompt - }); + }) // 修改用户消息的内容 - userMessage.content = text; + userMessage.content = text // 构建最终消息列表 // 使用类型断言解决类型问题 - const messages = [systemMessage, ...historyMessages, userMessage] as Message[]; + const messages = [systemMessage, ...historyMessages, userMessage] as Message[] // 流式响应处理 - let fullResponse = ''; + let fullResponse = '' try { // 调用真实的LLM API @@ -485,60 +481,58 @@ class VoiceCallServiceClass { assistant, onResponse: async (msg) => { if (msg.content && msg.content !== fullResponse) { - fullResponse = msg.content; + fullResponse = msg.content // 更新UI - this.callbacks?.onResponse(fullResponse); + this.callbacks?.onResponse(fullResponse) // 如果TTS正在播放,停止它 if (this.ttsService.isCurrentlyPlaying()) { - this.ttsService.stop(); + this.ttsService.stop() } } } - }); + }) // 播放完整响应 if (!this.isMuted && this.isCallActive) { // 手动设置语音状态 - this.callbacks?.onSpeakingStateChange(true); - this.ttsService.speak(fullResponse); + this.callbacks?.onSpeakingStateChange(true) + this.ttsService.speak(fullResponse) // 确保语音结束后状态正确 setTimeout(() => { if (this.ttsService && !this.ttsService.isCurrentlyPlaying()) { - this.callbacks?.onSpeakingStateChange(false); + this.callbacks?.onSpeakingStateChange(false) } - }, 1000); // 1秒后检查TTS状态 + }, 1000) // 1秒后检查TTS状态 } // 更新对话历史 - this.conversationHistory.push({ role: 'assistant', content: fullResponse }); - + this.conversationHistory.push({ role: 'assistant', content: fullResponse }) } catch (innerError) { - console.error('Error generating response:', innerError); + console.error('Error generating response:', innerError) // 如果出错,使用一个简单的回复 - fullResponse = `抱歉,处理您的请求时出错了。`; - this.callbacks?.onResponse(fullResponse); + fullResponse = `抱歉,处理您的请求时出错了。` + this.callbacks?.onResponse(fullResponse) if (!this.isMuted && this.isCallActive) { // 手动设置语音状态 - this.callbacks?.onSpeakingStateChange(true); - this.ttsService.speak(fullResponse); + this.callbacks?.onSpeakingStateChange(true) + this.ttsService.speak(fullResponse) // 确保语音结束后状态正确 setTimeout(() => { if (this.ttsService && !this.ttsService.isCurrentlyPlaying()) { - this.callbacks?.onSpeakingStateChange(false); + this.callbacks?.onSpeakingStateChange(false) } - }, 1000); // 1秒后检查TTS状态 + }, 1000) // 1秒后检查TTS状态 } } - } catch (error) { - console.error('Error processing voice response:', error); + console.error('Error processing voice response:', error) } finally { - this.isProcessingResponse = false; + this.isProcessingResponse = false // 不自动恢复语音识别,等待用户长按按钮 // 长按说话模式下,我们不需要自动恢复语音识别 @@ -551,106 +545,104 @@ class VoiceCallServiceClass { */ async cancelRecording(): Promise { if (!this.isCallActive || !this.isRecording) { - return false; + return false } // 清除录音超时定时器 if (this.recordingTimeout) { - clearTimeout(this.recordingTimeout); - this.recordingTimeout = null; + clearTimeout(this.recordingTimeout) + this.recordingTimeout = null } // 获取当前ASR服务类型 - const { asrServiceType } = store.getState().settings; + const { asrServiceType } = store.getState().settings try { if (asrServiceType === 'browser') { // 浏览器ASR if (!this.recognition) { - throw new Error('Browser speech recognition not initialized'); + throw new Error('Browser speech recognition not initialized') } - this.recognition.stop(); - this.isRecording = false; - this.callbacks?.onListeningStateChange(false); - + this.recognition.stop() + this.isRecording = false + 
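
The playback branches in `handleUserSpeech` repeat one idiom: flip the speaking flag on, start TTS, then re-check after a delay because the end event is not fully reliable. Factored into a helper for illustration (hypothetical; the diff keeps it inline, with a 1 s delay here and 5 s for the welcome message):

```ts
import TTSService from '@renderer/services/TTSService'

function speakWithStateCheck(text: string, onSpeaking: (speaking: boolean) => void, checkAfterMs = 1000): void {
  onSpeaking(true)
  TTSService.speak(text)
  setTimeout(() => {
    // If playback already finished (or never started), clear the flag.
    if (!TTSService.isCurrentlyPlaying()) onSpeaking(false)
  }, checkAfterMs)
}
```
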
this.callbacks?.onListeningStateChange(false) } else if (asrServiceType === 'local') { // 本地服务器ASR - ASRService.cancelRecording(); - this.isRecording = false; - this.callbacks?.onListeningStateChange(false); - + ASRService.cancelRecording() + this.isRecording = false + this.callbacks?.onListeningStateChange(false) } else if (asrServiceType === 'openai') { // OpenAI ASR - ASRService.cancelRecording(); - this.isRecording = false; - this.callbacks?.onListeningStateChange(false); + ASRService.cancelRecording() + this.isRecording = false + this.callbacks?.onListeningStateChange(false) } // 清除当前识别结果 - this._currentTranscript = ''; - this.callbacks?.onTranscript(''); + this._currentTranscript = '' + this.callbacks?.onTranscript('') - return true; + return true } catch (error) { - console.error('Failed to cancel recording:', error); - this.isRecording = false; - this.callbacks?.onListeningStateChange(false); - return false; + console.error('Failed to cancel recording:', error) + this.isRecording = false + this.callbacks?.onListeningStateChange(false) + return false } } setMuted(muted: boolean) { - this.isMuted = muted; + this.isMuted = muted // 如果设置为静音,停止当前TTS播放 if (muted && this.ttsService.isCurrentlyPlaying()) { - this.ttsService.stop(); + this.ttsService.stop() } } setPaused(paused: boolean) { - this.isPaused = paused; + this.isPaused = paused // 获取当前ASR服务类型 - const { asrServiceType } = store.getState().settings; + const { asrServiceType } = store.getState().settings if (paused) { // 暂停语音识别 if (asrServiceType === 'browser') { - this.recognition?.stop(); + this.recognition?.stop() } else if (asrServiceType === 'local' || asrServiceType === 'openai') { - ASRService.cancelRecording(); + ASRService.cancelRecording() } // 暂停TTS if (this.ttsService.isCurrentlyPlaying()) { - this.ttsService.stop(); + this.ttsService.stop() } } // 不自动恢复语音识别,等待用户长按按钮 } endCall() { - this.isCallActive = false; + this.isCallActive = false // 获取当前ASR服务类型 - const { asrServiceType } = store.getState().settings; + const { asrServiceType } = store.getState().settings // 停止语音识别 if (asrServiceType === 'browser') { - this.recognition?.stop(); + this.recognition?.stop() } else if (asrServiceType === 'local' || asrServiceType === 'openai') { - ASRService.cancelRecording(); + ASRService.cancelRecording() } // 停止TTS if (this.ttsService.isCurrentlyPlaying()) { - this.ttsService.stop(); + this.ttsService.stop() } - this.callbacks = null; + this.callbacks = null } } -export const VoiceCallService = new VoiceCallServiceClass(); +export const VoiceCallService = new VoiceCallServiceClass() diff --git a/src/renderer/src/services/tts/EdgeTTSService.ts b/src/renderer/src/services/tts/EdgeTTSService.ts index 027d72237e..72bd2d364d 100644 --- a/src/renderer/src/services/tts/EdgeTTSService.ts +++ b/src/renderer/src/services/tts/EdgeTTSService.ts @@ -1,22 +1,23 @@ -import { TTSServiceInterface } from './TTSServiceInterface'; -import i18n from '@renderer/i18n'; +import i18n from '@renderer/i18n' + +import { TTSServiceInterface } from './TTSServiceInterface' // 全局变量来跟踪当前正在播放的语音 -let currentUtterance: SpeechSynthesisUtterance | null = null; +let currentUtterance: SpeechSynthesisUtterance | null = null /** * Edge TTS服务实现类 */ export class EdgeTTSService implements TTSServiceInterface { - private edgeVoice: string; + private edgeVoice: string /** * 构造函数 * @param edgeVoice Edge语音 */ constructor(edgeVoice: string) { - this.edgeVoice = edgeVoice; - console.log('初始化EdgeTTSService,语音:', edgeVoice); + this.edgeVoice = edgeVoice + 
console.log('初始化EdgeTTSService,语音:', edgeVoice) } /** @@ -25,7 +26,7 @@ export class EdgeTTSService implements TTSServiceInterface { */ private validateParams(): void { if (!this.edgeVoice) { - throw new Error(i18n.t('settings.tts.error.no_edge_voice')); + throw new Error(i18n.t('settings.tts.error.no_edge_voice')) } } @@ -37,79 +38,79 @@ export class EdgeTTSService implements TTSServiceInterface { private playDirectly(text: string): boolean { try { // 验证参数 - this.validateParams(); + this.validateParams() // 使用Web Speech API if (!('speechSynthesis' in window)) { - throw new Error(i18n.t('settings.tts.error.browser_not_support')); + throw new Error(i18n.t('settings.tts.error.browser_not_support')) } // 停止当前正在播放的语音 - window.speechSynthesis.cancel(); + window.speechSynthesis.cancel() if (currentUtterance) { - currentUtterance = null; + currentUtterance = null } // 创建语音合成器实例 - const utterance = new SpeechSynthesisUtterance(text); - currentUtterance = utterance; + const utterance = new SpeechSynthesisUtterance(text) + currentUtterance = utterance // 获取可用的语音合成声音 - const voices = window.speechSynthesis.getVoices(); - console.log('可用的语音合成声音:', voices); + const voices = window.speechSynthesis.getVoices() + console.log('可用的语音合成声音:', voices) // 查找指定的语音 - let selectedVoice = voices.find((v) => v.name === this.edgeVoice); + let selectedVoice = voices.find((v) => v.name === this.edgeVoice) // 如果没有找到指定的语音,尝试使用中文语音 if (!selectedVoice) { - console.warn('未找到指定的语音:', this.edgeVoice); + console.warn('未找到指定的语音:', this.edgeVoice) // 尝试找中文语音 - selectedVoice = voices.find((v) => v.lang === 'zh-CN'); + selectedVoice = voices.find((v) => v.lang === 'zh-CN') if (selectedVoice) { - console.log('使用替代中文语音:', selectedVoice.name); + console.log('使用替代中文语音:', selectedVoice.name) } else { // 如果没有中文语音,使用第一个可用的语音 if (voices.length > 0) { - selectedVoice = voices[0]; - console.log('使用第一个可用的语音:', selectedVoice.name); + selectedVoice = voices[0] + console.log('使用第一个可用的语音:', selectedVoice.name) } else { - console.warn('没有可用的语音'); - return false; + console.warn('没有可用的语音') + return false } } } else { - console.log('已选择语音:', selectedVoice.name); + console.log('已选择语音:', selectedVoice.name) } // 设置语音 if (selectedVoice) { - utterance.voice = selectedVoice; + utterance.voice = selectedVoice } // 设置事件处理程序 utterance.onend = () => { - console.log('语音合成已结束'); - currentUtterance = null; + console.log('语音合成已结束') + currentUtterance = null // 分发一个自定义事件,通知语音合成已结束 // 这样TTSService可以监听这个事件并重置播放状态 - const event = new CustomEvent('edgeTTSComplete', { detail: { text } }); - document.dispatchEvent(event); - }; + const event = new CustomEvent('edgeTTSComplete', { detail: { text } }) + document.dispatchEvent(event) + } utterance.onerror = (event) => { - console.error('语音合成错误:', event); - currentUtterance = null; - }; + console.error('语音合成错误:', event) + currentUtterance = null + } // 开始语音合成 - window.speechSynthesis.speak(utterance); - return true; + window.speechSynthesis.speak(utterance) + return true } catch (error) { - console.error('直接播放语音失败:', error); - return false; + console.error('直接播放语音失败:', error) + return false } } @@ -120,151 +121,151 @@ export class EdgeTTSService implements TTSServiceInterface { */ async synthesize(text: string): Promise { // 验证参数 - this.validateParams(); + this.validateParams() // 先尝试直接播放 - const playResult = this.playDirectly(text); + const playResult = this.playDirectly(text) if (playResult) { // 如果直接播放成功,返回一个有效的音频Blob // 创建一个简单的音频文件,包含一个短暂停 // 这个文件可以被浏览器正常播放,但实际上不会发出声音 // 因为我们已经使用Web Speech API直接播放了语音 - const 
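
Because `synthesize` must still return a `Blob` after `playDirectly` has already spoken the text via `speechSynthesis`, the code below decodes a zero-sample WAV from base64 as a placeholder. The same trick in isolation (constant copied from the diff):

```ts
const SILENT_WAV_BASE64 = 'UklGRiQAAABXQVZFZm10IBAAAAABAAEARKwAAIhYAQACABAAZGF0YQAAAAA='

// 44 bytes: a valid WAV header with an empty data chunk — playable, but silent.
function silentWavBlob(): Blob {
  const bytes = Uint8Array.from(atob(SILENT_WAV_BASE64), (c) => c.charCodeAt(0))
  return new Blob([bytes], { type: 'audio/wav' })
}
```

Note that `TTSService.speak` further down in this diff treats any Edge-TTS blob under 100 bytes as this placeholder and suppresses the audio-element error for it.
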
silentAudioBase64 = 'UklGRiQAAABXQVZFZm10IBAAAAABAAEARKwAAIhYAQACABAAZGF0YQAAAAA='; - const silentAudioBuffer = Uint8Array.from(atob(silentAudioBase64), c => c.charCodeAt(0)); - return new Blob([silentAudioBuffer], { type: 'audio/wav' }); + const silentAudioBase64 = 'UklGRiQAAABXQVZFZm10IBAAAAABAAEARKwAAIhYAQACABAAZGF0YQAAAAA=' + const silentAudioBuffer = Uint8Array.from(atob(silentAudioBase64), (c) => c.charCodeAt(0)) + return new Blob([silentAudioBuffer], { type: 'audio/wav' }) } // 如果直接播放失败,尝试录制方法 - console.log('直接播放失败,尝试录制方法'); + console.log('直接播放失败,尝试录制方法') try { - console.log('使用浏览器TTS生成语音,音色:', this.edgeVoice); + console.log('使用浏览器TTS生成语音,音色:', this.edgeVoice) // 使用Web Speech API if (!('speechSynthesis' in window)) { - throw new Error(i18n.t('settings.tts.error.browser_not_support')); + throw new Error(i18n.t('settings.tts.error.browser_not_support')) } // 停止当前正在播放的语音 - window.speechSynthesis.cancel(); + window.speechSynthesis.cancel() // 创建语音合成器实例 - const utterance = new SpeechSynthesisUtterance(text); + const utterance = new SpeechSynthesisUtterance(text) // 获取可用的语音合成声音 - const voices = window.speechSynthesis.getVoices(); - console.log('初始可用的语音合成声音:', voices); + const voices = window.speechSynthesis.getVoices() + console.log('初始可用的语音合成声音:', voices) // 如果没有可用的声音,等待声音加载 if (voices.length === 0) { try { await new Promise((resolve) => { const voicesChangedHandler = () => { - window.speechSynthesis.onvoiceschanged = null; - resolve(); - }; - window.speechSynthesis.onvoiceschanged = voicesChangedHandler; + window.speechSynthesis.onvoiceschanged = null + resolve() + } + window.speechSynthesis.onvoiceschanged = voicesChangedHandler // 设置超时,防止无限等待 setTimeout(() => { - window.speechSynthesis.onvoiceschanged = null; - resolve(); - }, 5000); - }); + window.speechSynthesis.onvoiceschanged = null + resolve() + }, 5000) + }) } catch (error) { - console.error('等待语音加载超时:', error); + console.error('等待语音加载超时:', error) } } // 重新获取可用的语音合成声音 - const updatedVoices = window.speechSynthesis.getVoices(); - console.log('更新后可用的语音合成声音:', updatedVoices); + const updatedVoices = window.speechSynthesis.getVoices() + console.log('更新后可用的语音合成声音:', updatedVoices) // 查找指定的语音 - let selectedVoice = updatedVoices.find((v) => v.name === this.edgeVoice); + let selectedVoice = updatedVoices.find((v) => v.name === this.edgeVoice) // 如果没有找到指定的语音,尝试使用中文语音 if (!selectedVoice) { - console.warn('未找到指定的语音:', this.edgeVoice); + console.warn('未找到指定的语音:', this.edgeVoice) // 尝试找中文语音 - selectedVoice = updatedVoices.find((v) => v.lang === 'zh-CN'); + selectedVoice = updatedVoices.find((v) => v.lang === 'zh-CN') if (selectedVoice) { - console.log('使用替代中文语音:', selectedVoice.name); + console.log('使用替代中文语音:', selectedVoice.name) } else { // 如果没有中文语音,使用第一个可用的语音 if (updatedVoices.length > 0) { - selectedVoice = updatedVoices[0]; - console.log('使用第一个可用的语音:', selectedVoice.name); + selectedVoice = updatedVoices[0] + console.log('使用第一个可用的语音:', selectedVoice.name) } else { - console.warn('没有可用的语音'); + console.warn('没有可用的语音') } } } else { - console.log('已选择语音:', selectedVoice.name); + console.log('已选择语音:', selectedVoice.name) } // 设置语音 if (selectedVoice) { - utterance.voice = selectedVoice; + utterance.voice = selectedVoice } // 创建一个Promise来等待语音合成完成 return await new Promise((resolve, reject) => { try { // 使用AudioContext捕获语音合成的音频 - const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)(); - const audioDestination = audioContext.createMediaStreamDestination(); - const mediaRecorder = new 
MediaRecorder(audioDestination.stream); - const audioChunks: BlobPart[] = []; + const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)() + const audioDestination = audioContext.createMediaStreamDestination() + const mediaRecorder = new MediaRecorder(audioDestination.stream) + const audioChunks: BlobPart[] = [] mediaRecorder.ondataavailable = (event) => { if (event.data.size > 0) { - audioChunks.push(event.data); + audioChunks.push(event.data) } - }; + } mediaRecorder.onstop = () => { - const audioBlob = new Blob(audioChunks, { type: 'audio/wav' }); - resolve(audioBlob); - }; + const audioBlob = new Blob(audioChunks, { type: 'audio/wav' }) + resolve(audioBlob) + } // 开始录制 - mediaRecorder.start(); + mediaRecorder.start() // 设置语音合成事件 utterance.onend = () => { // 语音合成结束后停止录制 setTimeout(() => { - mediaRecorder.stop(); - }, 500); // 等待一下,确保所有音频都被捕获 - }; + mediaRecorder.stop() + }, 500) // 等待一下,确保所有音频都被捕获 + } utterance.onerror = (event) => { - console.error('语音合成错误:', event); - mediaRecorder.stop(); - reject(new Error('语音合成错误')); - }; + console.error('语音合成错误:', event) + mediaRecorder.stop() + reject(new Error('语音合成错误')) + } // 开始语音合成 - window.speechSynthesis.speak(utterance); + window.speechSynthesis.speak(utterance) // 设置超时,防止无限等待 setTimeout(() => { if (mediaRecorder.state === 'recording') { - console.warn('语音合成超时,强制停止'); - mediaRecorder.stop(); + console.warn('语音合成超时,强制停止') + mediaRecorder.stop() } - }, 10000); // 10秒超时 + }, 10000) // 10秒超时 } catch (error: any) { - console.error('浏览器TTS语音合成失败:', error); - reject(new Error(`浏览器TTS语音合成失败: ${error?.message || '未知错误'}`)); + console.error('浏览器TTS语音合成失败:', error) + reject(new Error(`浏览器TTS语音合成失败: ${error?.message || '未知错误'}`)) } - }); + }) } catch (error: any) { - console.error('浏览器TTS语音合成失败:', error); + console.error('浏览器TTS语音合成失败:', error) // 即使失败也返回一个空的Blob,而不是抛出异常 // 这样可以避免在UI上显示错误消息 - return new Blob([], { type: 'audio/wav' }); + return new Blob([], { type: 'audio/wav' }) } } } diff --git a/src/renderer/src/services/tts/MsTTSService.ts b/src/renderer/src/services/tts/MsTTSService.ts index 0905b3268d..7925ace53d 100644 --- a/src/renderer/src/services/tts/MsTTSService.ts +++ b/src/renderer/src/services/tts/MsTTSService.ts @@ -1,13 +1,14 @@ -import { TTSServiceInterface } from './TTSServiceInterface'; -import i18n from '@renderer/i18n'; +import i18n from '@renderer/i18n' + +import { TTSServiceInterface } from './TTSServiceInterface' /** * 免费在线TTS服务实现类 * 使用免费的在线TTS服务,不需要API密钥 */ export class MsTTSService implements TTSServiceInterface { - private voice: string; - private outputFormat: string; + private voice: string + private outputFormat: string /** * 构造函数 @@ -15,9 +16,9 @@ export class MsTTSService implements TTSServiceInterface { * @param outputFormat 输出格式 */ constructor(voice: string, outputFormat: string) { - this.voice = voice; - this.outputFormat = outputFormat; - console.log('初始化MsTTSService,语音:', voice, '输出格式:', outputFormat); + this.voice = voice + this.outputFormat = outputFormat + console.log('初始化MsTTSService,语音:', voice, '输出格式:', outputFormat) } /** @@ -26,7 +27,7 @@ export class MsTTSService implements TTSServiceInterface { */ private validateParams(): void { if (!this.voice) { - throw new Error(i18n.t('settings.tts.error.no_mstts_voice')); + throw new Error(i18n.t('settings.tts.error.no_mstts_voice')) } } @@ -37,22 +38,22 @@ export class MsTTSService implements TTSServiceInterface { */ async synthesize(text: string): Promise { // 验证参数 - this.validateParams(); + this.validateParams() try { - 
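
The recording fallback above wires playback to a `MediaRecorder` through an `AudioContext` destination. The generic capture shape, as a self-contained helper (note that `speechSynthesis` output is not automatically routed into a `MediaStreamAudioDestinationNode`, so this path is best-effort, which is why direct playback is tried first):

```ts
function recordStream(stream: MediaStream, maxMs = 10_000): Promise<Blob> {
  return new Promise((resolve, reject) => {
    const recorder = new MediaRecorder(stream)
    const chunks: BlobPart[] = []
    recorder.ondataavailable = (event) => {
      if (event.data.size > 0) chunks.push(event.data)
    }
    recorder.onstop = () => resolve(new Blob(chunks, { type: 'audio/wav' }))
    recorder.onerror = () => reject(new Error('recording failed'))
    recorder.start()
    // Mirror the diff's safety timeout: never record forever.
    setTimeout(() => {
      if (recorder.state === 'recording') recorder.stop()
    }, maxMs)
  })
}
```
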
console.log('使用免费在线TTS生成语音,音色:', this.voice); + console.log('使用免费在线TTS生成语音,音色:', this.voice) // 通过IPC调用主进程的MsTTSService - const outputPath = await window.api.msTTS.synthesize(text, this.voice, this.outputFormat); + const outputPath = await window.api.msTTS.synthesize(text, this.voice, this.outputFormat) // 读取生成的音频文件 - const audioData = await window.api.fs.read(outputPath); + const audioData = await window.api.fs.read(outputPath) // 将Buffer转换为Blob - return new Blob([audioData], { type: 'audio/mp3' }); + return new Blob([audioData], { type: 'audio/mp3' }) } catch (error: any) { - console.error('免费在线TTS语音合成失败:', error); - throw new Error(`免费在线TTS语音合成失败: ${error?.message || '未知错误'}`); + console.error('免费在线TTS语音合成失败:', error) + throw new Error(`免费在线TTS语音合成失败: ${error?.message || '未知错误'}`) } } } diff --git a/src/renderer/src/services/tts/OpenAITTSService.ts b/src/renderer/src/services/tts/OpenAITTSService.ts index 26f71bfd0c..a759dc3f5f 100644 --- a/src/renderer/src/services/tts/OpenAITTSService.ts +++ b/src/renderer/src/services/tts/OpenAITTSService.ts @@ -1,14 +1,15 @@ -import { TTSServiceInterface } from './TTSServiceInterface'; -import i18n from '@renderer/i18n'; +import i18n from '@renderer/i18n' + +import { TTSServiceInterface } from './TTSServiceInterface' /** * OpenAI TTS服务实现类 */ export class OpenAITTSService implements TTSServiceInterface { - private apiKey: string; - private apiUrl: string; - private voice: string; - private model: string; + private apiKey: string + private apiUrl: string + private voice: string + private model: string /** * 构造函数 @@ -18,10 +19,10 @@ export class OpenAITTSService implements TTSServiceInterface { * @param model 模型 */ constructor(apiKey: string, apiUrl: string, voice: string, model: string) { - this.apiKey = apiKey; - this.apiUrl = apiUrl; - this.voice = voice; - this.model = model; + this.apiKey = apiKey + this.apiUrl = apiUrl + this.voice = voice + this.model = model } /** @@ -30,19 +31,19 @@ export class OpenAITTSService implements TTSServiceInterface { */ private validateParams(): void { if (!this.apiKey) { - throw new Error(i18n.t('settings.tts.error.no_api_key')); + throw new Error(i18n.t('settings.tts.error.no_api_key')) } if (!this.apiUrl) { - throw new Error(i18n.t('settings.tts.error.no_api_url')); + throw new Error(i18n.t('settings.tts.error.no_api_url')) } if (!this.voice) { - throw new Error(i18n.t('settings.tts.error.no_voice')); + throw new Error(i18n.t('settings.tts.error.no_voice')) } if (!this.model) { - throw new Error(i18n.t('settings.tts.error.no_model')); + throw new Error(i18n.t('settings.tts.error.no_model')) } } @@ -53,24 +54,24 @@ export class OpenAITTSService implements TTSServiceInterface { */ async synthesize(text: string): Promise { // 验证参数 - this.validateParams(); + this.validateParams() // 准备OpenAI TTS请求体 const requestBody: any = { input: text - }; + } // 只有当模型和音色不为空时才添加到请求体中 if (this.model) { - requestBody.model = this.model; + requestBody.model = this.model } if (this.voice) { - requestBody.voice = this.voice; + requestBody.voice = this.voice } // 调用OpenAI TTS API - console.log('调用OpenAI TTS API,开始合成语音'); + console.log('调用OpenAI TTS API,开始合成语音') const response = await fetch(this.apiUrl, { method: 'POST', headers: { @@ -78,15 +79,15 @@ export class OpenAITTSService implements TTSServiceInterface { Authorization: `Bearer ${this.apiKey}` }, body: JSON.stringify(requestBody) - }); + }) if (!response.ok) { - const errorData = await response.json(); - throw new Error(errorData.error?.message || 'OpenAI语音合成失败'); + const 
errorData = await response.json() + throw new Error(errorData.error?.message || 'OpenAI语音合成失败') } // 获取音频数据 - console.log('获取到OpenAI TTS响应,开始处理音频数据'); - return await response.blob(); + console.log('获取到OpenAI TTS响应,开始处理音频数据') + return await response.blob() } } diff --git a/src/renderer/src/services/tts/SiliconflowTTSService.ts b/src/renderer/src/services/tts/SiliconflowTTSService.ts index 3eae5e552d..f020f9ca12 100644 --- a/src/renderer/src/services/tts/SiliconflowTTSService.ts +++ b/src/renderer/src/services/tts/SiliconflowTTSService.ts @@ -1,16 +1,17 @@ -import { TTSServiceInterface } from './TTSServiceInterface'; -import i18n from '@renderer/i18n'; +import i18n from '@renderer/i18n' + +import { TTSServiceInterface } from './TTSServiceInterface' /** * 硅基流动TTS服务实现类 */ export class SiliconflowTTSService implements TTSServiceInterface { - private apiKey: string; - private apiUrl: string; - private voice: string; - private model: string; - private responseFormat: string; - private speed: number; + private apiKey: string + private apiUrl: string + private voice: string + private model: string + private responseFormat: string + private speed: number /** * 构造函数 @@ -29,12 +30,12 @@ export class SiliconflowTTSService implements TTSServiceInterface { responseFormat: string = 'mp3', speed: number = 1.0 ) { - this.apiKey = apiKey; - this.apiUrl = apiUrl || 'https://api.siliconflow.cn/v1/audio/speech'; - this.voice = voice; - this.model = model; - this.responseFormat = responseFormat; - this.speed = speed; + this.apiKey = apiKey + this.apiUrl = apiUrl || 'https://api.siliconflow.cn/v1/audio/speech' + this.voice = voice + this.model = model + this.responseFormat = responseFormat + this.speed = speed } /** @@ -43,15 +44,15 @@ export class SiliconflowTTSService implements TTSServiceInterface { */ private validateParams(): void { if (!this.apiKey) { - throw new Error(i18n.t('settings.tts.error.no_api_key')); + throw new Error(i18n.t('settings.tts.error.no_api_key')) } if (!this.voice) { - throw new Error(i18n.t('settings.tts.error.no_voice')); + throw new Error(i18n.t('settings.tts.error.no_voice')) } if (!this.model) { - throw new Error(i18n.t('settings.tts.error.no_model')); + throw new Error(i18n.t('settings.tts.error.no_model')) } } @@ -62,7 +63,7 @@ export class SiliconflowTTSService implements TTSServiceInterface { */ async synthesize(text: string): Promise { // 验证参数 - this.validateParams(); + this.validateParams() // 准备硅基流动TTS请求体 const requestBody: any = { @@ -73,44 +74,44 @@ export class SiliconflowTTSService implements TTSServiceInterface { response_format: this.responseFormat, stream: false, speed: this.speed - }; + } console.log('硅基流动TTS请求参数:', { model: this.model, voice: this.voice, response_format: 'mp3', speed: this.speed - }); + }) // 调用硅基流动TTS API - console.log('调用硅基流动TTS API,开始合成语音'); + console.log('调用硅基流动TTS API,开始合成语音') const response = await fetch(this.apiUrl, { method: 'POST', headers: { 'Content-Type': 'application/json', - 'Authorization': `Bearer ${this.apiKey}` + Authorization: `Bearer ${this.apiKey}` }, body: JSON.stringify(requestBody) - }); + }) if (!response.ok) { - let errorMessage = '硅基流动语音合成失败'; + let errorMessage = '硅基流动语音合成失败' try { - const errorData = await response.json(); - errorMessage = errorData.error?.message || errorMessage; + const errorData = await response.json() + errorMessage = errorData.error?.message || errorMessage } catch (e) { // 如果无法解析JSON,使用默认错误消息 } - throw new Error(errorMessage); + throw new Error(errorMessage) } // 获取音频数据 - 
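
`OpenAITTSService` and `SiliconflowTTSService` share the same transport: a JSON POST with a bearer token, audio bytes back, and best-effort parsing of a JSON error body. Reduced to a single sketch (endpoint, key, and body are caller-supplied; this generalisation is mine, not the diff's):

```ts
async function synthesizeOverHttp(apiUrl: string, apiKey: string, body: object): Promise<Blob> {
  const response = await fetch(apiUrl, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${apiKey}` },
    body: JSON.stringify(body)
  })
  if (!response.ok) {
    let message = 'TTS synthesis failed'
    try {
      const data = await response.json()
      message = data.error?.message || message
    } catch {
      // Non-JSON error body: keep the default message.
    }
    throw new Error(message)
  }
  return response.blob()
}

// e.g. synthesizeOverHttp(url, key, { model, voice, input: text, response_format: 'mp3', speed: 1.0 })
```
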
console.log('获取到硅基流动TTS响应,开始处理音频数据'); + console.log('获取到硅基流动TTS响应,开始处理音频数据') // 获取原始Blob - const originalBlob = await response.blob(); + const originalBlob = await response.blob() // 创建一个新的Blob,并指定正确的MIME类型 - return new Blob([originalBlob], { type: 'audio/mpeg' }); + return new Blob([originalBlob], { type: 'audio/mpeg' }) } } diff --git a/src/renderer/src/services/tts/TTSService.ts b/src/renderer/src/services/tts/TTSService.ts index 2dbd744809..1b4228702a 100644 --- a/src/renderer/src/services/tts/TTSService.ts +++ b/src/renderer/src/services/tts/TTSService.ts @@ -1,21 +1,22 @@ -import store from '@renderer/store'; -import i18n from '@renderer/i18n'; -import { TTSServiceFactory } from './TTSServiceFactory'; -import { TTSTextFilter } from './TTSTextFilter'; -import { Message } from '@renderer/types'; +import i18n from '@renderer/i18n' +import store from '@renderer/store' +import { Message } from '@renderer/types' + +import { TTSServiceFactory } from './TTSServiceFactory' +import { TTSTextFilter } from './TTSTextFilter' /** * TTS服务类 * 用于处理文本到语音的转换 */ export class TTSService { - private static instance: TTSService; - private audioElement: HTMLAudioElement | null = null; - private isPlaying = false; + private static instance: TTSService + private audioElement: HTMLAudioElement | null = null + private isPlaying = false // 错误消息节流控制 - private lastErrorTime = 0; - private errorThrottleTime = 2000; // 2秒内不重复显示相同错误 + private lastErrorTime = 0 + private errorThrottleTime = 2000 // 2秒内不重复显示相同错误 /** * 获取单例实例 @@ -25,8 +26,8 @@ export class TTSService { // 每次调用时强制重新创建实例,确保使用最新的设置 // 注意:这会导致每次调用时都创建新的音频元素,可能会有内存泄漏风险 // 但在当前情况下,这是解决TTS服务类型切换问题的最简单方法 - TTSService.instance = new TTSService(); - return TTSService.instance; + TTSService.instance = new TTSService() + return TTSService.instance } /** @@ -34,21 +35,21 @@ export class TTSService { */ private constructor() { // 创建音频元素 - this.audioElement = document.createElement('audio'); - this.audioElement.style.display = 'none'; - document.body.appendChild(this.audioElement); + this.audioElement = document.createElement('audio') + this.audioElement.style.display = 'none' + document.body.appendChild(this.audioElement) // 监听音频播放结束事件 this.audioElement.addEventListener('ended', () => { - this.isPlaying = false; - console.log('TTS播放结束'); - }); + this.isPlaying = false + console.log('TTS播放结束') + }) // 监听浏览器TTS直接播放结束的自定义事件 document.addEventListener('edgeTTSComplete', () => { - console.log('收到浏览器TTS直接播放结束事件'); - this.isPlaying = false; - }); + console.log('收到浏览器TTS直接播放结束事件') + this.isPlaying = false + }) } /** @@ -58,21 +59,21 @@ export class TTSService { */ public async speakFromMessage(message: Message): Promise { // 获取最新的TTS过滤选项 - const settings = store.getState().settings; + const settings = store.getState().settings const ttsFilterOptions = settings.ttsFilterOptions || { filterThinkingProcess: true, filterMarkdown: true, filterCodeBlocks: true, filterHtmlTags: true, maxTextLength: 4000 - }; + } // 应用过滤 - const filteredText = TTSTextFilter.filterText(message.content, ttsFilterOptions); - console.log('TTS过滤前文本长度:', message.content.length, '过滤后:', filteredText.length); + const filteredText = TTSTextFilter.filterText(message.content, ttsFilterOptions) + console.log('TTS过滤前文本长度:', message.content.length, '过滤后:', filteredText.length) // 播放过滤后的文本 - return this.speak(filteredText); + return this.speak(filteredText) } /** @@ -83,30 +84,30 @@ export class TTSService { public async speak(text: string): Promise { try { // 检查TTS是否启用 - const settings = store.getState().settings; 
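
`speakFromMessage` above runs the message text through `TTSTextFilter` before synthesis, falling back to these defaults when no options are stored in settings. An example call (import path assumed from the file layout in this diff):

```ts
import { TTSTextFilter } from '@renderer/services/tts/TTSTextFilter'

const raw = '### Thinking\nsome chain of thought\n### Response\nHere is **bold** text and `code`.'

const speakable = TTSTextFilter.filterText(raw, {
  filterThinkingProcess: true, // drop <think> blocks and ###Thinking sections
  filterMarkdown: true, // strip headings, emphasis, links, list markers
  filterCodeBlocks: true, // remove fenced/indented blocks, unwrap inline code
  filterHtmlTags: true, // remove tags and HTML entities
  maxTextLength: 4000 // hard cap before synthesis
})
// speakable === 'Here is bold text and code.'
```
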
- const ttsEnabled = settings.ttsEnabled; + const settings = store.getState().settings + const ttsEnabled = settings.ttsEnabled if (!ttsEnabled) { - this.showErrorMessage(i18n.t('settings.tts.error.not_enabled')); - return false; + this.showErrorMessage(i18n.t('settings.tts.error.not_enabled')) + return false } // 如果正在播放,先停止 if (this.isPlaying) { - this.stop(); + this.stop() } // 确保文本不为空 if (!text || text.trim() === '') { - this.showErrorMessage(i18n.t('settings.tts.error.empty_text')); - return false; + this.showErrorMessage(i18n.t('settings.tts.error.empty_text')) + return false } // 获取最新的设置 // 强制刷新状态对象,确保获取最新的设置 - const latestSettings = store.getState().settings; - const serviceType = latestSettings.ttsServiceType || 'openai'; - console.log('使用的TTS服务类型:', serviceType); + const latestSettings = store.getState().settings + const serviceType = latestSettings.ttsServiceType || 'openai' + console.log('使用的TTS服务类型:', serviceType) console.log('当前TTS设置详情:', { ttsServiceType: serviceType, ttsEdgeVoice: latestSettings.ttsEdgeVoice, @@ -115,18 +116,18 @@ export class TTSService { ttsSiliconflowModel: latestSettings.ttsSiliconflowModel, ttsSiliconflowResponseFormat: latestSettings.ttsSiliconflowResponseFormat, ttsSiliconflowSpeed: latestSettings.ttsSiliconflowSpeed - }); + }) try { // 使用工厂创建TTS服务 - const ttsService = TTSServiceFactory.createService(serviceType, latestSettings); + const ttsService = TTSServiceFactory.createService(serviceType, latestSettings) // 合成语音 - const audioBlob = await ttsService.synthesize(text); + const audioBlob = await ttsService.synthesize(text) // 播放音频 if (audioBlob) { - const audioUrl = URL.createObjectURL(audioBlob); + const audioUrl = URL.createObjectURL(audioBlob) if (this.audioElement) { // 打印音频Blob信息,帮助调试 @@ -134,36 +135,36 @@ export class TTSService { size: audioBlob.size, type: audioBlob.type, serviceType: serviceType - }); + }) - this.audioElement.src = audioUrl; + this.audioElement.src = audioUrl this.audioElement.play().catch((error) => { // 检查是否是浏览器TTS直接播放的情况 // 如果是浏览器TTS且音频大小很小,则不显示错误消息 - const isEdgeTTS = serviceType === 'edge'; - const isSmallBlob = audioBlob.size < 100; // 小于100字节的音频文件可能是我们的静音文件 + const isEdgeTTS = serviceType === 'edge' + const isSmallBlob = audioBlob.size < 100 // 小于100字节的音频文件可能是我们的静音文件 if (isEdgeTTS && isSmallBlob) { - console.log('浏览器TTS直接播放中,忽略音频元素错误'); + console.log('浏览器TTS直接播放中,忽略音频元素错误') } else { - console.error('播放TTS音频失败:', error); - console.error('音频URL:', audioUrl); - console.error('音频Blob类型:', audioBlob.type); - console.error('音频Blob大小:', audioBlob.size); - this.showErrorMessage(i18n.t('settings.tts.error.play_failed')); + console.error('播放TTS音频失败:', error) + console.error('音频URL:', audioUrl) + console.error('音频Blob类型:', audioBlob.type) + console.error('音频Blob大小:', audioBlob.size) + this.showErrorMessage(i18n.t('settings.tts.error.play_failed')) } - }); + }) - this.isPlaying = true; - console.log('开始播放TTS音频'); + this.isPlaying = true + console.log('开始播放TTS音频') // 释放URL对象 this.audioElement.onended = () => { - URL.revokeObjectURL(audioUrl); + URL.revokeObjectURL(audioUrl) // 检查是否是浏览器TTS直接播放的情况 - const isEdgeTTS = serviceType === 'edge'; - const isSmallBlob = audioBlob.size < 100; + const isEdgeTTS = serviceType === 'edge' + const isSmallBlob = audioBlob.size < 100 // 如果是浏览器TTS直接播放,则等待当前语音合成结束 if (isEdgeTTS && isSmallBlob) { @@ -171,33 +172,33 @@ export class TTSService { // 如果还在播放,则不重置播放状态 // 注意:这里我们无法直接访问 EdgeTTSService 中的 currentUtterance // 所以我们使用定时器来检查语音合成是否完成 - console.log('浏览器TTS直接播放中,等待语音合成结束'); + 
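
The object-URL lifecycle above (create, assign, play, revoke on `ended`) is easy to leak; isolated, it looks like this (helper name is mine):

```ts
function playBlob(audio: HTMLAudioElement, blob: Blob): Promise<void> {
  const url = URL.createObjectURL(blob)
  audio.src = url
  audio.onended = () => URL.revokeObjectURL(url)
  return audio.play().catch((error) => {
    URL.revokeObjectURL(url) // also release the URL when playback never starts
    throw error
  })
}
```
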
console.log('浏览器TTS直接播放中,等待语音合成结束') // 保持播放状态,直到语音合成结束 // 使用定时器来检查语音合成是否完成 // 大多数语音合成应该在几秒内完成 setTimeout(() => { - this.isPlaying = false; - console.log('浏览器TTS直接播放完成'); - }, 10000); // 10秒后自动重置状态 + this.isPlaying = false + console.log('浏览器TTS直接播放完成') + }, 10000) // 10秒后自动重置状态 } else { - this.isPlaying = false; + this.isPlaying = false } - }; + } - return true; + return true } } - return false; + return false } catch (error: any) { - console.error('TTS合成失败:', error); - this.showErrorMessage(error?.message || i18n.t('settings.tts.error.synthesis_failed')); - return false; + console.error('TTS合成失败:', error) + this.showErrorMessage(error?.message || i18n.t('settings.tts.error.synthesis_failed')) + return false } } catch (error) { - console.error('TTS播放失败:', error); - this.showErrorMessage(i18n.t('settings.tts.error.general')); - return false; + console.error('TTS播放失败:', error) + this.showErrorMessage(i18n.t('settings.tts.error.general')) + return false } } @@ -206,10 +207,10 @@ export class TTSService { */ public stop(): void { if (this.audioElement && this.isPlaying) { - this.audioElement.pause(); - this.audioElement.currentTime = 0; - this.isPlaying = false; - console.log('停止TTS播放'); + this.audioElement.pause() + this.audioElement.currentTime = 0 + this.isPlaying = false + console.log('停止TTS播放') } } @@ -218,7 +219,7 @@ export class TTSService { * @returns 是否正在播放 */ public isCurrentlyPlaying(): boolean { - return this.isPlaying; + return this.isPlaying } /** @@ -226,15 +227,15 @@ export class TTSService { * @param message 错误消息 */ private showErrorMessage(message: string): void { - const now = Date.now(); + const now = Date.now() // 如果距离上次错误消息的时间小于节流时间,则不显示 if (now - this.lastErrorTime < this.errorThrottleTime) { - console.log('错误消息被节流:', message); - return; + console.log('错误消息被节流:', message) + return } // 更新上次错误消息时间 - this.lastErrorTime = now; - window.message.error({ content: message, key: 'tts-error' }); + this.lastErrorTime = now + window.message.error({ content: message, key: 'tts-error' }) } } diff --git a/src/renderer/src/services/tts/TTSServiceFactory.ts b/src/renderer/src/services/tts/TTSServiceFactory.ts index ea2da98723..9d8fb0c4c7 100644 --- a/src/renderer/src/services/tts/TTSServiceFactory.ts +++ b/src/renderer/src/services/tts/TTSServiceFactory.ts @@ -1,9 +1,10 @@ -import { TTSServiceInterface } from './TTSServiceInterface'; -import { OpenAITTSService } from './OpenAITTSService'; -import { EdgeTTSService } from './EdgeTTSService'; -import { SiliconflowTTSService } from './SiliconflowTTSService'; -import { MsTTSService } from './MsTTSService'; -import i18n from '@renderer/i18n'; +import i18n from '@renderer/i18n' + +import { EdgeTTSService } from './EdgeTTSService' +import { MsTTSService } from './MsTTSService' +import { OpenAITTSService } from './OpenAITTSService' +import { SiliconflowTTSService } from './SiliconflowTTSService' +import { TTSServiceInterface } from './TTSServiceInterface' /** * TTS服务工厂类 @@ -17,24 +18,19 @@ export class TTSServiceFactory { * @returns TTS服务实例 */ static createService(serviceType: string, settings: any): TTSServiceInterface { - console.log('创建TTS服务实例,类型:', serviceType); + console.log('创建TTS服务实例,类型:', serviceType) switch (serviceType) { case 'openai': - console.log('创建OpenAI TTS服务实例'); - return new OpenAITTSService( - settings.ttsApiKey, - settings.ttsApiUrl, - settings.ttsVoice, - settings.ttsModel - ); + console.log('创建OpenAI TTS服务实例') + return new OpenAITTSService(settings.ttsApiKey, settings.ttsApiUrl, settings.ttsVoice, settings.ttsModel) case 
'edge': - console.log('创建Edge TTS服务实例'); - return new EdgeTTSService(settings.ttsEdgeVoice); + console.log('创建Edge TTS服务实例') + return new EdgeTTSService(settings.ttsEdgeVoice) case 'siliconflow': - console.log('创建硅基流动 TTS服务实例'); + console.log('创建硅基流动 TTS服务实例') console.log('硅基流动TTS设置:', { apiKey: settings.ttsSiliconflowApiKey ? '已设置' : '未设置', apiUrl: settings.ttsSiliconflowApiUrl, @@ -42,7 +38,7 @@ export class TTSServiceFactory { model: settings.ttsSiliconflowModel, responseFormat: settings.ttsSiliconflowResponseFormat, speed: settings.ttsSiliconflowSpeed - }); + }) return new SiliconflowTTSService( settings.ttsSiliconflowApiKey, settings.ttsSiliconflowApiUrl, @@ -50,21 +46,18 @@ export class TTSServiceFactory { settings.ttsSiliconflowModel, settings.ttsSiliconflowResponseFormat, settings.ttsSiliconflowSpeed - ); + ) case 'mstts': - console.log('创建免费在线TTS服务实例'); + console.log('创建免费在线TTS服务实例') console.log('免费在线TTS设置:', { voice: settings.ttsMsVoice, outputFormat: settings.ttsMsOutputFormat - }); - return new MsTTSService( - settings.ttsMsVoice, - settings.ttsMsOutputFormat - ); + }) + return new MsTTSService(settings.ttsMsVoice, settings.ttsMsOutputFormat) default: - throw new Error(i18n.t('settings.tts.error.unsupported_service_type', { serviceType })); + throw new Error(i18n.t('settings.tts.error.unsupported_service_type', { serviceType })) } } } diff --git a/src/renderer/src/services/tts/TTSServiceInterface.ts b/src/renderer/src/services/tts/TTSServiceInterface.ts index e14a4f11b4..66ffe88eb0 100644 --- a/src/renderer/src/services/tts/TTSServiceInterface.ts +++ b/src/renderer/src/services/tts/TTSServiceInterface.ts @@ -8,5 +8,5 @@ export interface TTSServiceInterface { * @param text 要合成的文本 * @returns 返回音频Blob对象的Promise */ - synthesize(text: string): Promise; + synthesize(text: string): Promise } diff --git a/src/renderer/src/services/tts/TTSTextFilter.ts b/src/renderer/src/services/tts/TTSTextFilter.ts index 059ea90804..023d3b93a1 100644 --- a/src/renderer/src/services/tts/TTSTextFilter.ts +++ b/src/renderer/src/services/tts/TTSTextFilter.ts @@ -12,43 +12,43 @@ export class TTSTextFilter { public static filterText( text: string, options: { - filterThinkingProcess: boolean; - filterMarkdown: boolean; - filterCodeBlocks: boolean; - filterHtmlTags: boolean; - maxTextLength: number; + filterThinkingProcess: boolean + filterMarkdown: boolean + filterCodeBlocks: boolean + filterHtmlTags: boolean + maxTextLength: number } ): string { - if (!text) return ''; + if (!text) return '' - let filteredText = text; + let filteredText = text // 过滤思考过程 if (options.filterThinkingProcess) { - filteredText = this.filterThinkingProcess(filteredText); + filteredText = this.filterThinkingProcess(filteredText) } // 过滤Markdown标记 if (options.filterMarkdown) { - filteredText = this.filterMarkdown(filteredText); + filteredText = this.filterMarkdown(filteredText) } // 过滤代码块 if (options.filterCodeBlocks) { - filteredText = this.filterCodeBlocks(filteredText); + filteredText = this.filterCodeBlocks(filteredText) } // 过滤HTML标签 if (options.filterHtmlTags) { - filteredText = this.filterHtmlTags(filteredText); + filteredText = this.filterHtmlTags(filteredText) } // 限制文本长度 if (options.maxTextLength > 0 && filteredText.length > options.maxTextLength) { - filteredText = filteredText.substring(0, options.maxTextLength); + filteredText = filteredText.substring(0, options.maxTextLength) } - return filteredText.trim(); + return filteredText.trim() } /** @@ -58,27 +58,27 @@ export class TTSTextFilter { */ private static 
diff --git a/src/renderer/src/services/tts/TTSTextFilter.ts b/src/renderer/src/services/tts/TTSTextFilter.ts
index 059ea90804..023d3b93a1 100644
--- a/src/renderer/src/services/tts/TTSTextFilter.ts
+++ b/src/renderer/src/services/tts/TTSTextFilter.ts
@@ -12,43 +12,43 @@
   public static filterText(
     text: string,
     options: {
-      filterThinkingProcess: boolean;
-      filterMarkdown: boolean;
-      filterCodeBlocks: boolean;
-      filterHtmlTags: boolean;
-      maxTextLength: number;
+      filterThinkingProcess: boolean
+      filterMarkdown: boolean
+      filterCodeBlocks: boolean
+      filterHtmlTags: boolean
+      maxTextLength: number
     }
   ): string {
-    if (!text) return '';
+    if (!text) return ''

-    let filteredText = text;
+    let filteredText = text

     // 过滤思考过程
     if (options.filterThinkingProcess) {
-      filteredText = this.filterThinkingProcess(filteredText);
+      filteredText = this.filterThinkingProcess(filteredText)
     }

     // 过滤Markdown标记
     if (options.filterMarkdown) {
-      filteredText = this.filterMarkdown(filteredText);
+      filteredText = this.filterMarkdown(filteredText)
     }

     // 过滤代码块
     if (options.filterCodeBlocks) {
-      filteredText = this.filterCodeBlocks(filteredText);
+      filteredText = this.filterCodeBlocks(filteredText)
     }

     // 过滤HTML标签
     if (options.filterHtmlTags) {
-      filteredText = this.filterHtmlTags(filteredText);
+      filteredText = this.filterHtmlTags(filteredText)
     }

     // 限制文本长度
     if (options.maxTextLength > 0 && filteredText.length > options.maxTextLength) {
-      filteredText = filteredText.substring(0, options.maxTextLength);
+      filteredText = filteredText.substring(0, options.maxTextLength)
     }

-    return filteredText.trim();
+    return filteredText.trim()
   }

   /**
@@ -58,27 +58,27 @@ export class TTSTextFilter {
    */
   private static filterThinkingProcess(text: string): string {
     // 过滤<think>标签内容
-    text = text.replace(/<think>[\s\S]*?<\/think>/g, '');
-
+    text = text.replace(/<think>[\s\S]*?<\/think>/g, '')
+
     // 过滤未闭合的<think>标签
     if (text.includes('<think>')) {
-      const parts = text.split('<think>');
-      text = parts[0];
+      const parts = text.split('<think>')
+      text = parts[0]
     }
-
+
     // 过滤思考过程部分(###Thinking和###Response格式)
-    const thinkingMatch = text.match(/###\s*Thinking[\s\S]*?(?=###\s*Response|$)/);
+    const thinkingMatch = text.match(/###\s*Thinking[\s\S]*?(?=###\s*Response|$)/)
     if (thinkingMatch) {
-      text = text.replace(thinkingMatch[0], '');
+      text = text.replace(thinkingMatch[0], '')
     }
-
+
     // 如果有Response部分,只保留Response部分
-    const responseMatch = text.match(/###\s*Response\s*([\s\S]*?)(?=###|$)/);
+    const responseMatch = text.match(/###\s*Response\s*([\s\S]*?)(?=###|$)/)
     if (responseMatch) {
-      text = responseMatch[1];
+      text = responseMatch[1]
     }
-
-    return text;
+
+    return text
   }

   /**
@@ -88,29 +88,29 @@ export class TTSTextFilter {
    */
   private static filterMarkdown(text: string): string {
     // 过滤标题标记
-    text = text.replace(/#{1,6}\s+/g, '');
-
+    text = text.replace(/#{1,6}\s+/g, '')
+
     // 过滤粗体和斜体标记
-    text = text.replace(/(\*\*|__)(.*?)\1/g, '$2');
-    text = text.replace(/(\*|_)(.*?)\1/g, '$2');
-
+    text = text.replace(/(\*\*|__)(.*?)\1/g, '$2')
+    text = text.replace(/(\*|_)(.*?)\1/g, '$2')
+
     // 过滤链接
-    text = text.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '$1');
-
+    text = text.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '$1')
+
     // 过滤图片
-    text = text.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, '');
-
+    text = text.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, '')
+
     // 过滤引用
-    text = text.replace(/^\s*>\s+/gm, '');
-
+    text = text.replace(/^\s*>\s+/gm, '')
+
     // 过滤水平线
-    text = text.replace(/^\s*[-*_]{3,}\s*$/gm, '');
-
+    text = text.replace(/^\s*[-*_]{3,}\s*$/gm, '')
+
     // 过滤列表标记
-    text = text.replace(/^\s*[-*+]\s+/gm, '');
-    text = text.replace(/^\s*\d+\.\s+/gm, '');
-
-    return text;
+    text = text.replace(/^\s*[-*+]\s+/gm, '')
+    text = text.replace(/^\s*\d+\.\s+/gm, '')
+
+    return text
   }

   /**
@@ -120,15 +120,15 @@ export class TTSTextFilter {
    */
   private static filterCodeBlocks(text: string): string {
     // 过滤围栏式代码块
-    text = text.replace(/```[\s\S]*?```/g, '');
-
+    text = text.replace(/```[\s\S]*?```/g, '')
+
     // 过滤缩进式代码块
-    text = text.replace(/(?:^|\n)( {4}|\t).*(?:\n|$)/g, '\n');
-
+    text = text.replace(/(?:^|\n)( {4}|\t).*(?:\n|$)/g, '\n')
+
     // 过滤行内代码
-    text = text.replace(/`([^`]+)`/g, '$1');
-
-    return text;
+    text = text.replace(/`([^`]+)`/g, '$1')
+
+    return text
   }

   /**
@@ -138,11 +138,11 @@ export class TTSTextFilter {
    */
   private static filterHtmlTags(text: string): string {
     // 过滤HTML标签
-    text = text.replace(/<[^>]*>/g, '');
-
+    text = text.replace(/<[^>]*>/g, '')
+
     // 过滤HTML实体
-    text = text.replace(/&[a-zA-Z0-9#]+;/g, ' ');
-
-    return text;
+    text = text.replace(/&[a-zA-Z0-9#]+;/g, ' ')
+
+    return text
   }
 }
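filterText applies the four strippers in a fixed order (thinking process, Markdown, code, HTML) and truncates last, so callers drive everything through one options object. A usage sketch, assuming the same relative import path; per the guard above, maxTextLength <= 0 disables truncation.

import { TTSTextFilter } from './TTSTextFilter'

const spoken = TTSTextFilter.filterText(
  '<think>draft reasoning</think>**Hello** `world`, see [docs](https://example.com)',
  {
    filterThinkingProcess: true,
    filterMarkdown: true,
    filterCodeBlocks: true,
    filterHtmlTags: true,
    maxTextLength: 0 // <= 0 means no length cap
  }
)
console.log(spoken) // expected: 'Hello world, see docs'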
diff --git a/src/renderer/src/services/tts/index.ts b/src/renderer/src/services/tts/index.ts
index 78d52d164e..2083f5a456 100644
--- a/src/renderer/src/services/tts/index.ts
+++ b/src/renderer/src/services/tts/index.ts
@@ -1,7 +1,7 @@
-export * from './TTSService';
-export * from './TTSServiceInterface';
-export * from './TTSServiceFactory';
-export * from './OpenAITTSService';
-export * from './EdgeTTSService';
-export * from './SiliconflowTTSService';
-export * from './MsTTSService';
+export * from './EdgeTTSService'
+export * from './MsTTSService'
+export * from './OpenAITTSService'
+export * from './SiliconflowTTSService'
+export * from './TTSService'
+export * from './TTSServiceFactory'
+export * from './TTSServiceInterface'
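With the barrel re-exports sorted, consumers can import everything TTS-related from one path. An end-to-end sketch under stated assumptions: the '@renderer/services/tts' alias, the settings literal, and the chosen voice are illustrative only; 'edge' is one of the service types the factory handles.

import { TTSServiceFactory, TTSTextFilter } from '@renderer/services/tts'

async function speak(raw: string): Promise<Blob> {
  // Hypothetical settings; in the app these come from the user's TTS settings.
  const settings = { ttsEdgeVoice: 'zh-CN-XiaoxiaoNeural' }
  const service = TTSServiceFactory.createService('edge', settings)

  // Strip reasoning, markup, and code before synthesis, capping the length.
  const text = TTSTextFilter.filterText(raw, {
    filterThinkingProcess: true,
    filterMarkdown: true,
    filterCodeBlocks: true,
    filterHtmlTags: true,
    maxTextLength: 4000
  })

  // synthesize() resolves to an audio Blob ready for an <audio> element.
  return service.synthesize(text)
}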